[med-svn] [python-pysam] 01/04: Imported Upstream version 0.11.2.2+ds

Tue Jul 4 04:25:06 UTC 2017

This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository python-pysam.

commit 1520aab08562a7f44fd4570ea351a3dbd5db5a35
Author: Afif Elghraoui <afif at debian.org>
Date:   Sun Jul 2 03:50:19 2017 -0400

    Imported Upstream version 0.11.2.2+ds
---
 .gitignore                                         |   14 +-
 MANIFEST.in                                        |    7 +-
 bcftools/HMM.c                                     |  141 +-
 bcftools/HMM.c.pysam.c                             |  141 +-
 bcftools/HMM.h                                     |   26 +-
 {samtools => bcftools}/bam2bcf.c                   |   54 +-
 {samtools => bcftools}/bam2bcf.c.pysam.c           |   54 +-
 {samtools => bcftools}/bam2bcf.h                   |    7 +-
 {samtools => bcftools}/bam2bcf_indel.c             |  141 +-
 {samtools => bcftools}/bam2bcf_indel.c.pysam.c     |  141 +-
 bcftools/bam_sample.c                              |  393 ++
 bcftools/bam_sample.c.pysam.c                      |  395 ++
 samtools/errmod.h => bcftools/bam_sample.h         |   41 +-
 bcftools/bcftools.h                                |    1 +
 bcftools/bin.c                                     |  104 +
 bcftools/bin.c.pysam.c                             |  106 +
 bcftools/bin.h                                     |   65 +
 bcftools/call.h                                    |    3 +-
 bcftools/ccall.c                                   |   10 +-
 bcftools/ccall.c.pysam.c                           |   10 +-
 bcftools/consensus.c                               |   56 +-
 bcftools/consensus.c.pysam.c                       |   56 +-
 bcftools/convert.c                                 |  490 ++-
 bcftools/convert.c.pysam.c                         |  490 ++-
 bcftools/csq.c                                     | 3824 +++++++++++++++++++
 bcftools/csq.c.pysam.c                             | 3826 ++++++++++++++++++++
 bcftools/filter.c                                  |  357 +-
 bcftools/filter.c.pysam.c                          |  357 +-
 bcftools/hclust.c                                  |  400 ++
 bcftools/hclust.c.pysam.c                          |  402 ++
 bcftools/hclust.h                                  |   77 +
 bcftools/kheap.h                                   |  171 +
 bcftools/main.c                                    |   10 +
 bcftools/main.c.pysam.c                            |   10 +
 bcftools/mcall.c                                   |  228 +-
 bcftools/mcall.c.pysam.c                           |  228 +-
 bcftools/mpileup.c                                 | 1110 ++++++
 bcftools/mpileup.c.pysam.c                         | 1112 ++++++
 bcftools/mw.h                                      | 1944 ++++++++++
 bcftools/ploidy.c                                  |   22 +-
 bcftools/ploidy.c.pysam.c                          |   22 +-
 bcftools/ploidy.h                                  |    2 +-
 bcftools/prob1.c                                   |   12 +-
 bcftools/prob1.c.pysam.c                           |   12 +-
 bcftools/prob1.h                                   |    2 +-
 bcftools/regidx.c                                  |  598 +++
 bcftools/regidx.c.pysam.c                          |  600 +++
 bcftools/regidx.h                                  |  191 +
 bcftools/smpl_ilist.c                              |  106 +
 bcftools/smpl_ilist.c.pysam.c                      |  108 +
 bcftools/smpl_ilist.h                              |   47 +
 bcftools/tabix.c                                   |   30 +-
 bcftools/tabix.c.pysam.c                           |   30 +-
 bcftools/tsv2vcf.c                                 |    1 +
 bcftools/tsv2vcf.c.pysam.c                         |    1 +
 bcftools/vcfannotate.c                             |  744 ++--
 bcftools/vcfannotate.c.pysam.c                     |  744 ++--
 bcftools/vcfcall.c                                 |   44 +-
 bcftools/vcfcall.c.pysam.c                         |   44 +-
 bcftools/vcfcnv.c                                  |   46 +-
 bcftools/vcfcnv.c.pysam.c                          |   46 +-
 bcftools/vcfconcat.c                               |  164 +-
 bcftools/vcfconcat.c.pysam.c                       |  164 +-
 bcftools/vcfconvert.c                              |  120 +-
 bcftools/vcfconvert.c.pysam.c                      |  120 +-
 bcftools/vcffilter.c                               |    3 +-
 bcftools/vcffilter.c.pysam.c                       |    3 +-
 bcftools/vcfgtcheck.c                              |  298 +-
 bcftools/vcfgtcheck.c.pysam.c                      |  298 +-
 bcftools/vcfindex.c                                |  115 +-
 bcftools/vcfindex.c.pysam.c                        |  115 +-
 bcftools/vcfmerge.c                                | 1075 ++++--
 bcftools/vcfmerge.c.pysam.c                        | 1075 ++++--
 bcftools/vcfnorm.c                                 |   82 +-
 bcftools/vcfnorm.c.pysam.c                         |   82 +-
 bcftools/vcfplugin.c                               |   56 +-
 bcftools/vcfplugin.c.pysam.c                       |   56 +-
 bcftools/vcfroh.c                                  |  961 +++--
 bcftools/vcfroh.c.pysam.c                          |  961 +++--
 bcftools/vcfstats.c                                |  429 ++-
 bcftools/vcfstats.c.pysam.c                        |  429 ++-
 bcftools/vcfview.c                                 |   50 +-
 bcftools/vcfview.c.pysam.c                         |   50 +-
 bcftools/version.h                                 |    2 +-
 buildwheels.sh                                     |    2 +-
 doc/api.rst                                        |    4 +-
 doc/release.rst                                    |   75 +
 doc/usage.rst                                      |   19 +-
 import.py                                          |   29 +-
 pysam/__init__.py                                  |    2 +
 pysam/cbcftools_util.h                             |    6 +
 pysam/csamtools_util.h                             |    6 +
 pysam/htslib_util.h                                |   20 -
 pysam/libcalignedsegment.pxd                       |    6 -
 pysam/libcalignedsegment.pyx                       |  288 +-
 pysam/libcalignmentfile.pyx                        |  344 +-
 pysam/libcbcf.pxd                                  |   21 +-
 pysam/libcbcf.pyx                                  |  848 ++++-
 pysam/libcbcftools.pxd                             |    3 +
 pysam/libcbcftools.pyx                             |    2 +
 pysam/libcbgzf.pyx                                 |   53 +-
 pysam/libcfaidx.pyx                                |   31 +-
 pysam/libchtslib.pxd                               |  679 +++-
 pysam/libchtslib.pyx                               |  285 +-
 pysam/libcsamtools.pxd                             |    3 +
 pysam/libcsamtools.pyx                             |    2 +
 pysam/libctabix.pxd                                |    4 +
 pysam/libctabix.pyx                                |   42 +-
 pysam/libctabixproxies.pxd                         |   14 +-
 pysam/libctabixproxies.pyx                         |  506 ++-
 pysam/libcutils.pxd                                |    4 +-
 pysam/libcutils.pyx                                |   30 +-
 pysam/pysam_util.c                                 |    6 +-
 pysam/pysam_util.h                                 |    4 +
 pysam/samfile_util.c                               |  172 -
 pysam/samfile_util.h                               |    3 -
 pysam/tabix_util.c                                 |    1 +
 pysam/utils.py                                     |   13 +-
 pysam/version.py                                   |   11 +-
 samtools/bam.h                                     |    2 +-
 samtools/bam2bcf.c                                 |    2 +-
 samtools/bam2bcf.c.pysam.c                         |    2 +-
 samtools/bam2bcf.h                                 |    2 +-
 samtools/bam2bcf_indel.c                           |   26 +-
 samtools/bam2bcf_indel.c.pysam.c                   |   26 +-
 samtools/bam2depth.c                               |   30 +-
 samtools/bam2depth.c.pysam.c                       |   30 +-
 samtools/bam_addrprg.c                             |   86 +-
 samtools/bam_addrprg.c.pysam.c                     |   86 +-
 samtools/bam_cat.c                                 |   60 +-
 samtools/bam_cat.c.pysam.c                         |   60 +-
 samtools/bam_index.c                               |   52 +-
 samtools/bam_index.c.pysam.c                       |   52 +-
 samtools/bam_mate.c                                |   71 +-
 samtools/bam_mate.c.pysam.c                        |   71 +-
 samtools/bam_md.c                                  |  202 +-
 samtools/bam_md.c.pysam.c                          |  204 +-
 samtools/bam_plcmd.c                               |  122 +-
 samtools/bam_plcmd.c.pysam.c                       |  122 +-
 samtools/bam_quickcheck.c                          |   52 +-
 samtools/bam_quickcheck.c.pysam.c                  |   52 +-
 samtools/bam_reheader.c                            |    4 +-
 samtools/bam_reheader.c.pysam.c                    |   14 +-
 samtools/bam_rmdup.c                               |    4 +-
 samtools/bam_rmdup.c.pysam.c                       |    4 +-
 samtools/bam_sort.c                                |  178 +-
 samtools/bam_sort.c.pysam.c                        |  178 +-
 samtools/bam_split.c                               |   99 +-
 samtools/bam_split.c.pysam.c                       |   99 +-
 samtools/bam_stat.c                                |   28 +-
 samtools/bam_stat.c.pysam.c                        |   38 +-
 samtools/bam_tview.c                               |  441 ---
 samtools/bam_tview.c.pysam.c                       |  443 ---
 samtools/bam_tview.h                               |  105 -
 samtools/bam_tview_curses.c                        |  352 --
 samtools/bam_tview_curses.c.pysam.c                |  354 --
 samtools/bam_tview_html.c                          |  377 --
 samtools/bam_tview_html.c.pysam.c                  |  379 --
 samtools/bamshuf.c                                 |   22 +-
 samtools/bamshuf.c.pysam.c                         |   22 +-
 samtools/bamtk.c                                   |   35 +-
 samtools/bamtk.c.pysam.c                           |   39 +-
 samtools/bedcov.c                                  |    8 +-
 samtools/bedcov.c.pysam.c                          |    8 +-
 samtools/cut_target.c                              |   19 +-
 samtools/cut_target.c.pysam.c                      |   19 +-
 samtools/errmod.c                                  |  194 -
 samtools/errmod.c.pysam.c                          |  196 -
 samtools/faidx.c                                   |   74 +-
 samtools/faidx.c.pysam.c                           |   74 +-
 samtools/kprobaln.c                                |  282 --
 samtools/kprobaln.c.pysam.c                        |  284 --
 samtools/kprobaln.h                                |   49 -
 samtools/misc/ace2sam.c                            |    5 +-
 samtools/misc/ace2sam.c.pysam.c                    |    5 +-
 samtools/padding.c                                 |    4 +-
 samtools/padding.c.pysam.c                         |    4 +-
 samtools/phase.c                                   |    6 +-
 samtools/phase.c.pysam.c                           |    6 +-
 samtools/sam.h                                     |    2 +-
 samtools/sam_opts.c                                |    8 +-
 samtools/sam_opts.c.pysam.c                        |    8 +-
 samtools/sam_opts.h                                |    7 +-
 samtools/{test/test.c => sam_utils.c}              |   51 +-
 .../{test/test.c.pysam.c => sam_utils.c.pysam.c}   |   51 +-
 samtools/sam_view.c                                |  498 ++-
 samtools/sam_view.c.pysam.c                        |  500 ++-
 samtools/stats.c                                   |   20 +-
 samtools/stats.c.pysam.c                           |   29 +-
 samtools/test/split/test_filter_header_rg.c        |   15 +-
 .../test/split/test_filter_header_rg.c.pysam.c     |   15 +-
 samtools/test/test.c                               |    8 +-
 samtools/test/test.c.pysam.c                       |    8 +-
 samtools/version.h                                 |    2 +-
 setup.py                                           |   69 +-
 tests/AlignedSegment_test.py                       |   93 +-
 tests/AlignmentFile_test.py                        |  143 +-
 tests/SamFile_test.py                              | 1990 ----------
 tests/StreamFiledescriptors_test.py                |   71 +-
 tests/TestUtils.py                                 |   32 +-
 tests/VariantFile_test.py                          |    5 +-
 tests/faidx_test.py                                |   14 +-
 tests/samtools_test.py                             |   47 +-
 tests/tabix_data/example.gff2.gz                   |  Bin 0 -> 238 bytes
 tests/tabix_data/example.gff2.gz.tbi               |  Bin 0 -> 107 bytes
 tests/tabix_data/example.gff3.gz                   |  Bin 0 -> 3067 bytes
 tests/tabix_data/example.gff3.gz.tbi               |  Bin 0 -> 1457 bytes
 tests/tabix_test.py                                |  262 +-
 tests/tabixproxies_test.py                         |  318 ++
 209 files changed, 28682 insertions(+), 12148 deletions(-)

diff --git a/.gitignore b/.gitignore
index 598948d..0910be8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,19 +23,7 @@ htslib/config.mk
 pysam/config.py
 
 # cython files
-pysam/TabProxies.c
-pysam/csamtools.c
-pysam/ctabix.c
-pysam/cvcf.c
-pysam/chtslib.c  
-pysam/cutils.c
-pysam/calignedsegment.c
-pysam/calignmentfile.c
-pysam/cbcf.c
-pysam/cfaidx.c
-pysam/chtslib.c
-pysam/csamfile.c
-pysam/ctabixproxies.c
+pysam/libc*.c
 
 ###### Generic python ignores below ######
 
diff --git a/MANIFEST.in b/MANIFEST.in
index be43691..3f2a9cb 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -10,9 +10,9 @@ include KNOWN_BUGS
 include THANKS
 include cy_build.py
 include requirements.txt
-include pysam/c*.pxd
-include pysam/c*.pyx
-include pysam/c*.c
+include pysam/libc*.pxd
+include pysam/libc*.pyx
+include pysam/libc*.c
 include pysam/*.c
 include pysam/*.h
 include samtools/configure
@@ -29,6 +29,7 @@ include htslib/htslib_vars.mk
 include htslib/configure
 include htslib/config.mk.in
 include htslib/config.h.in
+include htslib/htslib.pc.in
 include htslib/htslib/*.h
 include htslib/cram/*.c
 include htslib/cram/*.h
diff --git a/bcftools/HMM.c b/bcftools/HMM.c
index 9196544..5795987 100644
--- a/bcftools/HMM.c
+++ b/bcftools/HMM.c
@@ -31,6 +31,17 @@
 #include <htslib/hts.h>
 #include "HMM.h"
 
+typedef struct
+{
+    int nstates;        // number of hmm's states
+    int isite;          // take snapshot at i-th position
+    uint32_t pos;       // i-th site's position
+    double *vit_prob;   // viterbi probabilities, NULL for uniform probs
+    double *fwd_prob;   // transition probabilities
+    double *bwd_prob;   // transition probabilities
+}
+snapshot_t;
+
 struct _hmm_t
 {
     int nstates;    // number of states
@@ -50,7 +61,8 @@ struct _hmm_t
     set_tprob_f set_tprob;      // Optional user function to set / modify transition probabilities
                                 //  at each site (one step of Viterbi algorithm)
     void *set_tprob_data;
-    double *init_probs;         // Initial state probabilities, NULL for uniform probs
+    snapshot_t init;            // Initial state probabilities. Set isite=1 when site should be used
+    snapshot_t *snapshot;
 };
 
 uint8_t *hmm_get_viterbi_path(hmm_t *hmm) { return hmm->vpath; }
@@ -78,28 +90,79 @@ static inline void multiply_matrix(int n, double *a, double *b, double *dst, dou
         memcpy(dst,out,sizeof(double)*n*n);
 }
 
+void hmm_init_states(hmm_t *hmm, double *probs)
+{
+    hmm->init.isite = 0;
+    hmm->init.pos   = 0;
+    if ( !hmm->init.vit_prob )
+        hmm->init.vit_prob = (double*) malloc(sizeof(double)*hmm->nstates);
+    if ( !hmm->init.fwd_prob )
+        hmm->init.fwd_prob = (double*) malloc(sizeof(double)*hmm->nstates);
+    if ( !hmm->init.bwd_prob )
+        hmm->init.bwd_prob = (double*) malloc(sizeof(double)*hmm->nstates);
+    
+    int i;
+    if ( probs )
+    {
+        memcpy(hmm->init.vit_prob,probs,sizeof(double)*hmm->nstates);
+        double sum = 0;
+        for (i=0; i<hmm->nstates; i++) sum += hmm->init.vit_prob[i];
+        for (i=0; i<hmm->nstates; i++) hmm->init.vit_prob[i] /= sum;
+    }
+    else
+        for (i=0; i<hmm->nstates; i++) hmm->init.vit_prob[i] = 1./hmm->nstates;
+
+    memcpy(hmm->init.fwd_prob,hmm->init.vit_prob,sizeof(double)*hmm->nstates);
+    memcpy(hmm->init.bwd_prob,hmm->init.vit_prob,sizeof(double)*hmm->nstates);
+}
 hmm_t *hmm_init(int nstates, double *tprob, int ntprob)
 {
     hmm_t *hmm = (hmm_t*) calloc(1,sizeof(hmm_t));
     hmm->nstates = nstates;
     hmm->curr_tprob = (double*) malloc(sizeof(double)*nstates*nstates);
     hmm->tmp = (double*) malloc(sizeof(double)*nstates*nstates);
-
     hmm_set_tprob(hmm, tprob, ntprob);
-
+    hmm_init_states(hmm, NULL);
     return hmm;
 }
 
-void hmm_init_states(hmm_t *hmm, double *probs)
+void *hmm_snapshot(hmm_t *hmm, void *_snapshot, int isite)
 {
-    if ( !probs )
+    snapshot_t *snapshot = (snapshot_t*) _snapshot;
+    if ( snapshot && snapshot->nstates!=hmm->nstates )
     {
-        free(hmm->init_probs);
-        hmm->init_probs = NULL;
+        free(snapshot);
+        snapshot = NULL;
     }
-
-    if ( !hmm->init_probs ) hmm->init_probs = (double*) malloc(sizeof(double)*hmm->nstates);
-    memcpy(hmm->init_probs,probs,sizeof(double)*hmm->nstates);
+    if ( !snapshot )
+    {
+        // Allocate the snapshot as a single memory block so that it can be
+        // free()-ed by the user. So make sure the arrays are aligned..
+        size_t str_size = sizeof(snapshot_t);
+        size_t dbl_size = sizeof(double);
+        size_t pad_size = (dbl_size - str_size % dbl_size) % dbl_size;
+        uint8_t *mem = (uint8_t*) malloc(str_size + pad_size + dbl_size*2*hmm->nstates);
+        snapshot = (snapshot_t*) mem;
+        snapshot->nstates  = hmm->nstates;
+        snapshot->vit_prob = (double*) (mem + str_size + pad_size);
+        snapshot->fwd_prob = snapshot->vit_prob + hmm->nstates;
+    }
+    snapshot->isite = isite;
+    hmm->snapshot = snapshot;
+    return snapshot;
+}
+void hmm_restore(hmm_t *hmm, void *_snapshot)
+{
+    snapshot_t *snapshot = (snapshot_t*) _snapshot;
+    if ( !snapshot ) 
+    {
+        hmm->init.isite = 0;
+        return;
+    }
+    hmm->init.isite = 1;
+    hmm->init.pos   = snapshot->pos;
+    memcpy(hmm->init.vit_prob,snapshot->vit_prob,sizeof(double)*hmm->nstates);
+    memcpy(hmm->init.fwd_prob,snapshot->fwd_prob,sizeof(double)*hmm->nstates);
 }
 
 void hmm_set_tprob(hmm_t *hmm, double *tprob, int ntprob)
@@ -154,23 +217,18 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
         hmm->vprob_tmp = (double*) malloc(sizeof(double)*hmm->nstates);
     }
 
-
     // Init all states with equal likelihood
     int i,j, nstates = hmm->nstates;
-    if ( hmm->init_probs )
-        for (i=0; i<nstates; i++) hmm->vprob[i] = hmm->init_probs[i];
-    else
-        for (i=0; i<nstates; i++) hmm->vprob[i] = 1./nstates;
+    memcpy(hmm->vprob, hmm->init.vit_prob, sizeof(*hmm->init.vit_prob)*nstates);
+    uint32_t prev_pos = hmm->init.isite ? hmm->init.pos : sites[0];
 
     // Run Viterbi
-    uint32_t prev_pos = sites[0];
     for (i=0; i<n; i++)
     {
         uint8_t *vpath = &hmm->vpath[i*nstates];
         double *eprob  = &eprobs[i*nstates];
 
         int pos_diff = sites[i] == prev_pos ? 0 : sites[i] - prev_pos - 1;
-
         _set_tprob(hmm, pos_diff);
         if ( hmm->set_tprob ) hmm->set_tprob(hmm, prev_pos, sites[i], hmm->set_tprob_data, hmm->curr_tprob);
         prev_pos = sites[i];
@@ -191,6 +249,12 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
         }
         for (j=0; j<nstates; j++) hmm->vprob_tmp[j] /= vnorm;
         double *tmp = hmm->vprob; hmm->vprob = hmm->vprob_tmp; hmm->vprob_tmp = tmp;
+
+        if ( hmm->snapshot && i==hmm->snapshot->isite )
+        {
+            hmm->snapshot->pos = sites[i];
+            memcpy(hmm->snapshot->vit_prob, hmm->vprob, sizeof(*hmm->vprob)*nstates);
+        }
     }
 
     // Find the most likely state
@@ -224,19 +288,12 @@ void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
 
     // Init all states with equal likelihood
     int i,j,k, nstates = hmm->nstates;
-    if ( hmm->init_probs )
-    {
-        for (i=0; i<nstates; i++) hmm->fwd[i] = hmm->init_probs[i];
-        for (i=0; i<nstates; i++) hmm->bwd[i] = hmm->init_probs[i];
-    }
-    else
-    {
-        for (i=0; i<nstates; i++) hmm->fwd[i] = 1./hmm->nstates;
-        for (i=0; i<nstates; i++) hmm->bwd[i] = 1./hmm->nstates;
-    }
+    memcpy(hmm->fwd, hmm->init.fwd_prob, sizeof(*hmm->init.fwd_prob)*nstates);
+    memcpy(hmm->bwd, hmm->init.bwd_prob, sizeof(*hmm->init.bwd_prob)*nstates);
+
+    uint32_t prev_pos = hmm->init.isite ? hmm->init.pos : sites[0];
 
     // Run fwd 
-    uint32_t prev_pos = sites[0];
     for (i=0; i<n; i++)
     {
         double *fwd_prev = &hmm->fwd[i*nstates];
@@ -261,6 +318,13 @@ void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
         for (j=0; j<nstates; j++) fwd[j] /= norm;
     }
 
+    if ( hmm->snapshot )
+    {
+        i = hmm->snapshot->isite;
+        hmm->snapshot->pos = sites[i];
+        memcpy(hmm->snapshot->fwd_prob, hmm->fwd + (i+1)*nstates, sizeof(*hmm->fwd)*nstates);
+    }
+
     // Run bwd
     double *bwd = hmm->bwd, *bwd_tmp = hmm->bwd_tmp;
     prev_pos = sites[n-1];
@@ -296,7 +360,7 @@ void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
     }
 }
 
-void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
+double *hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
 {
     // Init arrays when run for the first time
     if ( hmm->nfwd < n )
@@ -312,16 +376,9 @@ void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
 
     // Init all states with equal likelihood
     int i,j,k, nstates = hmm->nstates;
-    if ( hmm->init_probs )
-    {
-        for (i=0; i<nstates; i++) hmm->fwd[i] = hmm->init_probs[i];
-        for (i=0; i<nstates; i++) hmm->bwd[i] = hmm->init_probs[i];
-    }
-    else
-    {
-        for (i=0; i<nstates; i++) hmm->fwd[i] = 1./hmm->nstates;
-        for (i=0; i<nstates; i++) hmm->bwd[i] = 1./hmm->nstates;
-    }
+    memcpy(hmm->fwd, hmm->init.fwd_prob, sizeof(*hmm->init.fwd_prob)*nstates);
+    memcpy(hmm->bwd, hmm->init.bwd_prob, sizeof(*hmm->init.bwd_prob)*nstates);
+    uint32_t prev_pos = hmm->init.isite ? hmm->init.pos : sites[0];
 
     // New transition matrix: temporary values
     double *tmp_xi = (double*) calloc(nstates*nstates,sizeof(double));
@@ -329,7 +386,6 @@ void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
     double *fwd_bwd = (double*) malloc(sizeof(double)*nstates);
 
     // Run fwd 
-    uint32_t prev_pos = sites[0];
     for (i=0; i<n; i++)
     {
         double *fwd_prev = &hmm->fwd[i*nstates];
@@ -416,11 +472,14 @@ void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
     free(tmp_gamma);
     free(tmp_xi);
     free(fwd_bwd);
+    return hmm->curr_tprob;
 }
 
 void hmm_destroy(hmm_t *hmm)
 {
-    free(hmm->init_probs);
+    free(hmm->init.vit_prob);
+    free(hmm->init.fwd_prob);
+    free(hmm->init.bwd_prob);
     free(hmm->vprob);
     free(hmm->vprob_tmp);
     free(hmm->vpath);
diff --git a/bcftools/HMM.c.pysam.c b/bcftools/HMM.c.pysam.c
index a3b91ff..513da35 100644
--- a/bcftools/HMM.c.pysam.c
+++ b/bcftools/HMM.c.pysam.c
@@ -33,6 +33,17 @@
 #include <htslib/hts.h>
 #include "HMM.h"
 
+typedef struct
+{
+    int nstates;        // number of hmm's states
+    int isite;          // take snapshot at i-th position
+    uint32_t pos;       // i-th site's position
+    double *vit_prob;   // viterbi probabilities, NULL for uniform probs
+    double *fwd_prob;   // transition probabilities
+    double *bwd_prob;   // transition probabilities
+}
+snapshot_t;
+
 struct _hmm_t
 {
     int nstates;    // number of states
@@ -52,7 +63,8 @@ struct _hmm_t
     set_tprob_f set_tprob;      // Optional user function to set / modify transition probabilities
                                 //  at each site (one step of Viterbi algorithm)
     void *set_tprob_data;
-    double *init_probs;         // Initial state probabilities, NULL for uniform probs
+    snapshot_t init;            // Initial state probabilities. Set isite=1 when site should be used
+    snapshot_t *snapshot;
 };
 
 uint8_t *hmm_get_viterbi_path(hmm_t *hmm) { return hmm->vpath; }
@@ -80,28 +92,79 @@ static inline void multiply_matrix(int n, double *a, double *b, double *dst, dou
         memcpy(dst,out,sizeof(double)*n*n);
 }
 
+void hmm_init_states(hmm_t *hmm, double *probs)
+{
+    hmm->init.isite = 0;
+    hmm->init.pos   = 0;
+    if ( !hmm->init.vit_prob )
+        hmm->init.vit_prob = (double*) malloc(sizeof(double)*hmm->nstates);
+    if ( !hmm->init.fwd_prob )
+        hmm->init.fwd_prob = (double*) malloc(sizeof(double)*hmm->nstates);
+    if ( !hmm->init.bwd_prob )
+        hmm->init.bwd_prob = (double*) malloc(sizeof(double)*hmm->nstates);
+    
+    int i;
+    if ( probs )
+    {
+        memcpy(hmm->init.vit_prob,probs,sizeof(double)*hmm->nstates);
+        double sum = 0;
+        for (i=0; i<hmm->nstates; i++) sum += hmm->init.vit_prob[i];
+        for (i=0; i<hmm->nstates; i++) hmm->init.vit_prob[i] /= sum;
+    }
+    else
+        for (i=0; i<hmm->nstates; i++) hmm->init.vit_prob[i] = 1./hmm->nstates;
+
+    memcpy(hmm->init.fwd_prob,hmm->init.vit_prob,sizeof(double)*hmm->nstates);
+    memcpy(hmm->init.bwd_prob,hmm->init.vit_prob,sizeof(double)*hmm->nstates);
+}
 hmm_t *hmm_init(int nstates, double *tprob, int ntprob)
 {
     hmm_t *hmm = (hmm_t*) calloc(1,sizeof(hmm_t));
     hmm->nstates = nstates;
     hmm->curr_tprob = (double*) malloc(sizeof(double)*nstates*nstates);
     hmm->tmp = (double*) malloc(sizeof(double)*nstates*nstates);
-
     hmm_set_tprob(hmm, tprob, ntprob);
-
+    hmm_init_states(hmm, NULL);
     return hmm;
 }
 
-void hmm_init_states(hmm_t *hmm, double *probs)
+void *hmm_snapshot(hmm_t *hmm, void *_snapshot, int isite)
 {
-    if ( !probs )
+    snapshot_t *snapshot = (snapshot_t*) _snapshot;
+    if ( snapshot && snapshot->nstates!=hmm->nstates )
     {
-        free(hmm->init_probs);
-        hmm->init_probs = NULL;
+        free(snapshot);
+        snapshot = NULL;
     }
-
-    if ( !hmm->init_probs ) hmm->init_probs = (double*) malloc(sizeof(double)*hmm->nstates);
-    memcpy(hmm->init_probs,probs,sizeof(double)*hmm->nstates);
+    if ( !snapshot )
+    {
+        // Allocate the snapshot as a single memory block so that it can be
+        // free()-ed by the user. So make sure the arrays are aligned..
+        size_t str_size = sizeof(snapshot_t);
+        size_t dbl_size = sizeof(double);
+        size_t pad_size = (dbl_size - str_size % dbl_size) % dbl_size;
+        uint8_t *mem = (uint8_t*) malloc(str_size + pad_size + dbl_size*2*hmm->nstates);
+        snapshot = (snapshot_t*) mem;
+        snapshot->nstates  = hmm->nstates;
+        snapshot->vit_prob = (double*) (mem + str_size + pad_size);
+        snapshot->fwd_prob = snapshot->vit_prob + hmm->nstates;
+    }
+    snapshot->isite = isite;
+    hmm->snapshot = snapshot;
+    return snapshot;
+}
+void hmm_restore(hmm_t *hmm, void *_snapshot)
+{
+    snapshot_t *snapshot = (snapshot_t*) _snapshot;
+    if ( !snapshot ) 
+    {
+        hmm->init.isite = 0;
+        return;
+    }
+    hmm->init.isite = 1;
+    hmm->init.pos   = snapshot->pos;
+    memcpy(hmm->init.vit_prob,snapshot->vit_prob,sizeof(double)*hmm->nstates);
+    memcpy(hmm->init.fwd_prob,snapshot->fwd_prob,sizeof(double)*hmm->nstates);
 }
 
 void hmm_set_tprob(hmm_t *hmm, double *tprob, int ntprob)
@@ -156,23 +219,18 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
         hmm->vprob_tmp = (double*) malloc(sizeof(double)*hmm->nstates);
     }
 
-
     // Init all states with equal likelihood
     int i,j, nstates = hmm->nstates;
-    if ( hmm->init_probs )
-        for (i=0; i<nstates; i++) hmm->vprob[i] = hmm->init_probs[i];
-    else
-        for (i=0; i<nstates; i++) hmm->vprob[i] = 1./nstates;
+    memcpy(hmm->vprob, hmm->init.vit_prob, sizeof(*hmm->init.vit_prob)*nstates);
+    uint32_t prev_pos = hmm->init.isite ? hmm->init.pos : sites[0];
 
     // Run Viterbi
-    uint32_t prev_pos = sites[0];
     for (i=0; i<n; i++)
     {
         uint8_t *vpath = &hmm->vpath[i*nstates];
         double *eprob  = &eprobs[i*nstates];
 
         int pos_diff = sites[i] == prev_pos ? 0 : sites[i] - prev_pos - 1;
-
         _set_tprob(hmm, pos_diff);
         if ( hmm->set_tprob ) hmm->set_tprob(hmm, prev_pos, sites[i], hmm->set_tprob_data, hmm->curr_tprob);
         prev_pos = sites[i];
@@ -193,6 +251,12 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
         }
         for (j=0; j<nstates; j++) hmm->vprob_tmp[j] /= vnorm;
         double *tmp = hmm->vprob; hmm->vprob = hmm->vprob_tmp; hmm->vprob_tmp = tmp;
+
+        if ( hmm->snapshot && i==hmm->snapshot->isite )
+        {
+            hmm->snapshot->pos = sites[i];
+            memcpy(hmm->snapshot->vit_prob, hmm->vprob, sizeof(*hmm->vprob)*nstates);
+        }
     }
 
     // Find the most likely state
@@ -226,19 +290,12 @@ void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
 
     // Init all states with equal likelihood
     int i,j,k, nstates = hmm->nstates;
-    if ( hmm->init_probs )
-    {
-        for (i=0; i<nstates; i++) hmm->fwd[i] = hmm->init_probs[i];
-        for (i=0; i<nstates; i++) hmm->bwd[i] = hmm->init_probs[i];
-    }
-    else
-    {
-        for (i=0; i<nstates; i++) hmm->fwd[i] = 1./hmm->nstates;
-        for (i=0; i<nstates; i++) hmm->bwd[i] = 1./hmm->nstates;
-    }
+    memcpy(hmm->fwd, hmm->init.fwd_prob, sizeof(*hmm->init.fwd_prob)*nstates);
+    memcpy(hmm->bwd, hmm->init.bwd_prob, sizeof(*hmm->init.bwd_prob)*nstates);
+
+    uint32_t prev_pos = hmm->init.isite ? hmm->init.pos : sites[0];
 
     // Run fwd 
-    uint32_t prev_pos = sites[0];
     for (i=0; i<n; i++)
     {
         double *fwd_prev = &hmm->fwd[i*nstates];
@@ -263,6 +320,13 @@ void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
         for (j=0; j<nstates; j++) fwd[j] /= norm;
     }
 
+    if ( hmm->snapshot )
+    {
+        i = hmm->snapshot->isite;
+        hmm->snapshot->pos = sites[i];
+        memcpy(hmm->snapshot->fwd_prob, hmm->fwd + (i+1)*nstates, sizeof(*hmm->fwd)*nstates);
+    }
+
     // Run bwd
     double *bwd = hmm->bwd, *bwd_tmp = hmm->bwd_tmp;
     prev_pos = sites[n-1];
@@ -298,7 +362,7 @@ void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
     }
 }
 
-void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
+double *hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
 {
     // Init arrays when run for the first time
     if ( hmm->nfwd < n )
@@ -314,16 +378,9 @@ void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
 
     // Init all states with equal likelihood
     int i,j,k, nstates = hmm->nstates;
-    if ( hmm->init_probs )
-    {
-        for (i=0; i<nstates; i++) hmm->fwd[i] = hmm->init_probs[i];
-        for (i=0; i<nstates; i++) hmm->bwd[i] = hmm->init_probs[i];
-    }
-    else
-    {
-        for (i=0; i<nstates; i++) hmm->fwd[i] = 1./hmm->nstates;
-        for (i=0; i<nstates; i++) hmm->bwd[i] = 1./hmm->nstates;
-    }
+    memcpy(hmm->fwd, hmm->init.fwd_prob, sizeof(*hmm->init.fwd_prob)*nstates);
+    memcpy(hmm->bwd, hmm->init.bwd_prob, sizeof(*hmm->init.bwd_prob)*nstates);
+    uint32_t prev_pos = hmm->init.isite ? hmm->init.pos : sites[0];
 
     // New transition matrix: temporary values
     double *tmp_xi = (double*) calloc(nstates*nstates,sizeof(double));
@@ -331,7 +388,6 @@ void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
     double *fwd_bwd = (double*) malloc(sizeof(double)*nstates);
 
     // Run fwd 
-    uint32_t prev_pos = sites[0];
     for (i=0; i<n; i++)
     {
         double *fwd_prev = &hmm->fwd[i*nstates];
@@ -418,11 +474,14 @@ void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
     free(tmp_gamma);
     free(tmp_xi);
     free(fwd_bwd);
+    return hmm->curr_tprob;
 }
 
 void hmm_destroy(hmm_t *hmm)
 {
-    free(hmm->init_probs);
+    free(hmm->init.vit_prob);
+    free(hmm->init.fwd_prob);
+    free(hmm->init.bwd_prob);
     free(hmm->vprob);
     free(hmm->vprob_tmp);
     free(hmm->vpath);
diff --git a/bcftools/HMM.h b/bcftools/HMM.h
index 7f01245..3e5cf7f 100644
--- a/bcftools/HMM.h
+++ b/bcftools/HMM.h
@@ -44,6 +44,10 @@ typedef void (*set_tprob_f) (hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *
 hmm_t *hmm_init(int nstates, double *tprob, int ntprob);
 void hmm_set_tprob(hmm_t *hmm, double *tprob, int ntprob);
 
+#define HMM_VIT 1
+#define HMM_FWD 2
+#define HMM_BWD 4
+
 /**
  *   hmm_init_states() - initial state probabilities
  *   @probs:  initial state probabilities or NULL to reset to default
@@ -53,6 +57,20 @@ void hmm_set_tprob(hmm_t *hmm, double *tprob, int ntprob);
 void hmm_init_states(hmm_t *hmm, double *probs);
 
 /**
+ *   hmm_snapshot() - take the model's snapshot, intended for sliding HMM
+ *   @snapshot: NULL or snapshot returned by previous hmm_snapshot() call, must be free()-ed by the caller
+ *   @isite:    take the snapshot at i-th step
+ */
+void *hmm_snapshot(hmm_t *hmm, void *snapshot, int isite);
+
+/**
+ *   hmm_restore() - restore model's snapshot, intended for sliding HMM
+ *   @snapshot: snapshot returned by hmm_snapshot() call or NULL to reset
+ *   @isite:    take the snapshot at i-th step
+ */
+void hmm_restore(hmm_t *hmm, void *snapshot);
+
+/**
  *   hmm_get_tprob() - return the array of transition matrices, precalculated
  *      to ntprob positions. The first matrix is the initial tprob matrix
  *      set by hmm_init() or hmm_set_tprob()
@@ -103,11 +121,11 @@ double *hmm_get_fwd_bwd_prob(hmm_t *hmm);
  *   @eprob:    emission probabilities for each site and state (nsites x nstates)
  *   @sites:    list of positions
  *
- *   Same as hmm_run_fwd_bwd, in addition curr_tprob contains the new
- *   transition probabilities. In this verison, emission probabilities
- *   are not updated.
+ *   Same as hmm_run_fwd_bwd, in addition a pointer to a matrix with the new
+ *   transition probabilities is returned. In this verison, emission
+ *   probabilities are not updated.
  */
-void hmm_run_baum_welch(hmm_t *hmm, int nsites, double *eprob, uint32_t *sites);
+double *hmm_run_baum_welch(hmm_t *hmm, int nsites, double *eprob, uint32_t *sites);
 
 void hmm_destroy(hmm_t *hmm);
 
diff --git a/samtools/bam2bcf.c b/bcftools/bam2bcf.c
similarity index 96%
copy from samtools/bam2bcf.c
copy to bcftools/bam2bcf.c
index 85ce307..b4fb7f1 100644
--- a/samtools/bam2bcf.c
+++ b/bcftools/bam2bcf.c
@@ -23,17 +23,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.  */
 
-#include <config.h>
-
 #include <math.h>
 #include <stdint.h>
 #include <assert.h>
 #include <float.h>
+#include <htslib/hts.h>
 #include <htslib/sam.h>
 #include <htslib/kstring.h>
 #include <htslib/kfunc.h>
 #include "bam2bcf.h"
-#include "errmod.h"
 
 extern  void ks_introsort_uint32_t(size_t n, uint32_t a[]);
 
@@ -46,7 +44,7 @@ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
 {
     bcf_callaux_t *bca;
     if (theta <= 0.) theta = CALL_DEFTHETA;
-    bca = calloc(1, sizeof(bcf_callaux_t));
+    bca = (bcf_callaux_t*) calloc(1, sizeof(bcf_callaux_t));
     bca->capQ = 60;
     bca->openQ = 40; bca->extQ = 20; bca->tandemQ = 100;
     bca->min_baseQ = min_baseQ;
@@ -55,15 +53,15 @@ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
     bca->min_support = 1;
     bca->per_sample_flt = 0;
     bca->npos = 100;
-    bca->ref_pos = malloc(bca->npos*sizeof(int));
-    bca->alt_pos = malloc(bca->npos*sizeof(int));
+    bca->ref_pos = (int*) malloc(bca->npos*sizeof(int));
+    bca->alt_pos = (int*) malloc(bca->npos*sizeof(int));
     bca->nqual = 60;
-    bca->ref_mq  = malloc(bca->nqual*sizeof(int));
-    bca->alt_mq  = malloc(bca->nqual*sizeof(int));
-    bca->ref_bq  = malloc(bca->nqual*sizeof(int));
-    bca->alt_bq  = malloc(bca->nqual*sizeof(int));
-    bca->fwd_mqs = malloc(bca->nqual*sizeof(int));
-    bca->rev_mqs = malloc(bca->nqual*sizeof(int));
+    bca->ref_mq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->alt_mq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->ref_bq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->alt_bq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->fwd_mqs = (int*) malloc(bca->nqual*sizeof(int));
+    bca->rev_mqs = (int*) malloc(bca->nqual*sizeof(int));
     return bca;
 }
 
@@ -352,11 +350,22 @@ double calc_chisq_bias(int *a, int *b, int n)
     return prob;
 }
 
+static double mann_whitney_1947_(int n, int m, int U)
+{
+     if (U<0) return 0;
+     if (n==0||m==0) return U==0 ? 1 : 0;
+    return (double)n/(n+m)*mann_whitney_1947_(n-1,m,U-m) + (double)m/(n+m)*mann_whitney_1947_(n,m-1,U);
+}
+
 double mann_whitney_1947(int n, int m, int U)
 {
-    if (U<0) return 0;
-    if (n==0||m==0) return U==0 ? 1 : 0;
-    return (double)n/(n+m)*mann_whitney_1947(n-1,m,U-m) + (double)m/(n+m)*mann_whitney_1947(n,m-1,U);
+    #include "mw.h"
+
+    assert(n >= 2 && m >= 2);
+
+    return (n < 8 && m < 8 && U < 50)
+        ? mw[n-2][m-2][U]
+        : mann_whitney_1947_(n,m,U);
 }
 
 double mann_whitney_1947_cdf(int n, int m, int U)
@@ -418,11 +427,16 @@ double calc_mwu_bias(int *a, int *b, int n)
     double U = 0, ties = 0;
     for (i=0; i<n; i++)
     {
-        na += a[i];
-        U  += a[i] * (nb + b[i]*0.5);
-        nb += b[i];
-        if ( a[i] && b[i] )
-        {
+        if (!a[i]) {
+            if (!b[i]) continue;
+            nb += b[i];
+        } else if (!b[i]) {
+            na += a[i];
+            U  += a[i] * nb;
+        } else {
+            na += a[i];
+            U  += a[i] * (nb + b[i]*0.5);
+            nb += b[i];
             double tie = a[i] + b[i];
             ties += (tie*tie-1)*tie;
         }
diff --git a/samtools/bam2bcf.c.pysam.c b/bcftools/bam2bcf.c.pysam.c
similarity index 96%
copy from samtools/bam2bcf.c.pysam.c
copy to bcftools/bam2bcf.c.pysam.c
index 6938ec0..5a1a443 100644
--- a/samtools/bam2bcf.c.pysam.c
+++ b/bcftools/bam2bcf.c.pysam.c
@@ -25,17 +25,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.  */
 
-#include <config.h>
-
 #include <math.h>
 #include <stdint.h>
 #include <assert.h>
 #include <float.h>
+#include <htslib/hts.h>
 #include <htslib/sam.h>
 #include <htslib/kstring.h>
 #include <htslib/kfunc.h>
 #include "bam2bcf.h"
-#include "errmod.h"
 
 extern  void ks_introsort_uint32_t(size_t n, uint32_t a[]);
 
@@ -48,7 +46,7 @@ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
 {
     bcf_callaux_t *bca;
     if (theta <= 0.) theta = CALL_DEFTHETA;
-    bca = calloc(1, sizeof(bcf_callaux_t));
+    bca = (bcf_callaux_t*) calloc(1, sizeof(bcf_callaux_t));
     bca->capQ = 60;
     bca->openQ = 40; bca->extQ = 20; bca->tandemQ = 100;
     bca->min_baseQ = min_baseQ;
@@ -57,15 +55,15 @@ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
     bca->min_support = 1;
     bca->per_sample_flt = 0;
     bca->npos = 100;
-    bca->ref_pos = malloc(bca->npos*sizeof(int));
-    bca->alt_pos = malloc(bca->npos*sizeof(int));
+    bca->ref_pos = (int*) malloc(bca->npos*sizeof(int));
+    bca->alt_pos = (int*) malloc(bca->npos*sizeof(int));
     bca->nqual = 60;
-    bca->ref_mq  = malloc(bca->nqual*sizeof(int));
-    bca->alt_mq  = malloc(bca->nqual*sizeof(int));
-    bca->ref_bq  = malloc(bca->nqual*sizeof(int));
-    bca->alt_bq  = malloc(bca->nqual*sizeof(int));
-    bca->fwd_mqs = malloc(bca->nqual*sizeof(int));
-    bca->rev_mqs = malloc(bca->nqual*sizeof(int));
+    bca->ref_mq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->alt_mq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->ref_bq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->alt_bq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->fwd_mqs = (int*) malloc(bca->nqual*sizeof(int));
+    bca->rev_mqs = (int*) malloc(bca->nqual*sizeof(int));
     return bca;
 }
 
@@ -354,11 +352,22 @@ double calc_chisq_bias(int *a, int *b, int n)
     return prob;
 }
 
+static double mann_whitney_1947_(int n, int m, int U)
+{
+     if (U<0) return 0;
+     if (n==0||m==0) return U==0 ? 1 : 0;
+    return (double)n/(n+m)*mann_whitney_1947_(n-1,m,U-m) + (double)m/(n+m)*mann_whitney_1947_(n,m-1,U);
+}
+
 double mann_whitney_1947(int n, int m, int U)
 {
-    if (U<0) return 0;
-    if (n==0||m==0) return U==0 ? 1 : 0;
-    return (double)n/(n+m)*mann_whitney_1947(n-1,m,U-m) + (double)m/(n+m)*mann_whitney_1947(n,m-1,U);
+    #include "mw.h"
+
+    assert(n >= 2 && m >= 2);
+
+    return (n < 8 && m < 8 && U < 50)
+        ? mw[n-2][m-2][U]
+        : mann_whitney_1947_(n,m,U);
 }
 
 double mann_whitney_1947_cdf(int n, int m, int U)
@@ -420,11 +429,16 @@ double calc_mwu_bias(int *a, int *b, int n)
     double U = 0, ties = 0;
     for (i=0; i<n; i++)
     {
-        na += a[i];
-        U  += a[i] * (nb + b[i]*0.5);
-        nb += b[i];
-        if ( a[i] && b[i] )
-        {
+        if (!a[i]) {
+            if (!b[i]) continue;
+            nb += b[i];
+        } else if (!b[i]) {
+            na += a[i];
+            U  += a[i] * nb;
+        } else {
+            na += a[i];
+            U  += a[i] * (nb + b[i]*0.5);
+            nb += b[i];
             double tie = a[i] + b[i];
             ties += (tie*tie-1)*tie;
         }
diff --git a/samtools/bam2bcf.h b/bcftools/bam2bcf.h
similarity index 96%
copy from samtools/bam2bcf.h
copy to bcftools/bam2bcf.h
index 22c67cc..f81f9cf 100644
--- a/samtools/bam2bcf.h
+++ b/bcftools/bam2bcf.h
@@ -1,7 +1,7 @@
 /*  bam2bcf.h -- variant calling.
 
     Copyright (C) 2010-2012 Broad Institute.
-    Copyright (C) 2012-2014 Genome Research Ltd.
+    Copyright (C) 2012-2014,2016 Genome Research Ltd.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
 
@@ -27,8 +27,8 @@ DEALINGS IN THE SOFTWARE.  */
 #define BAM2BCF_H
 
 #include <stdint.h>
+#include <htslib/hts.h>
 #include <htslib/vcf.h>
-#include "errmod.h"
 
 /**
  *  A simplified version of Mann-Whitney U-test is calculated
@@ -128,8 +128,7 @@ extern "C" {
     int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call);
     int bcf_call2bcf(bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int fmt_flag,
                      const bcf_callaux_t *bca, const char *ref);
-    int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref,
-                          const void *rghash);
+    int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref);
     void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call);
 
 #ifdef __cplusplus
diff --git a/samtools/bam2bcf_indel.c b/bcftools/bam2bcf_indel.c
similarity index 83%
copy from samtools/bam2bcf_indel.c
copy to bcftools/bam2bcf_indel.c
index 5b353fc..52837b5 100644
--- a/samtools/bam2bcf_indel.c
+++ b/bcftools/bam2bcf_indel.c
@@ -1,7 +1,7 @@
 /*  bam2bcf_indel.c -- indel caller.
 
     Copyright (C) 2010, 2011 Broad Institute.
-    Copyright (C) 2012-2014 Genome Research Ltd.
+    Copyright (C) 2012-2014,2016 Genome Research Ltd.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
 
@@ -23,70 +23,20 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.  */
 
-#include <config.h>
-
 #include <assert.h>
 #include <ctype.h>
 #include <string.h>
-#include "htslib/sam.h"
+#include <htslib/hts.h>
+#include <htslib/sam.h>
+#include <htslib/khash_str2int.h>
 #include "bam2bcf.h"
-#include "kprobaln.h"
-#include "htslib/khash.h"
-KHASH_SET_INIT_STR(rg)
 
-#include "htslib/ksort.h"
+#include <htslib/ksort.h>
 KSORT_INIT_GENERIC(uint32_t)
 
 #define MINUS_CONST 0x10000000
 #define INDEL_WINDOW_SIZE 50
 
-void *bcf_call_add_rg(void *_hash, const char *hdtext, const char *list)
-{
-    const char *s, *p, *q, *r, *t;
-    khash_t(rg) *hash;
-    if (list == 0 || hdtext == 0) return _hash;
-    if (_hash == 0) _hash = kh_init(rg);
-    hash = (khash_t(rg)*)_hash;
-    if ((s = strstr(hdtext, "@RG\t")) == 0) return hash;
-    do {
-        t = strstr(s + 4, "@RG\t"); // the next @RG
-        if ((p = strstr(s, "\tID:")) != 0) p += 4;
-        if ((q = strstr(s, "\tPL:")) != 0) q += 4;
-        if (p && q && (t == 0 || (p < t && q < t))) { // ID and PL are both present
-            int lp, lq;
-            char *x;
-            for (r = p; *r && *r != '\t' && *r != '\n'; ++r) { }
-            lp = r - p;
-            for (r = q; *r && *r != '\t' && *r != '\n'; ++r) { }
-            lq = r - q;
-            x = calloc((lp > lq? lp : lq) + 1, 1);
-            for (r = q; *r && *r != '\t' && *r != '\n'; ++r) x[r-q] = *r;
-            if (strstr(list, x)) { // insert ID to the hash table
-                khint_t k;
-                int ret;
-                for (r = p; *r && *r != '\t' && *r != '\n'; ++r) x[r-p] = *r;
-                x[r-p] = 0;
-                k = kh_get(rg, hash, x);
-                if (k == kh_end(hash)) k = kh_put(rg, hash, x, &ret);
-                else free(x);
-            } else free(x);
-        }
-        s = t;
-    } while (s);
-    return hash;
-}
-
-void bcf_call_del_rghash(void *_hash)
-{
-    khint_t k;
-    khash_t(rg) *hash = (khash_t(rg)*)_hash;
-    if (hash == 0) return;
-    for (k = kh_begin(hash); k < kh_end(hash); ++k)
-        if (kh_exist(hash, k))
-            free((char*)kh_key(hash, k));
-    kh_destroy(rg, hash);
-}
-
 static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos)
 {
     int k, x = c->pos, y = 0, last_y = 0;
@@ -146,30 +96,13 @@ static inline int est_indelreg(int pos, const char *ref, int l, char *ins4)
             - 8: estimated sequence quality                     .. (aux>>8)&0xff
             - 8: indel quality                                  .. aux&0xff
  */
-int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref,
-                      const void *rghash)
+int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref)
 {
     int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2;
     int N, K, l_run, ref_type, n_alt;
     char *inscns = 0, *ref2, *query, **ref_sample;
-    khash_t(rg) *hash = (khash_t(rg)*)rghash;
     if (ref == 0 || bca == 0) return -1;
-    // mark filtered reads
-    if (rghash) {
-        N = 0;
-        for (s = N = 0; s < n; ++s) {
-            for (i = 0; i < n_plp[s]; ++i) {
-                bam_pileup1_t *p = plp[s] + i;
-                const uint8_t *rg = bam_aux_get(p->b, "RG");
-                p->aux = 1; // filtered by default
-                if (rg) {
-                    khint_t k = kh_get(rg, hash, (const char*)(rg + 1));
-                    if (k != kh_end(hash)) p->aux = 0, ++N; // not filtered
-                }
-            }
-        }
-        if (N == 0) return -1; // no reads left
-    }
+
     // determine if there is a gap
     for (s = N = 0; s < n; ++s) {
         for (i = 0; i < n_plp[s]; ++i)
@@ -182,19 +115,17 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
         bca->max_support = bca->max_frac = 0;
         int m, n_alt = 0, n_tot = 0, indel_support_ok = 0;
         uint32_t *aux;
-        aux = calloc(N + 1, 4);
+        aux = (uint32_t*) calloc(N + 1, 4);
         m = max_rd_len = 0;
         aux[m++] = MINUS_CONST; // zero indel is always a type
         for (s = 0; s < n; ++s) {
             int na = 0, nt = 0;
             for (i = 0; i < n_plp[s]; ++i) {
                 const bam_pileup1_t *p = plp[s] + i;
-                if (rghash == 0 || p->aux == 0) {
-                    ++nt;
-                    if (p->indel != 0) {
-                        ++na;
-                        aux[m++] = MINUS_CONST + p->indel;
-                    }
+                ++nt;
+                if (p->indel != 0) {
+                    ++na;
+                    aux[m++] = MINUS_CONST + p->indel;
                 }
                 j = bam_cigar2qlen(p->b->core.n_cigar, bam_get_cigar(p->b));
                 if (j > max_rd_len) max_rd_len = j;
@@ -260,13 +191,13 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
         int L = right - left + 1, max_i, max2_i;
         uint32_t *cns, max, max2;
         char *ref0, *r;
-        ref_sample = calloc(n, sizeof(char*));
-        cns = calloc(L, 4);
-        ref0 = calloc(L, 1);
+        ref_sample = (char**) calloc(n, sizeof(char*));
+        cns = (uint32_t*) calloc(L, 4);
+        ref0 = (char*) calloc(L, 1);
         for (i = 0; i < right - left; ++i)
             ref0[i] = seq_nt16_table[(int)ref[i+left]];
         for (s = 0; s < n; ++s) {
-            r = ref_sample[s] = calloc(L, 1);
+            r = ref_sample[s] = (char*) calloc(L, 1);
             memset(cns, 0, sizeof(int) * L);
             // collect ref and non-ref counts
             for (i = 0; i < n_plp[s]; ++i) {
@@ -317,7 +248,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     // construct the consensus sequence
     max_ins = types[n_types - 1];   // max_ins is at least 0
     if (max_ins > 0) {
-        int *inscns_aux = calloc(5 * n_types * max_ins, sizeof(int));
+        int *inscns_aux = (int*) calloc(5 * n_types * max_ins, sizeof(int));
         // count the number of occurrences of each base at each position for each type of insertion
         for (t = 0; t < n_types; ++t) {
             if (types[t] > 0) {
@@ -337,7 +268,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
             }
         }
         // use the majority rule to construct the consensus
-        inscns = calloc(n_types * max_ins, 1);
+        inscns = (char*) calloc(n_types * max_ins, 1);
         for (t = 0; t < n_types; ++t) {
             for (j = 0; j < types[t]; ++j) {
                 int max = 0, max_k = -1, *ia = &inscns_aux[(t*max_ins+j)*5];
@@ -352,14 +283,14 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     }
     // compute the likelihood given each type of indel for each read
     max_ref2 = right - left + 2 + 2 * (max_ins > -types[0]? max_ins : -types[0]);
-    ref2  = calloc(max_ref2, 1);
-    query = calloc(right - left + max_rd_len + max_ins + 2, 1);
-    score1 = calloc(N * n_types, sizeof(int));
-    score2 = calloc(N * n_types, sizeof(int));
+    ref2  = (char*) calloc(max_ref2, 1);
+    query = (char*) calloc(right - left + max_rd_len + max_ins + 2, 1);
+    score1 = (int*) calloc(N * n_types, sizeof(int));
+    score2 = (int*) calloc(N * n_types, sizeof(int));
     bca->indelreg = 0;
     for (t = 0; t < n_types; ++t) {
         int l, ir;
-        kpa_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
+        probaln_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
         apf1.bw = apf2.bw = abs(types[t]) + 3;
         // compute indelreg
         if (types[t] == 0) ir = 0;
@@ -404,7 +335,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                 { // do realignment; this is the bottleneck
                     const uint8_t *qual = bam_get_qual(p->b), *bq;
                     uint8_t *qq;
-                    qq = calloc(qend - qbeg, 1);
+                    qq = (uint8_t*) calloc(qend - qbeg, 1);
                     bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
                     if (bq) ++bq; // skip type
                     for (l = qbeg; l < qend; ++l) {
@@ -412,14 +343,14 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                         if (qq[l - qbeg] > 30) qq[l - qbeg] = 30;
                         if (qq[l - qbeg] < 7) qq[l - qbeg] = 7;
                     }
-                    sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                    (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
+                    sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
+                                        (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
                     l = (int)(100. * sc / (qend - qbeg) + .499); // used for adjusting indelQ below
                     if (l > 255) l = 255;
                     score1[K*n_types + t] = score2[K*n_types + t] = sc<<8 | l;
                     if (sc > 5) {
-                        sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                        (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
+                        sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
+                                            (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
                         l = (int)(100. * sc / (qend - qbeg) + .499);
                         if (l > 255) l = 255;
                         score2[K*n_types + t] = sc<<8 | l;
@@ -439,10 +370,13 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     }
     free(ref2); free(query);
     { // compute indelQ
-        int *sc, tmp, *sumq;
-        sc   = alloca(n_types * sizeof(int));
-        sumq = alloca(n_types * sizeof(int));
-        memset(sumq, 0, sizeof(int) * n_types);
+        int sc_a[16], sumq_a[16];
+        int tmp, *sc = sc_a, *sumq = sumq_a;
+        if (n_types > 16) {
+            sc   = (int *)malloc(n_types * sizeof(int));
+            sumq = (int *)malloc(n_types * sizeof(int));
+        }
+        memset(sumq, 0, n_types * sizeof(int));
         for (s = K = 0; s < n; ++s) {
             for (i = 0; i < n_plp[s]; ++i, ++K) {
                 bam_pileup1_t *p = plp[s] + i;
@@ -493,7 +427,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
         }
         // determine bca->indel_types[] and bca->inscns
         bca->maxins = max_ins;
-        bca->inscns = realloc(bca->inscns, bca->maxins * 4);
+        bca->inscns = (char*) realloc(bca->inscns, bca->maxins * 4);
         for (t = 0; t < n_types; ++t)
             sumq[t] = sumq[t]<<6 | t;
         for (t = 1; t < n_types; ++t) // insertion sort
@@ -523,6 +457,9 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                 //fprintf(stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam1_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
             }
         }
+
+        if (sc   != sc_a)   free(sc);
+        if (sumq != sumq_a) free(sumq);
     }
     free(score1); free(score2);
     // free
diff --git a/samtools/bam2bcf_indel.c.pysam.c b/bcftools/bam2bcf_indel.c.pysam.c
similarity index 83%
copy from samtools/bam2bcf_indel.c.pysam.c
copy to bcftools/bam2bcf_indel.c.pysam.c
index 21cbb03..0d36841 100644
--- a/samtools/bam2bcf_indel.c.pysam.c
+++ b/bcftools/bam2bcf_indel.c.pysam.c
@@ -3,7 +3,7 @@
 /*  bam2bcf_indel.c -- indel caller.
 
     Copyright (C) 2010, 2011 Broad Institute.
-    Copyright (C) 2012-2014 Genome Research Ltd.
+    Copyright (C) 2012-2014,2016 Genome Research Ltd.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
 
@@ -25,70 +25,20 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.  */
 
-#include <config.h>
-
 #include <assert.h>
 #include <ctype.h>
 #include <string.h>
-#include "htslib/sam.h"
+#include <htslib/hts.h>
+#include <htslib/sam.h>
+#include <htslib/khash_str2int.h>
 #include "bam2bcf.h"
-#include "kprobaln.h"
-#include "htslib/khash.h"
-KHASH_SET_INIT_STR(rg)
 
-#include "htslib/ksort.h"
+#include <htslib/ksort.h>
 KSORT_INIT_GENERIC(uint32_t)
 
 #define MINUS_CONST 0x10000000
 #define INDEL_WINDOW_SIZE 50
 
-void *bcf_call_add_rg(void *_hash, const char *hdtext, const char *list)
-{
-    const char *s, *p, *q, *r, *t;
-    khash_t(rg) *hash;
-    if (list == 0 || hdtext == 0) return _hash;
-    if (_hash == 0) _hash = kh_init(rg);
-    hash = (khash_t(rg)*)_hash;
-    if ((s = strstr(hdtext, "@RG\t")) == 0) return hash;
-    do {
-        t = strstr(s + 4, "@RG\t"); // the next @RG
-        if ((p = strstr(s, "\tID:")) != 0) p += 4;
-        if ((q = strstr(s, "\tPL:")) != 0) q += 4;
-        if (p && q && (t == 0 || (p < t && q < t))) { // ID and PL are both present
-            int lp, lq;
-            char *x;
-            for (r = p; *r && *r != '\t' && *r != '\n'; ++r) { }
-            lp = r - p;
-            for (r = q; *r && *r != '\t' && *r != '\n'; ++r) { }
-            lq = r - q;
-            x = calloc((lp > lq? lp : lq) + 1, 1);
-            for (r = q; *r && *r != '\t' && *r != '\n'; ++r) x[r-q] = *r;
-            if (strstr(list, x)) { // insert ID to the hash table
-                khint_t k;
-                int ret;
-                for (r = p; *r && *r != '\t' && *r != '\n'; ++r) x[r-p] = *r;
-                x[r-p] = 0;
-                k = kh_get(rg, hash, x);
-                if (k == kh_end(hash)) k = kh_put(rg, hash, x, &ret);
-                else free(x);
-            } else free(x);
-        }
-        s = t;
-    } while (s);
-    return hash;
-}
-
-void bcf_call_del_rghash(void *_hash)
-{
-    khint_t k;
-    khash_t(rg) *hash = (khash_t(rg)*)_hash;
-    if (hash == 0) return;
-    for (k = kh_begin(hash); k < kh_end(hash); ++k)
-        if (kh_exist(hash, k))
-            free((char*)kh_key(hash, k));
-    kh_destroy(rg, hash);
-}
-
 static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos)
 {
     int k, x = c->pos, y = 0, last_y = 0;
@@ -148,30 +98,13 @@ static inline int est_indelreg(int pos, const char *ref, int l, char *ins4)
             - 8: estimated sequence quality                     .. (aux>>8)&0xff
             - 8: indel quality                                  .. aux&0xff
  */
-int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref,
-                      const void *rghash)
+int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref)
 {
     int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2;
     int N, K, l_run, ref_type, n_alt;
     char *inscns = 0, *ref2, *query, **ref_sample;
-    khash_t(rg) *hash = (khash_t(rg)*)rghash;
     if (ref == 0 || bca == 0) return -1;
-    // mark filtered reads
-    if (rghash) {
-        N = 0;
-        for (s = N = 0; s < n; ++s) {
-            for (i = 0; i < n_plp[s]; ++i) {
-                bam_pileup1_t *p = plp[s] + i;
-                const uint8_t *rg = bam_aux_get(p->b, "RG");
-                p->aux = 1; // filtered by default
-                if (rg) {
-                    khint_t k = kh_get(rg, hash, (const char*)(rg + 1));
-                    if (k != kh_end(hash)) p->aux = 0, ++N; // not filtered
-                }
-            }
-        }
-        if (N == 0) return -1; // no reads left
-    }
+
     // determine if there is a gap
     for (s = N = 0; s < n; ++s) {
         for (i = 0; i < n_plp[s]; ++i)
@@ -184,19 +117,17 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
         bca->max_support = bca->max_frac = 0;
         int m, n_alt = 0, n_tot = 0, indel_support_ok = 0;
         uint32_t *aux;
-        aux = calloc(N + 1, 4);
+        aux = (uint32_t*) calloc(N + 1, 4);
         m = max_rd_len = 0;
         aux[m++] = MINUS_CONST; // zero indel is always a type
         for (s = 0; s < n; ++s) {
             int na = 0, nt = 0;
             for (i = 0; i < n_plp[s]; ++i) {
                 const bam_pileup1_t *p = plp[s] + i;
-                if (rghash == 0 || p->aux == 0) {
-                    ++nt;
-                    if (p->indel != 0) {
-                        ++na;
-                        aux[m++] = MINUS_CONST + p->indel;
-                    }
+                ++nt;
+                if (p->indel != 0) {
+                    ++na;
+                    aux[m++] = MINUS_CONST + p->indel;
                 }
                 j = bam_cigar2qlen(p->b->core.n_cigar, bam_get_cigar(p->b));
                 if (j > max_rd_len) max_rd_len = j;
@@ -262,13 +193,13 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
         int L = right - left + 1, max_i, max2_i;
         uint32_t *cns, max, max2;
         char *ref0, *r;
-        ref_sample = calloc(n, sizeof(char*));
-        cns = calloc(L, 4);
-        ref0 = calloc(L, 1);
+        ref_sample = (char**) calloc(n, sizeof(char*));
+        cns = (uint32_t*) calloc(L, 4);
+        ref0 = (char*) calloc(L, 1);
         for (i = 0; i < right - left; ++i)
             ref0[i] = seq_nt16_table[(int)ref[i+left]];
         for (s = 0; s < n; ++s) {
-            r = ref_sample[s] = calloc(L, 1);
+            r = ref_sample[s] = (char*) calloc(L, 1);
             memset(cns, 0, sizeof(int) * L);
             // collect ref and non-ref counts
             for (i = 0; i < n_plp[s]; ++i) {
@@ -319,7 +250,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     // construct the consensus sequence
     max_ins = types[n_types - 1];   // max_ins is at least 0
     if (max_ins > 0) {
-        int *inscns_aux = calloc(5 * n_types * max_ins, sizeof(int));
+        int *inscns_aux = (int*) calloc(5 * n_types * max_ins, sizeof(int));
         // count the number of occurrences of each base at each position for each type of insertion
         for (t = 0; t < n_types; ++t) {
             if (types[t] > 0) {
@@ -339,7 +270,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
             }
         }
         // use the majority rule to construct the consensus
-        inscns = calloc(n_types * max_ins, 1);
+        inscns = (char*) calloc(n_types * max_ins, 1);
         for (t = 0; t < n_types; ++t) {
             for (j = 0; j < types[t]; ++j) {
                 int max = 0, max_k = -1, *ia = &inscns_aux[(t*max_ins+j)*5];
@@ -354,14 +285,14 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     }
     // compute the likelihood given each type of indel for each read
     max_ref2 = right - left + 2 + 2 * (max_ins > -types[0]? max_ins : -types[0]);
-    ref2  = calloc(max_ref2, 1);
-    query = calloc(right - left + max_rd_len + max_ins + 2, 1);
-    score1 = calloc(N * n_types, sizeof(int));
-    score2 = calloc(N * n_types, sizeof(int));
+    ref2  = (char*) calloc(max_ref2, 1);
+    query = (char*) calloc(right - left + max_rd_len + max_ins + 2, 1);
+    score1 = (int*) calloc(N * n_types, sizeof(int));
+    score2 = (int*) calloc(N * n_types, sizeof(int));
     bca->indelreg = 0;
     for (t = 0; t < n_types; ++t) {
         int l, ir;
-        kpa_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
+        probaln_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
         apf1.bw = apf2.bw = abs(types[t]) + 3;
         // compute indelreg
         if (types[t] == 0) ir = 0;
@@ -406,7 +337,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                 { // do realignment; this is the bottleneck
                     const uint8_t *qual = bam_get_qual(p->b), *bq;
                     uint8_t *qq;
-                    qq = calloc(qend - qbeg, 1);
+                    qq = (uint8_t*) calloc(qend - qbeg, 1);
                     bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
                     if (bq) ++bq; // skip type
                     for (l = qbeg; l < qend; ++l) {
@@ -414,14 +345,14 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                         if (qq[l - qbeg] > 30) qq[l - qbeg] = 30;
                         if (qq[l - qbeg] < 7) qq[l - qbeg] = 7;
                     }
-                    sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                    (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
+                    sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
+                                        (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
                     l = (int)(100. * sc / (qend - qbeg) + .499); // used for adjusting indelQ below
                     if (l > 255) l = 255;
                     score1[K*n_types + t] = score2[K*n_types + t] = sc<<8 | l;
                     if (sc > 5) {
-                        sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                        (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
+                        sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
+                                            (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
                         l = (int)(100. * sc / (qend - qbeg) + .499);
                         if (l > 255) l = 255;
                         score2[K*n_types + t] = sc<<8 | l;
@@ -441,10 +372,13 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     }
     free(ref2); free(query);
     { // compute indelQ
-        int *sc, tmp, *sumq;
-        sc   = alloca(n_types * sizeof(int));
-        sumq = alloca(n_types * sizeof(int));
-        memset(sumq, 0, sizeof(int) * n_types);
+        int sc_a[16], sumq_a[16];
+        int tmp, *sc = sc_a, *sumq = sumq_a;
+        if (n_types > 16) {
+            sc   = (int *)malloc(n_types * sizeof(int));
+            sumq = (int *)malloc(n_types * sizeof(int));
+        }
+        memset(sumq, 0, n_types * sizeof(int));
         for (s = K = 0; s < n; ++s) {
             for (i = 0; i < n_plp[s]; ++i, ++K) {
                 bam_pileup1_t *p = plp[s] + i;
@@ -495,7 +429,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
         }
         // determine bca->indel_types[] and bca->inscns
         bca->maxins = max_ins;
-        bca->inscns = realloc(bca->inscns, bca->maxins * 4);
+        bca->inscns = (char*) realloc(bca->inscns, bca->maxins * 4);
         for (t = 0; t < n_types; ++t)
             sumq[t] = sumq[t]<<6 | t;
         for (t = 1; t < n_types; ++t) // insertion sort
@@ -525,6 +459,9 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                 //fprintf(pysam_stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam1_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
             }
         }
+
+        if (sc   != sc_a)   free(sc);
+        if (sumq != sumq_a) free(sumq);
     }
     free(score1); free(score2);
     // free
diff --git a/bcftools/bam_sample.c b/bcftools/bam_sample.c
new file mode 100644
index 0000000..66f5729
--- /dev/null
+++ b/bcftools/bam_sample.c
@@ -0,0 +1,393 @@
+/*  bam_sample.c -- group data by sample.
+
+    Copyright (C) 2010, 2011 Broad Institute.
+    Copyright (C) 2013, 2016 Genome Research Ltd.
+
+    Author: Heng Li <lh3 at sanger.ac.uk>, Petr Danecek <pd3 at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <htslib/hts.h>
+#include <htslib/kstring.h>
+#include <htslib/khash_str2int.h>
+#include <khash_str2str.h>
+#include "bam_sample.h"
+#include "bcftools.h"
+
+
+typedef struct
+{
+    char *fname;
+    void *rg2idx;       // hash: read group name to BCF output sample index. Maintained by bsmpl_add_readgroup
+    int default_idx;    // default BCF output sample index, set only when all readgroups are treated as one sample
+}
+file_t;
+
+struct _bam_smpl_t
+{
+    kstring_t tmp;
+    file_t *files;
+    int ignore_rg, nsmpl, nfiles;
+    char **smpl;        // list of BCF output sample names. Maintained by bsmpl_add_readgroup
+    void *sample_list;  // hash: BAM input sample name to BCF output sample name. This is the -s/-S list
+    int sample_logic;   // the -s/-S logic, 1: include, 0: exclude
+    void *rg_list;      // hash: BAM/rg_id to sample name or */rg_id for global ids. This is the -G list
+    int rg_logic;       // the -G logic, 1: include, 0: exclude
+    void *name2idx;     // hash: BCF output sample name to BCF output sample index. Maintained by bsmpl_add_readgroup
+};
+
+bam_smpl_t *bam_smpl_init(void)
+{
+    bam_smpl_t *bsmpl;
+    bsmpl = (bam_smpl_t*) calloc(1, sizeof(bam_smpl_t));
+    bsmpl->name2idx = khash_str2int_init();
+    return bsmpl;
+}
+
+void bam_smpl_destroy(bam_smpl_t *bsmpl)
+{
+    if ( !bsmpl ) return;
+    if ( bsmpl->name2idx ) khash_str2int_destroy_free(bsmpl->name2idx);
+    if ( bsmpl->sample_list ) khash_str2str_destroy_free_all(bsmpl->sample_list);
+    if ( bsmpl->rg_list ) khash_str2str_destroy_free_all(bsmpl->rg_list);
+    int i;
+    for (i=0; i<bsmpl->nfiles; i++)
+    {
+        file_t *file = &bsmpl->files[i];
+        if ( file->rg2idx ) khash_str2int_destroy_free(file->rg2idx);
+        free(file->fname);
+    }
+    free(bsmpl->smpl);
+    free(bsmpl->files);
+    free(bsmpl->tmp.s);
+    free(bsmpl);
+}
+
+void bam_smpl_ignore_readgroups(bam_smpl_t* bsmpl)
+{
+    bsmpl->ignore_rg = 1;
+}
+
+static void bsmpl_add_readgroup(bam_smpl_t *bsmpl, file_t *file, const char *rg_id, const char *smpl_name)
+{
+    int ismpl = -1;
+    if ( smpl_name )
+    {
+        if ( khash_str2int_get(bsmpl->name2idx,smpl_name,&ismpl) < 0 )
+        {
+            // new sample
+            bsmpl->nsmpl++;
+            bsmpl->smpl = (char**) realloc(bsmpl->smpl,sizeof(char*)*bsmpl->nsmpl);
+            bsmpl->smpl[bsmpl->nsmpl-1] = strdup(smpl_name);
+            ismpl = khash_str2int_inc(bsmpl->name2idx,bsmpl->smpl[bsmpl->nsmpl-1]);
+        }
+    }
+    if ( !strcmp("*",rg_id) )
+    {
+        // all read groups in the bam treated as the same sample
+        file->default_idx = ismpl;
+        return;
+    }
+    if ( !file->rg2idx ) file->rg2idx = khash_str2int_init();
+    if ( khash_str2int_has_key(file->rg2idx,rg_id) ) return;    // duplicate @RG:ID
+    khash_str2int_set(file->rg2idx, strdup(rg_id), ismpl);
+}
+static int bsmpl_keep_readgroup(bam_smpl_t *bsmpl, file_t *file, const char *rg_id, const char **smpl_name)
+{
+    char *rg_smpl = khash_str2str_get(bsmpl->rg_list,rg_id);    // unique read group present in one bam only
+    if ( !rg_smpl )
+    {
+        // read group specific to this bam
+        bsmpl->tmp.l = 0;
+        ksprintf(&bsmpl->tmp,"%s\t%s",rg_id,file->fname);
+        rg_smpl = khash_str2str_get(bsmpl->rg_list,bsmpl->tmp.s);
+    }
+    if ( !rg_smpl )
+    {
+        // any read group in this file?
+        bsmpl->tmp.l = 0;
+        ksprintf(&bsmpl->tmp,"*\t%s",file->fname);
+        rg_smpl = khash_str2str_get(bsmpl->rg_list,bsmpl->tmp.s);
+    }
+    if ( !rg_smpl && bsmpl->rg_logic ) return 0;
+    if ( rg_smpl && !bsmpl->rg_logic ) return 0;
+
+    if ( rg_smpl && rg_smpl[0]!='\t' ) *smpl_name = rg_smpl;    // rename the sample
+    return 1;
+}
+
+/*
+    The logic of this function is a bit complicated because we want to work
+    also with broken bams containing read groups that are not listed in the
+    header. The desired behavior is as follows:
+        - when -G is given, read groups which are not listed in the header must
+          be given explicitly using the "?" symbol in -G.
+          Otherwise:
+        - if the bam has no header, all reads in the file are assigned to a
+          single sample named after the file
+        - if there is at least one sample defined in the header, reads with no
+          read group id or with a read group id not listed in the header are
+          assigned to the first sample encountered in the header
+*/
+int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname)
+{
+    bsmpl->nfiles++;
+    bsmpl->files = (file_t*) realloc(bsmpl->files,bsmpl->nfiles*sizeof(file_t));
+    file_t *file = &bsmpl->files[bsmpl->nfiles-1];
+    memset(file,0,sizeof(file_t));
+    file->fname  = strdup(fname);
+    file->default_idx = -1;
+
+    if ( bsmpl->ignore_rg || !bam_hdr )
+    {
+        // The option --ignore-RG is set or there is no BAM header: use the file name as the sample name
+        bsmpl_add_readgroup(bsmpl,file,"*",file->fname);
+        return bsmpl->nfiles-1;
+    }
+
+    void *bam_smpls = khash_str2int_init();
+    int first_smpl = -1, nskipped = 0;
+    const char *p = bam_hdr, *q, *r;
+    while ((q = strstr(p, "@RG")) != 0) 
+    {
+        p = q + 3;
+        r = q = 0;
+        if ((q = strstr(p, "\tID:")) != 0) q += 4;
+        if ((r = strstr(p, "\tSM:")) != 0) r += 4;
+        if (r && q)
+        {
+            char *u, *v;
+            int ioq, ior;
+            for (u = (char*)q; *u && *u != '\t' && *u != '\n'; ++u);
+            for (v = (char*)r; *v && *v != '\t' && *v != '\n'; ++v);
+            ioq = *u; ior = *v; *u = *v = '\0';
+
+            // q now points to a null terminated read group id
+            // r points to a null terminated sample name
+            if ( !strcmp("*",q) || !strcmp("?",q) )
+                error("Error: the read group IDs \"*\" and \"?\" have a special meaning in the mpileup code. Please fix the code or the bam: %s\n", fname);
+
+            int accept_rg = 1;
+            if ( bsmpl->sample_list )
+            {
+                // restrict samples based on the -s/-S options
+                char *name = khash_str2str_get(bsmpl->sample_list,r);
+                if ( bsmpl->sample_logic==0 )
+                    accept_rg = name ? 0 : 1;
+                else if ( !name )
+                    accept_rg = 0;
+                else
+                    r = name;
+            }
+            if ( accept_rg && bsmpl->rg_list )
+            {
+                // restrict readgroups based on the -G option, possibly renaming the sample
+                accept_rg = bsmpl_keep_readgroup(bsmpl,file,q,&r);
+            }
+            if ( accept_rg )
+                bsmpl_add_readgroup(bsmpl,file,q,r);
+            else
+            {
+                bsmpl_add_readgroup(bsmpl,file,q,NULL); // ignore this RG but note that it was seen in the header
+                nskipped++;
+            }
+
+            if ( first_smpl<0 )
+                khash_str2int_get(bsmpl->name2idx,r,&first_smpl);
+            if ( !khash_str2int_has_key(bam_smpls,r) )
+                khash_str2int_inc(bam_smpls,strdup(r));
+
+            *u = ioq; *v = ior;
+        }
+        else
+            break;
+        p = q > r ? q : r;
+    }
+    int nsmpls = khash_str2int_size(bam_smpls);
+    khash_str2int_destroy_free(bam_smpls);
+
+    const char *smpl_name = NULL;
+    int accept_null_rg = 1;
+    if ( bsmpl->rg_list && !bsmpl_keep_readgroup(bsmpl,file,"?",&smpl_name) ) accept_null_rg = 0;
+    if ( bsmpl->sample_list && first_smpl==-1 ) accept_null_rg = 0;
+
+    if ( !accept_null_rg && first_smpl==-1 )
+    {
+        // no suitable read group is available in this bam: ignore the whole file.
+        free(file->fname);
+        bsmpl->nfiles--;
+        return -1;
+    }
+    if ( !accept_null_rg ) return bsmpl->nfiles-1;
+    if ( nsmpls==1 && !nskipped )
+    {
+        file->default_idx = first_smpl;
+        return bsmpl->nfiles-1;
+    }
+    if ( !smpl_name ) smpl_name = first_smpl==-1 ? file->fname : bsmpl->smpl[first_smpl];
+
+    bsmpl_add_readgroup(bsmpl,file,"?",smpl_name);
+    return bsmpl->nfiles-1;
+}
+
+const char **bam_smpl_get_samples(bam_smpl_t *bsmpl, int *nsmpl)
+{
+    *nsmpl = bsmpl->nsmpl;
+    return (const char**)bsmpl->smpl;
+}
+
+int bam_smpl_get_sample_id(bam_smpl_t *bsmpl, int bam_id, bam1_t *bam_rec)
+{
+    file_t *file = &bsmpl->files[bam_id];
+    if ( file->default_idx >= 0 ) return file->default_idx;
+
+    char *aux_rg = (char*) bam_aux_get(bam_rec, "RG");
+    aux_rg = aux_rg ? aux_rg+1 : "?";
+
+    int rg_id;
+    if ( khash_str2int_get(file->rg2idx, aux_rg, &rg_id)==0 ) return rg_id;
+    if ( khash_str2int_get(file->rg2idx, "?", &rg_id)==0 ) return rg_id;
+    return -1;
+}
+
+int bam_smpl_add_samples(bam_smpl_t *bsmpl, char *list, int is_file)
+{
+    if ( list[0]!='^' ) bsmpl->sample_logic = 1;
+    else list++;
+
+    int i, nsamples = 0;
+    char **samples = hts_readlist(list, is_file, &nsamples);
+    if ( !nsamples ) return 0;
+
+    kstring_t ori = {0,0,0};
+    kstring_t ren = {0,0,0};
+
+    bsmpl->sample_list = khash_str2str_init();
+    for (i=0; i<nsamples; i++)
+    {
+        char *ptr = samples[i];
+        ori.l = ren.l = 0;
+        int escaped = 0;
+        while ( *ptr )
+        {
+            if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+            if ( isspace(*ptr) && !escaped ) break;
+            kputc(*ptr, &ori);
+            escaped = 0;
+            ptr++;
+        }
+        if ( *ptr )
+        {
+            while ( *ptr && isspace(*ptr) ) ptr++;
+            while ( *ptr )
+            {
+                if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+                if ( isspace(*ptr) && !escaped ) break;
+                kputc(*ptr, &ren);
+                escaped = 0;
+                ptr++;
+            }
+        }
+        khash_str2str_set(bsmpl->sample_list,strdup(ori.s),strdup(ren.l?ren.s:ori.s));
+        free(samples[i]);
+    }
+    free(samples);
+    free(ori.s);
+    free(ren.s);
+    return nsamples;
+}
+
+int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file)
+{
+    if ( list[0]!='^' ) bsmpl->rg_logic = 1;
+    else list++;
+
+    int i, nrows  = 0;
+    char **rows = hts_readlist(list, is_file, &nrows);
+    if ( !nrows ) return 0;
+
+    kstring_t fld1 = {0,0,0};
+    kstring_t fld2 = {0,0,0};
+    kstring_t fld3 = {0,0,0};
+
+    bsmpl->rg_list = khash_str2str_init();
+    for (i=0; i<nrows; i++)
+    {
+        char *ptr = rows[i];
+        fld1.l = fld2.l = fld3.l = 0;
+        int escaped = 0;
+        while ( *ptr )
+        {
+            if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+            if ( isspace(*ptr) && !escaped ) break;
+            kputc(*ptr, &fld1);
+            escaped = 0;
+            ptr++;
+        }
+        if ( *ptr )
+        {
+            while ( *ptr && isspace(*ptr) ) ptr++;
+            while ( *ptr )
+            {
+                if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+                if ( isspace(*ptr) && !escaped ) break;
+                kputc(*ptr, &fld2);
+                escaped = 0;
+                ptr++;
+            }
+        }
+        if ( *ptr )
+        {
+            while ( *ptr && isspace(*ptr) ) ptr++;
+            while ( *ptr )
+            {
+                if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+                if ( isspace(*ptr) && !escaped ) break;
+                kputc(*ptr, &fld3);
+                escaped = 0;
+                ptr++;
+            }
+        }
+        if ( fld3.l )
+        {
+            // ID FILE SAMPLE
+            kputc('\t',&fld1);
+            kputs(fld2.s,&fld1);
+            fld2.l = 0;
+            kputs(fld3.s,&fld2);
+        }
+        // fld2.s now contains a new sample name. If NULL, use \t to keep the bam header name
+        char *value = khash_str2str_get(bsmpl->rg_list,fld1.s);
+        if ( !value )
+            khash_str2str_set(bsmpl->rg_list,strdup(fld1.s),strdup(fld2.l?fld2.s:"\t"));
+        else if ( strcmp(value,fld2.l?fld2.s:"\t") )
+            error("Error: The read group \"%s\" was assigned to two different samples: \"%s\" and \"%s\"\n", fld1.s,value,fld2.l?fld2.s:"\t");
+        free(rows[i]);
+    }
+    free(rows);
+    free(fld1.s);
+    free(fld2.s);
+    free(fld3.s);
+    return nrows;
+}
+
+
diff --git a/bcftools/bam_sample.c.pysam.c b/bcftools/bam_sample.c.pysam.c
new file mode 100644
index 0000000..76d7a61
--- /dev/null
+++ b/bcftools/bam_sample.c.pysam.c
@@ -0,0 +1,395 @@
+#include "pysam.h"
+
+/*  bam_sample.c -- group data by sample.
+
+    Copyright (C) 2010, 2011 Broad Institute.
+    Copyright (C) 2013, 2016 Genome Research Ltd.
+
+    Author: Heng Li <lh3 at sanger.ac.uk>, Petr Danecek <pd3 at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <htslib/hts.h>
+#include <htslib/kstring.h>
+#include <htslib/khash_str2int.h>
+#include <khash_str2str.h>
+#include "bam_sample.h"
+#include "bcftools.h"
+
+
+typedef struct
+{
+    char *fname;
+    void *rg2idx;       // hash: read group name to BCF output sample index. Maintained by bsmpl_add_readgroup
+    int default_idx;    // default BCF output sample index, set only when all readgroups are treated as one sample
+}
+file_t;
+
+struct _bam_smpl_t
+{
+    kstring_t tmp;
+    file_t *files;
+    int ignore_rg, nsmpl, nfiles;
+    char **smpl;        // list of BCF output sample names. Maintained by bsmpl_add_readgroup
+    void *sample_list;  // hash: BAM input sample name to BCF output sample name. This is the -s/-S list
+    int sample_logic;   // the -s/-S logic, 1: include, 0: exclude
+    void *rg_list;      // hash: BAM/rg_id to sample name or */rg_id for global ids. This is the -G list
+    int rg_logic;       // the -G logic, 1: include, 0: exclude
+    void *name2idx;     // hash: BCF output sample name to BCF output sample index. Maintained by bsmpl_add_readgroup
+};
+
+bam_smpl_t *bam_smpl_init(void)
+{
+    bam_smpl_t *bsmpl;
+    bsmpl = (bam_smpl_t*) calloc(1, sizeof(bam_smpl_t));
+    bsmpl->name2idx = khash_str2int_init();
+    return bsmpl;
+}
+
+void bam_smpl_destroy(bam_smpl_t *bsmpl)
+{
+    if ( !bsmpl ) return;
+    if ( bsmpl->name2idx ) khash_str2int_destroy_free(bsmpl->name2idx);
+    if ( bsmpl->sample_list ) khash_str2str_destroy_free_all(bsmpl->sample_list);
+    if ( bsmpl->rg_list ) khash_str2str_destroy_free_all(bsmpl->rg_list);
+    int i;
+    for (i=0; i<bsmpl->nfiles; i++)
+    {
+        file_t *file = &bsmpl->files[i];
+        if ( file->rg2idx ) khash_str2int_destroy_free(file->rg2idx);
+        free(file->fname);
+    }
+    free(bsmpl->smpl);
+    free(bsmpl->files);
+    free(bsmpl->tmp.s);
+    free(bsmpl);
+}
+
+void bam_smpl_ignore_readgroups(bam_smpl_t* bsmpl)
+{
+    bsmpl->ignore_rg = 1;
+}
+
+static void bsmpl_add_readgroup(bam_smpl_t *bsmpl, file_t *file, const char *rg_id, const char *smpl_name)
+{
+    int ismpl = -1;
+    if ( smpl_name )
+    {
+        if ( khash_str2int_get(bsmpl->name2idx,smpl_name,&ismpl) < 0 )
+        {
+            // new sample
+            bsmpl->nsmpl++;
+            bsmpl->smpl = (char**) realloc(bsmpl->smpl,sizeof(char*)*bsmpl->nsmpl);
+            bsmpl->smpl[bsmpl->nsmpl-1] = strdup(smpl_name);
+            ismpl = khash_str2int_inc(bsmpl->name2idx,bsmpl->smpl[bsmpl->nsmpl-1]);
+        }
+    }
+    if ( !strcmp("*",rg_id) )
+    {
+        // all read groups in the bam treated as the same sample
+        file->default_idx = ismpl;
+        return;
+    }
+    if ( !file->rg2idx ) file->rg2idx = khash_str2int_init();
+    if ( khash_str2int_has_key(file->rg2idx,rg_id) ) return;    // duplicate @RG:ID
+    khash_str2int_set(file->rg2idx, strdup(rg_id), ismpl);
+}
+static int bsmpl_keep_readgroup(bam_smpl_t *bsmpl, file_t *file, const char *rg_id, const char **smpl_name)
+{
+    char *rg_smpl = khash_str2str_get(bsmpl->rg_list,rg_id);    // unique read group present in one bam only
+    if ( !rg_smpl )
+    {
+        // read group specific to this bam
+        bsmpl->tmp.l = 0;
+        ksprintf(&bsmpl->tmp,"%s\t%s",rg_id,file->fname);
+        rg_smpl = khash_str2str_get(bsmpl->rg_list,bsmpl->tmp.s);
+    }
+    if ( !rg_smpl )
+    {
+        // any read group in this file?
+        bsmpl->tmp.l = 0;
+        ksprintf(&bsmpl->tmp,"*\t%s",file->fname);
+        rg_smpl = khash_str2str_get(bsmpl->rg_list,bsmpl->tmp.s);
+    }
+    if ( !rg_smpl && bsmpl->rg_logic ) return 0;
+    if ( rg_smpl && !bsmpl->rg_logic ) return 0;
+
+    if ( rg_smpl && rg_smpl[0]!='\t' ) *smpl_name = rg_smpl;    // rename the sample
+    return 1;
+}
+
+/*
+    The logic of this function is a bit complicated because we want to work
+    also with broken bams containing read groups that are not listed in the
+    header. The desired behavior is as follows:
+        - when -G is given, read groups which are not listed in the header must
+          be given explicitly using the "?" symbol in -G.
+          Otherwise:
+        - if the bam has no header, all reads in the file are assigned to a
+          single sample named after the file
+        - if there is at least one sample defined in the header, reads with no
+          read group id or with a read group id not listed in the header are
+          assigned to the first sample encountered in the header
+*/
+int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname)
+{
+    bsmpl->nfiles++;
+    bsmpl->files = (file_t*) realloc(bsmpl->files,bsmpl->nfiles*sizeof(file_t));
+    file_t *file = &bsmpl->files[bsmpl->nfiles-1];
+    memset(file,0,sizeof(file_t));
+    file->fname  = strdup(fname);
+    file->default_idx = -1;
+
+    if ( bsmpl->ignore_rg || !bam_hdr )
+    {
+        // The option --ignore-RG is set or there is no BAM header: use the file name as the sample name
+        bsmpl_add_readgroup(bsmpl,file,"*",file->fname);
+        return bsmpl->nfiles-1;
+    }
+
+    void *bam_smpls = khash_str2int_init();
+    int first_smpl = -1, nskipped = 0;
+    const char *p = bam_hdr, *q, *r;
+    while ((q = strstr(p, "@RG")) != 0) 
+    {
+        p = q + 3;
+        r = q = 0;
+        if ((q = strstr(p, "\tID:")) != 0) q += 4;
+        if ((r = strstr(p, "\tSM:")) != 0) r += 4;
+        if (r && q)
+        {
+            char *u, *v;
+            int ioq, ior;
+            for (u = (char*)q; *u && *u != '\t' && *u != '\n'; ++u);
+            for (v = (char*)r; *v && *v != '\t' && *v != '\n'; ++v);
+            ioq = *u; ior = *v; *u = *v = '\0';
+
+            // q now points to a null terminated read group id
+            // r points to a null terminated sample name
+            if ( !strcmp("*",q) || !strcmp("?",q) )
+                error("Error: the read group IDs \"*\" and \"?\" have a special meaning in the mpileup code. Please fix the code or the bam: %s\n", fname);
+
+            int accept_rg = 1;
+            if ( bsmpl->sample_list )
+            {
+                // restrict samples based on the -s/-S options
+                char *name = khash_str2str_get(bsmpl->sample_list,r);
+                if ( bsmpl->sample_logic==0 )
+                    accept_rg = name ? 0 : 1;
+                else if ( !name )
+                    accept_rg = 0;
+                else
+                    r = name;
+            }
+            if ( accept_rg && bsmpl->rg_list )
+            {
+                // restrict readgroups based on the -G option, possibly renaming the sample
+                accept_rg = bsmpl_keep_readgroup(bsmpl,file,q,&r);
+            }
+            if ( accept_rg )
+                bsmpl_add_readgroup(bsmpl,file,q,r);
+            else
+            {
+                bsmpl_add_readgroup(bsmpl,file,q,NULL); // ignore this RG but note that it was seen in the header
+                nskipped++;
+            }
+
+            if ( first_smpl<0 )
+                khash_str2int_get(bsmpl->name2idx,r,&first_smpl);
+            if ( !khash_str2int_has_key(bam_smpls,r) )
+                khash_str2int_inc(bam_smpls,strdup(r));
+
+            *u = ioq; *v = ior;
+        }
+        else
+            break;
+        p = q > r ? q : r;
+    }
+    int nsmpls = khash_str2int_size(bam_smpls);
+    khash_str2int_destroy_free(bam_smpls);
+
+    const char *smpl_name = NULL;
+    int accept_null_rg = 1;
+    if ( bsmpl->rg_list && !bsmpl_keep_readgroup(bsmpl,file,"?",&smpl_name) ) accept_null_rg = 0;
+    if ( bsmpl->sample_list && first_smpl==-1 ) accept_null_rg = 0;
+
+    if ( !accept_null_rg && first_smpl==-1 )
+    {
+        // no suitable read group is available in this bam: ignore the whole file.
+        free(file->fname);
+        bsmpl->nfiles--;
+        return -1;
+    }
+    if ( !accept_null_rg ) return bsmpl->nfiles-1;
+    if ( nsmpls==1 && !nskipped )
+    {
+        file->default_idx = first_smpl;
+        return bsmpl->nfiles-1;
+    }
+    if ( !smpl_name ) smpl_name = first_smpl==-1 ? file->fname : bsmpl->smpl[first_smpl];
+
+    bsmpl_add_readgroup(bsmpl,file,"?",smpl_name);
+    return bsmpl->nfiles-1;
+}
+
+const char **bam_smpl_get_samples(bam_smpl_t *bsmpl, int *nsmpl)
+{
+    *nsmpl = bsmpl->nsmpl;
+    return (const char**)bsmpl->smpl;
+}
+
+int bam_smpl_get_sample_id(bam_smpl_t *bsmpl, int bam_id, bam1_t *bam_rec)
+{
+    file_t *file = &bsmpl->files[bam_id];
+    if ( file->default_idx >= 0 ) return file->default_idx;
+
+    char *aux_rg = (char*) bam_aux_get(bam_rec, "RG");
+    aux_rg = aux_rg ? aux_rg+1 : "?";
+
+    int rg_id;
+    if ( khash_str2int_get(file->rg2idx, aux_rg, &rg_id)==0 ) return rg_id;
+    if ( khash_str2int_get(file->rg2idx, "?", &rg_id)==0 ) return rg_id;
+    return -1;
+}
+
+int bam_smpl_add_samples(bam_smpl_t *bsmpl, char *list, int is_file)
+{
+    if ( list[0]!='^' ) bsmpl->sample_logic = 1;
+    else list++;
+
+    int i, nsamples = 0;
+    char **samples = hts_readlist(list, is_file, &nsamples);
+    if ( !nsamples ) return 0;
+
+    kstring_t ori = {0,0,0};
+    kstring_t ren = {0,0,0};
+
+    bsmpl->sample_list = khash_str2str_init();
+    for (i=0; i<nsamples; i++)
+    {
+        char *ptr = samples[i];
+        ori.l = ren.l = 0;
+        int escaped = 0;
+        while ( *ptr )
+        {
+            if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+            if ( isspace(*ptr) && !escaped ) break;
+            kputc(*ptr, &ori);
+            escaped = 0;
+            ptr++;
+        }
+        if ( *ptr )
+        {
+            while ( *ptr && isspace(*ptr) ) ptr++;
+            while ( *ptr )
+            {
+                if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+                if ( isspace(*ptr) && !escaped ) break;
+                kputc(*ptr, &ren);
+                escaped = 0;
+                ptr++;
+            }
+        }
+        khash_str2str_set(bsmpl->sample_list,strdup(ori.s),strdup(ren.l?ren.s:ori.s));
+        free(samples[i]);
+    }
+    free(samples);
+    free(ori.s);
+    free(ren.s);
+    return nsamples;
+}
+
+int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file)
+{
+    if ( list[0]!='^' ) bsmpl->rg_logic = 1;
+    else list++;
+
+    int i, nrows  = 0;
+    char **rows = hts_readlist(list, is_file, &nrows);
+    if ( !nrows ) return 0;
+
+    kstring_t fld1 = {0,0,0};
+    kstring_t fld2 = {0,0,0};
+    kstring_t fld3 = {0,0,0};
+
+    bsmpl->rg_list = khash_str2str_init();
+    for (i=0; i<nrows; i++)
+    {
+        char *ptr = rows[i];
+        fld1.l = fld2.l = fld3.l = 0;
+        int escaped = 0;
+        while ( *ptr )
+        {
+            if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+            if ( isspace(*ptr) && !escaped ) break;
+            kputc(*ptr, &fld1);
+            escaped = 0;
+            ptr++;
+        }
+        if ( *ptr )
+        {
+            while ( *ptr && isspace(*ptr) ) ptr++;
+            while ( *ptr )
+            {
+                if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+                if ( isspace(*ptr) && !escaped ) break;
+                kputc(*ptr, &fld2);
+                escaped = 0;
+                ptr++;
+            }
+        }
+        if ( *ptr )
+        {
+            while ( *ptr && isspace(*ptr) ) ptr++;
+            while ( *ptr )
+            {
+                if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
+                if ( isspace(*ptr) && !escaped ) break;
+                kputc(*ptr, &fld3);
+                escaped = 0;
+                ptr++;
+            }
+        }
+        if ( fld3.l )
+        {
+            // ID FILE SAMPLE
+            kputc('\t',&fld1);
+            kputs(fld2.s,&fld1);
+            fld2.l = 0;
+            kputs(fld3.s,&fld2);
+        }
+        // fld2.s now contains a new sample name. If NULL, use \t to keep the bam header name
+        char *value = khash_str2str_get(bsmpl->rg_list,fld1.s);
+        if ( !value )
+            khash_str2str_set(bsmpl->rg_list,strdup(fld1.s),strdup(fld2.l?fld2.s:"\t"));
+        else if ( strcmp(value,fld2.l?fld2.s:"\t") )
+            error("Error: The read group \"%s\" was assigned to two different samples: \"%s\" and \"%s\"\n", fld1.s,value,fld2.l?fld2.s:"\t");
+        free(rows[i]);
+    }
+    free(rows);
+    free(fld1.s);
+    free(fld2.s);
+    free(fld3.s);
+    return nrows;
+}
+
+
diff --git a/samtools/errmod.h b/bcftools/bam_sample.h
similarity index 53%
rename from samtools/errmod.h
rename to bcftools/bam_sample.h
index 6db46f4..5cbcc39 100644
--- a/samtools/errmod.h
+++ b/bcftools/bam_sample.h
@@ -1,9 +1,9 @@
-/*  errmod.h -- revised MAQ error model.
+/*  bam_sample.h -- group data by sample.
 
     Copyright (C) 2010 Broad Institute.
-    Copyright (C) 2012 Genome Research Ltd.
+    Copyright (C) 2016 Genome Research Ltd.
 
-    Author: Heng Li <lh3 at sanger.ac.uk>
+    Author: Heng Li <lh3 at sanger.ac.uk>, Petr Danecek <pd3 at sanger.ac.uk>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -23,27 +23,28 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.  */
 
-#ifndef ERRMOD_H
-#define ERRMOD_H
+#ifndef BAM_SAMPLE_H
+#define BAM_SAMPLE_H
 
-#include <stdint.h>
+#include <htslib/sam.h>
 
-struct __errmod_coef_t;
+typedef struct _bam_smpl_t bam_smpl_t;
 
-typedef struct {
-    double depcorr;
-    struct __errmod_coef_t *coef;
-} errmod_t;
+bam_smpl_t *bam_smpl_init(void);
 
-errmod_t *errmod_init(double depcorr);
-void errmod_destroy(errmod_t *em);
+int bam_smpl_add_samples(bam_smpl_t *bsmpl, char *list, int is_file);
+int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file);
+void bam_smpl_ignore_readgroups(bam_smpl_t* bsmpl);
 
-/*
-    n: number of bases
-    m: maximum base
-    bases[i]: qual:6, strand:1, base:4
-    q[i*m+j]: phred-scaled likelihood of (i,j)
- */
-int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q);
+// The above should be called only before bams are added. Returns the BAM id
+// to be passed to bam_smpl_get_sample_id() later. It is safe to assume
+// sequential numbering, starting from 0.
+//
+int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname);
+
+const char **bam_smpl_get_samples(bam_smpl_t *bsmpl, int *nsmpl);
+int bam_smpl_get_sample_id(bam_smpl_t *bsmpl, int bam_id, bam1_t *bam_rec);
+
+void bam_smpl_destroy(bam_smpl_t *bsmpl);
 
 #endif
diff --git a/bcftools/bcftools.h b/bcftools/bcftools.h
index d4e856d..7d2d49f 100644
--- a/bcftools/bcftools.h
+++ b/bcftools/bcftools.h
@@ -30,6 +30,7 @@ THE SOFTWARE.  */
 #include <htslib/vcf.h>
 #include <math.h>
 
+#define FT_TAB_TEXT 0       // custom tab-delimited text file
 #define FT_GZ 1
 #define FT_VCF 2
 #define FT_VCF_GZ (FT_GZ|FT_VCF)
diff --git a/bcftools/bin.c b/bcftools/bin.c
new file mode 100644
index 0000000..b558b20
--- /dev/null
+++ b/bcftools/bin.c
@@ -0,0 +1,104 @@
+/* The MIT License
+
+   Copyright (c) 2016 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3 at sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <stdio.h>
+#include "bcftools.h"
+#include "bin.h"
+
+struct _bin_t
+{
+    float *bins;
+    int nbins;
+};
+
+bin_t *bin_init(const char *list_def, float min, float max)
+{
+    bin_t *bin = (bin_t*) calloc(1,sizeof(bin_t));
+
+    // a comma indicates a list, otherwise a file
+    int is_file = strchr(list_def,',') ? 0 : 1;
+    int i, nlist;
+    char **list = hts_readlist(list_def, is_file, &nlist);
+    bin->nbins = nlist;
+    bin->bins  = (float*) malloc(sizeof(float)*nlist);
+    for (i=0; i<nlist; i++)
+    {
+        char *tmp;
+        bin->bins[i] = strtod(list[i],&tmp);
+        if ( !tmp ) error("Could not parse %s: %s\n", list_def, list[i]);
+        if ( min!=max && (bin->bins[i]<min || bin->bins[i]>max) )
+            error("Expected values from the interval [%f,%f], found %s\n", list[i]);
+        free(list[i]); 
+    }
+    free(list);
+
+    if ( min!=max )
+    {
+        // make sure we've got both boundaries: min,max.
+        assert( nlist>1 );
+        float max_err = (bin->bins[1] - bin->bins[0])*1e-6;
+        if ( fabs(bin->bins[0] - min) > max_err )
+        {
+            bin->bins = (float*) realloc(bin->bins, (++bin->nbins)*sizeof(float));
+            memmove(bin->bins+1, bin->bins, sizeof(float)*(bin->nbins-1));
+            bin->bins[0] = min;
+        }
+        if ( fabs(bin->bins[bin->nbins-1] - max) > max_err ) 
+        {
+            bin->bins = (float*) realloc(bin->bins, (++bin->nbins)*sizeof(float));
+            bin->bins[bin->nbins-1] = max;
+        }
+    }
+    return bin;
+}
+
+void bin_destroy(bin_t *bin)
+{
+    free(bin->bins);
+    free(bin);
+}
+
+int bin_get_size(bin_t *bin) { return bin->nbins; }
+
+float bin_get_value(bin_t *bin, int idx) { return bin->bins[idx]; }
+
+int bin_get_idx(bin_t *bin, float value)
+{
+    if ( bin->bins[bin->nbins-1] < value ) return bin->nbins-1;
+
+    // Binary search in half-closed,half-open intervals [)
+    int imin = 0, imax = bin->nbins - 2;
+    while ( imin<imax )
+    {
+        int i = (imin+imax)/2;
+        if ( value < bin->bins[i] ) imax = i - 1;
+        else if ( value > bin->bins[i] ) imin = i + 1;
+        else return i;
+    }
+    if ( bin->bins[imax] <= value ) return imax;
+    return imin - 1;
+}
+
diff --git a/bcftools/bin.c.pysam.c b/bcftools/bin.c.pysam.c
new file mode 100644
index 0000000..6469b57
--- /dev/null
+++ b/bcftools/bin.c.pysam.c
@@ -0,0 +1,106 @@
+#include "pysam.h"
+
+/* The MIT License
+
+   Copyright (c) 2016 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3 at sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <stdio.h>
+#include "bcftools.h"
+#include "bin.h"
+
+struct _bin_t
+{
+    float *bins;
+    int nbins;
+};
+
+bin_t *bin_init(const char *list_def, float min, float max)
+{
+    bin_t *bin = (bin_t*) calloc(1,sizeof(bin_t));
+
+    // a comma indicates a list, otherwise a file
+    int is_file = strchr(list_def,',') ? 0 : 1;
+    int i, nlist;
+    char **list = hts_readlist(list_def, is_file, &nlist);
+    bin->nbins = nlist;
+    bin->bins  = (float*) malloc(sizeof(float)*nlist);
+    for (i=0; i<nlist; i++)
+    {
+        char *tmp;
+        bin->bins[i] = strtod(list[i],&tmp);
+        if ( !tmp ) error("Could not parse %s: %s\n", list_def, list[i]);
+        if ( min!=max && (bin->bins[i]<min || bin->bins[i]>max) )
+            error("Expected values from the interval [%f,%f], found %s\n", list[i]);
+        free(list[i]); 
+    }
+    free(list);
+
+    if ( min!=max )
+    {
+        // make sure we've got both boundaries: min,max.
+        assert( nlist>1 );
+        float max_err = (bin->bins[1] - bin->bins[0])*1e-6;
+        if ( fabs(bin->bins[0] - min) > max_err )
+        {
+            bin->bins = (float*) realloc(bin->bins, (++bin->nbins)*sizeof(float));
+            memmove(bin->bins+1, bin->bins, sizeof(float)*(bin->nbins-1));
+            bin->bins[0] = min;
+        }
+        if ( fabs(bin->bins[bin->nbins-1] - max) > max_err ) 
+        {
+            bin->bins = (float*) realloc(bin->bins, (++bin->nbins)*sizeof(float));
+            bin->bins[bin->nbins-1] = max;
+        }
+    }
+    return bin;
+}
+
+void bin_destroy(bin_t *bin)
+{
+    free(bin->bins);
+    free(bin);
+}
+
+int bin_get_size(bin_t *bin) { return bin->nbins; }
+
+float bin_get_value(bin_t *bin, int idx) { return bin->bins[idx]; }
+
+int bin_get_idx(bin_t *bin, float value)
+{
+    if ( bin->bins[bin->nbins-1] < value ) return bin->nbins-1;
+
+    // Binary search in half-closed,half-open intervals [)
+    int imin = 0, imax = bin->nbins - 2;
+    while ( imin<imax )
+    {
+        int i = (imin+imax)/2;
+        if ( value < bin->bins[i] ) imax = i - 1;
+        else if ( value > bin->bins[i] ) imin = i + 1;
+        else return i;
+    }
+    if ( bin->bins[imax] <= value ) return imax;
+    return imin - 1;
+}
+
diff --git a/bcftools/bin.h b/bcftools/bin.h
new file mode 100644
index 0000000..ab9e5b1
--- /dev/null
+++ b/bcftools/bin.h
@@ -0,0 +1,65 @@
+/* The MIT License
+
+   Copyright (c) 2016 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3 at sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+/*
+    Simple binning of float values into predefined bins
+*/
+
+#ifndef __BIN_H__
+#define __BIN_H__
+
+#include <stdio.h>
+
+typedef struct _bin_t bin_t;
+
+/*
+ *  bin_init() - init bins
+ *  @list: list of half-open intervals [). If the list does not contain commas,
+ *      it is interpreted as a file name.
+ *  @min,max:  extreme values. This is for user convenience so that well-known
+ *      extremes can be left out from the list. Ignored if min=max
+ */
+bin_t *bin_init(const char *list, float min, float max);
+void bin_destroy(bin_t *bin);
+
+/*
+ *  bin_get_size() - number of boundaries, subtract 1 to get the number of bins
+ */
+int bin_get_size(bin_t *bin);
+
+/*
+   bin_get_idx() - find the bin index which corresponds to the value (binary search)
+   Returns the bin index 0 <= idx <= size-2 or -1,size-1 for out of range values.
+ */
+int bin_get_idx(bin_t *bin, float value);
+
+/*
+   bin_get_value() - get the i-th boundary value, i=0,..,size-1
+ */
+float bin_get_value(bin_t *bin, int ith);
+
+#endif
+
diff --git a/bcftools/call.h b/bcftools/call.h
index bbf0a52..0d707a0 100644
--- a/bcftools/call.h
+++ b/bcftools/call.h
@@ -72,6 +72,7 @@ typedef struct
     double trio_Pm_SNPs, trio_Pm_del, trio_Pm_ins;      // P(mendelian) for trio calling, see mcall_call_trio_genotypes()
     int32_t *ugts, *cgts;   // unconstraind and constrained GTs
     uint32_t output_tags;
+    char *prior_AN, *prior_AC;  // reference panel AF tags (AF=AC/AN)
 
     // ccall only
     double indel_frac, min_perm_p, min_lrt;
@@ -102,7 +103,7 @@ call_t;
 void error(const char *format, ...);
 
 /*
- *  *call() - return negative value on error or the number of non-reference
+ *  call() - return -1 value on critical error; -2 to skip the site; or the number of non-reference
  *            alleles on success.
  */
 int mcall(call_t *call, bcf1_t *rec);    // multiallic and rare-variant calling model
diff --git a/bcftools/ccall.c b/bcftools/ccall.c
index bb43d61..9f6958a 100644
--- a/bcftools/ccall.c
+++ b/bcftools/ccall.c
@@ -189,8 +189,6 @@ static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double
             bcf_update_info_string(call->hdr, rec, "CGT", tmp);
         }
     }
-    if (pr == 0) return 1;
-
     is_var = (pr->p_ref < call->pref);
     r = is_var? pr->p_ref : pr->p_var;
 
@@ -232,11 +230,7 @@ static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double
 
     // Remove unused alleles
     int nals_ori = rec->n_allele, nals = !is_var && !(call->flag & CALL_KEEPALT) ? 1 : pr->rank0 < 2? 2 : pr->rank0+1;
-    if ( call->flag & CALL_KEEPALT && call->unseen>0 )
-    {
-        assert( call->unseen==nals-1 );
-        nals--;
-    }
+    if ( call->flag & CALL_KEEPALT && call->unseen==nals-1 ) nals--;
     
     if ( nals<rec->n_allele )
     {
@@ -272,7 +266,7 @@ static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double
     int i;
     for (i=0; i<rec->n_sample; i++)
     {
-        int x = ( is_var || call->output_tags & CALL_FMT_GQ ) ? bcf_p1_call_gt(p1, pr->f_exp, i) : 2;
+        int x = ( is_var || call->output_tags & CALL_FMT_GQ ) ? bcf_p1_call_gt(p1, pr->f_exp, i, is_var) : 2;
         int gt = x&3;
         if ( !call->ploidy || call->ploidy[i]==2 )
         {
diff --git a/bcftools/ccall.c.pysam.c b/bcftools/ccall.c.pysam.c
index d4ceb01..1765d84 100644
--- a/bcftools/ccall.c.pysam.c
+++ b/bcftools/ccall.c.pysam.c
@@ -191,8 +191,6 @@ static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double
             bcf_update_info_string(call->hdr, rec, "CGT", tmp);
         }
     }
-    if (pr == 0) return 1;
-
     is_var = (pr->p_ref < call->pref);
     r = is_var? pr->p_ref : pr->p_var;
 
@@ -234,11 +232,7 @@ static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double
 
     // Remove unused alleles
     int nals_ori = rec->n_allele, nals = !is_var && !(call->flag & CALL_KEEPALT) ? 1 : pr->rank0 < 2? 2 : pr->rank0+1;
-    if ( call->flag & CALL_KEEPALT && call->unseen>0 )
-    {
-        assert( call->unseen==nals-1 );
-        nals--;
-    }
+    if ( call->flag & CALL_KEEPALT && call->unseen==nals-1 ) nals--;
     
     if ( nals<rec->n_allele )
     {
@@ -274,7 +268,7 @@ static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double
     int i;
     for (i=0; i<rec->n_sample; i++)
     {
-        int x = ( is_var || call->output_tags & CALL_FMT_GQ ) ? bcf_p1_call_gt(p1, pr->f_exp, i) : 2;
+        int x = ( is_var || call->output_tags & CALL_FMT_GQ ) ? bcf_p1_call_gt(p1, pr->f_exp, i, is_var) : 2;
         int gt = x&3;
         if ( !call->ploidy || call->ploidy[i]==2 )
         {
diff --git a/bcftools/consensus.c b/bcftools/consensus.c
index 051f353..4fccc4f 100644
--- a/bcftools/consensus.c
+++ b/bcftools/consensus.c
@@ -27,6 +27,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <getopt.h>
 #include <unistd.h>
@@ -35,7 +36,7 @@
 #include <htslib/kstring.h>
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/kseq.h>
-#include <htslib/regidx.h>
+#include "regidx.h"
 #include "bcftools.h"
 #include "rbuf.h"
 
@@ -68,6 +69,7 @@ typedef struct
     int nvcf_buf, rid;
 
     regidx_t *mask;
+    regitr_t *itr;
 
     int chain_id;       // chain_id, to provide a unique ID to each chain in the chain output
     chain_t *chain;     // chain structure to store the sequence of ungapped blocks between the ref and alt sequences
@@ -202,6 +204,7 @@ static void init_data(args_t *args)
     {
         args->mask = regidx_init(args->mask_fname,NULL,NULL,0,NULL);
         if ( !args->mask ) error("Failed to initialize mask regions\n");
+        args->itr = regitr_init(args->mask);
     }
     // In case we want to store the chains
     if ( args->chain_fname )
@@ -228,6 +231,7 @@ static void destroy_data(args_t *args)
     free(args->vcf_buf);
     free(args->fa_buf.s);
     if ( args->mask ) regidx_destroy(args->mask);
+    if ( args->itr ) regitr_destroy(args->itr);
     if ( args->chain_fname )
         if ( fclose(args->fp_chain) ) error("Close failed: %s\n", args->chain_fname);
     if ( fclose(args->fp_out) ) error("Close failed: %s\n", args->output_fname);
@@ -409,12 +413,27 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         rec->d.allele[1][0] = gt2iupac(ial,jal);
     }
 
+    int len_diff = 0, alen = 0;
     int idx = rec->pos - args->fa_ori_pos + args->fa_mod_off;
-    if ( idx<0 || idx>=args->fa_buf.l ) 
+    if ( idx<0 )
+    {
+        fprintf(stderr,"Warning: ignoring overlapping variant starting at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
+        return;
+    }
+    if ( rec->rlen > args->fa_buf.l - idx )
+    {
+        rec->rlen = args->fa_buf.l - idx;
+        alen = strlen(rec->d.allele[ialt]);
+        if ( alen > rec->rlen )
+        {
+            rec->d.allele[ialt][rec->rlen] = 0;
+            fprintf(stderr,"Warning: trimming variant starting at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
+        }
+    }
+    if ( idx>=args->fa_buf.l ) 
         error("FIXME: %s:%d .. idx=%d, ori_pos=%d, len=%d, off=%d\n",bcf_seqname(args->hdr,rec),rec->pos+1,idx,args->fa_ori_pos,args->fa_buf.l,args->fa_mod_off);
 
     // sanity check the reference base
-    int len_diff = 0, alen = 0;
     if ( rec->d.allele[ialt][0]=='<' )
     {
         if ( strcasecmp(rec->d.allele[ialt], "<DEL>") )
@@ -495,18 +514,16 @@ static void mask_region(args_t *args, char *seq, int len)
     int start = args->fa_src_pos - len;
     int end   = args->fa_src_pos;
 
-    regitr_t itr;
-    if ( !regidx_overlap(args->mask, chr,start,end, &itr) ) return;
+    if ( !regidx_overlap(args->mask, chr,start,end, args->itr) ) return;
 
     int idx_start, idx_end, i;
-    while ( REGITR_OVERLAP(itr,start,end) )
+    while ( regitr_overlap(args->itr) )
     {
-        idx_start = REGITR_START(itr) - start;
-        idx_end   = REGITR_END(itr) - start;
+        idx_start = args->itr->beg - start;
+        idx_end   = args->itr->end - start;
         if ( idx_start < 0 ) idx_start = 0;
         if ( idx_end >= len ) idx_end = len - 1;
         for (i=idx_start; i<=idx_end; i++) seq[i] = 'N';
-        itr.i++;
     }
 }
 
@@ -519,7 +536,7 @@ static void consensus(args_t *args)
     {
         if ( str.s[0]=='>' )
         {
-            // new sequence encountered, apply all chached variants
+            // new sequence encountered, apply all cached variants
             while ( args->vcf_rbuf.n )
             {
                 if (args->chain) {
@@ -576,7 +593,17 @@ static void consensus(args_t *args)
         }
         if ( !rec_ptr ) flush_fa_buffer(args, 60);
     }
-    if (args->chain) {
+    bcf1_t **rec_ptr = NULL;
+    while ( args->rid>=0 && (rec_ptr = next_vcf_line(args)) )
+    {
+        bcf1_t *rec = *rec_ptr;
+        if ( rec->rid!=args->rid ) break;
+        if ( args->fa_end_pos && rec->pos > args->fa_end_pos ) break;
+        if ( args->fa_ori_pos + args->fa_buf.l - args->fa_mod_off <= rec->pos ) break;
+        apply_variant(args, rec);
+    }
+    if (args->chain)
+    {
         print_chain(args);
         destroy_chain(args);
     }
@@ -588,8 +615,11 @@ static void consensus(args_t *args)
 static void usage(args_t *args)
 {
     fprintf(stderr, "\n");
-    fprintf(stderr, "About:   Create consensus sequence by applying VCF variants to a reference\n");
-    fprintf(stderr, "         fasta file.\n");
+    fprintf(stderr, "About: Create consensus sequence by applying VCF variants to a reference fasta\n");
+    fprintf(stderr, "       file. By default, the program will apply all ALT variants. Using the\n");
+    fprintf(stderr, "       --sample (and, optionally, --haplotype) option will apply genotype\n");
+    fprintf(stderr, "       (or haplotype) calls from FORMAT/GT. The program ignores allelic depth\n");
+    fprintf(stderr, "       information, such as INFO/AD or FORMAT/AD.\n");
     fprintf(stderr, "Usage:   bcftools consensus [OPTIONS] <file.vcf>\n");
     fprintf(stderr, "Options:\n");
     fprintf(stderr, "    -f, --fasta-ref <file>     reference sequence in fasta format\n");
diff --git a/bcftools/consensus.c.pysam.c b/bcftools/consensus.c.pysam.c
index 91aa5ae..51d9339 100644
--- a/bcftools/consensus.c.pysam.c
+++ b/bcftools/consensus.c.pysam.c
@@ -29,6 +29,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <getopt.h>
 #include <unistd.h>
@@ -37,7 +38,7 @@
 #include <htslib/kstring.h>
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/kseq.h>
-#include <htslib/regidx.h>
+#include "regidx.h"
 #include "bcftools.h"
 #include "rbuf.h"
 
@@ -70,6 +71,7 @@ typedef struct
     int nvcf_buf, rid;
 
     regidx_t *mask;
+    regitr_t *itr;
 
     int chain_id;       // chain_id, to provide a unique ID to each chain in the chain output
     chain_t *chain;     // chain structure to store the sequence of ungapped blocks between the ref and alt sequences
@@ -204,6 +206,7 @@ static void init_data(args_t *args)
     {
         args->mask = regidx_init(args->mask_fname,NULL,NULL,0,NULL);
         if ( !args->mask ) error("Failed to initialize mask regions\n");
+        args->itr = regitr_init(args->mask);
     }
     // In case we want to store the chains
     if ( args->chain_fname )
@@ -230,6 +233,7 @@ static void destroy_data(args_t *args)
     free(args->vcf_buf);
     free(args->fa_buf.s);
     if ( args->mask ) regidx_destroy(args->mask);
+    if ( args->itr ) regitr_destroy(args->itr);
     if ( args->chain_fname )
         if ( fclose(args->fp_chain) ) error("Close failed: %s\n", args->chain_fname);
     if ( fclose(args->fp_out) ) error("Close failed: %s\n", args->output_fname);
@@ -411,12 +415,27 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         rec->d.allele[1][0] = gt2iupac(ial,jal);
     }
 
+    int len_diff = 0, alen = 0;
     int idx = rec->pos - args->fa_ori_pos + args->fa_mod_off;
-    if ( idx<0 || idx>=args->fa_buf.l ) 
+    if ( idx<0 )
+    {
+        fprintf(pysam_stderr,"Warning: ignoring overlapping variant starting at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
+        return;
+    }
+    if ( rec->rlen > args->fa_buf.l - idx )
+    {
+        rec->rlen = args->fa_buf.l - idx;
+        alen = strlen(rec->d.allele[ialt]);
+        if ( alen > rec->rlen )
+        {
+            rec->d.allele[ialt][rec->rlen] = 0;
+            fprintf(pysam_stderr,"Warning: trimming variant starting at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
+        }
+    }
+    if ( idx>=args->fa_buf.l ) 
         error("FIXME: %s:%d .. idx=%d, ori_pos=%d, len=%d, off=%d\n",bcf_seqname(args->hdr,rec),rec->pos+1,idx,args->fa_ori_pos,args->fa_buf.l,args->fa_mod_off);
 
     // sanity check the reference base
-    int len_diff = 0, alen = 0;
     if ( rec->d.allele[ialt][0]=='<' )
     {
         if ( strcasecmp(rec->d.allele[ialt], "<DEL>") )
@@ -497,18 +516,16 @@ static void mask_region(args_t *args, char *seq, int len)
     int start = args->fa_src_pos - len;
     int end   = args->fa_src_pos;
 
-    regitr_t itr;
-    if ( !regidx_overlap(args->mask, chr,start,end, &itr) ) return;
+    if ( !regidx_overlap(args->mask, chr,start,end, args->itr) ) return;
 
     int idx_start, idx_end, i;
-    while ( REGITR_OVERLAP(itr,start,end) )
+    while ( regitr_overlap(args->itr) )
     {
-        idx_start = REGITR_START(itr) - start;
-        idx_end   = REGITR_END(itr) - start;
+        idx_start = args->itr->beg - start;
+        idx_end   = args->itr->end - start;
         if ( idx_start < 0 ) idx_start = 0;
         if ( idx_end >= len ) idx_end = len - 1;
         for (i=idx_start; i<=idx_end; i++) seq[i] = 'N';
-        itr.i++;
     }
 }
 
@@ -521,7 +538,7 @@ static void consensus(args_t *args)
     {
         if ( str.s[0]=='>' )
         {
-            // new sequence encountered, apply all chached variants
+            // new sequence encountered, apply all cached variants
             while ( args->vcf_rbuf.n )
             {
                 if (args->chain) {
@@ -578,7 +595,17 @@ static void consensus(args_t *args)
         }
         if ( !rec_ptr ) flush_fa_buffer(args, 60);
     }
-    if (args->chain) {
+    bcf1_t **rec_ptr = NULL;
+    while ( args->rid>=0 && (rec_ptr = next_vcf_line(args)) )
+    {
+        bcf1_t *rec = *rec_ptr;
+        if ( rec->rid!=args->rid ) break;
+        if ( args->fa_end_pos && rec->pos > args->fa_end_pos ) break;
+        if ( args->fa_ori_pos + args->fa_buf.l - args->fa_mod_off <= rec->pos ) break;
+        apply_variant(args, rec);
+    }
+    if (args->chain)
+    {
         print_chain(args);
         destroy_chain(args);
     }
@@ -590,8 +617,11 @@ static void consensus(args_t *args)
 static void usage(args_t *args)
 {
     fprintf(pysam_stderr, "\n");
-    fprintf(pysam_stderr, "About:   Create consensus sequence by applying VCF variants to a reference\n");
-    fprintf(pysam_stderr, "         fasta file.\n");
+    fprintf(pysam_stderr, "About: Create consensus sequence by applying VCF variants to a reference fasta\n");
+    fprintf(pysam_stderr, "       file. By default, the program will apply all ALT variants. Using the\n");
+    fprintf(pysam_stderr, "       --sample (and, optionally, --haplotype) option will apply genotype\n");
+    fprintf(pysam_stderr, "       (or haplotype) calls from FORMAT/GT. The program ignores allelic depth\n");
+    fprintf(pysam_stderr, "       information, such as INFO/AD or FORMAT/AD.\n");
     fprintf(pysam_stderr, "Usage:   bcftools consensus [OPTIONS] <file.vcf>\n");
     fprintf(pysam_stderr, "Options:\n");
     fprintf(pysam_stderr, "    -f, --fasta-ref <file>     reference sequence in fasta format\n");
diff --git a/bcftools/convert.c b/bcftools/convert.c
index 3e289f0..05dce01 100644
--- a/bcftools/convert.c
+++ b/bcftools/convert.c
@@ -1,6 +1,6 @@
 /*  convert.c -- functions for converting between VCF/BCF and related formats.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2017 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -62,13 +62,19 @@ THE SOFTWARE.  */
 #define T_IUPAC_GT     23
 #define T_GT_TO_HAP    24   // not publicly advertised
 #define T_GT_TO_HAP2   25   // not publicly advertised
+#define T_TBCSQ        26
+#define T_END          27
+#define T_POS0         28
+#define T_END0         29
 
 typedef struct _fmt_t
 {
     int type, id, is_gt_field, ready, subscript;
     char *key;
     bcf_fmt_t *fmt;
+    void *usr;                  // user data (optional)
     void (*handler)(convert_t *, bcf1_t *, struct _fmt_t *, int, kstring_t *);
+    void (*destroy)(void*);     // clean user data (optional)
 }
 fmt_t;
 
@@ -88,9 +94,19 @@ struct _convert_t
     int allow_undef_tags;
 };
 
+typedef struct
+{
+    kstring_t hap1,hap2;
+    char **str;
+    int n, m;
+}
+bcsq_t;
 
 static void process_chrom(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputs(convert->header->id[BCF_DT_CTG][line->rid].key, str); }
 static void process_pos(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputw(line->pos+1, str); }
+static void process_pos0(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputw(line->pos, str); }
+static void process_end(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputw(line->pos+line->rlen, str); }
+static void process_end0(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputw(line->pos+line->rlen-1, str); }
 static void process_id(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputs(line->d.id, str); }
 static void process_ref(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputs(line->d.allele[0], str); }
 static void process_alt(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
@@ -125,7 +141,7 @@ static void process_first_alt(convert_t *convert, bcf1_t *line, fmt_t *fmt, int
 static void process_qual(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
     if ( bcf_float_is_missing(line->qual) ) kputc('.', str);
-    else ksprintf(str, "%g", line->qual);
+    else kputd(line->qual, str);
 }
 static void process_filter(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
@@ -193,7 +209,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
             case BCF_BT_INT8:  if ( info->v1.i==bcf_int8_missing ) kputc('.', str); else kputw(info->v1.i, str); break;
             case BCF_BT_INT16: if ( info->v1.i==bcf_int16_missing ) kputc('.', str); else kputw(info->v1.i, str); break;
             case BCF_BT_INT32: if ( info->v1.i==bcf_int32_missing ) kputc('.', str); else kputw(info->v1.i, str); break;
-            case BCF_BT_FLOAT: if ( bcf_float_is_missing(info->v1.f) ) kputc('.', str); else ksprintf(str, "%g", info->v1.f); break;
+            case BCF_BT_FLOAT: if ( bcf_float_is_missing(info->v1.f) ) kputc('.', str); else kputd(info->v1.f, str); break;
             case BCF_BT_CHAR:  kputc(info->v1.i, str); break;
             default: fprintf(stderr,"todo: type %d\n", info->type); exit(1); break;
         }
@@ -215,7 +231,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
             case BCF_BT_INT8:  BRANCH(int8_t,  val==bcf_int8_missing,  val==bcf_int8_vector_end,  kputw(val, str)); break;
             case BCF_BT_INT16: BRANCH(int16_t, val==bcf_int16_missing, val==bcf_int16_vector_end, kputw(val, str)); break;
             case BCF_BT_INT32: BRANCH(int32_t, val==bcf_int32_missing, val==bcf_int32_vector_end, kputw(val, str)); break;
-            case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(val), bcf_float_is_vector_end(val), ksprintf(str, "%g", val)); break;
+            case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(val), bcf_float_is_vector_end(val), kputd(val, str)); break;
             default: fprintf(stderr,"todo: type %d\n", info->type); exit(1); break;
         }
         #undef BRANCH
@@ -226,6 +242,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
 static void init_format(convert_t *convert, bcf1_t *line, fmt_t *fmt)
 {
     fmt->id = bcf_hdr_id2int(convert->header, BCF_DT_ID, fmt->key);
+    if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_FMT,fmt->id) ) fmt->id = -1;
     fmt->fmt = NULL;
     if ( fmt->id >= 0 )
     {
@@ -261,7 +278,7 @@ static void process_format(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isa
             if ( bcf_float_is_missing(ptr[fmt->subscript]) || bcf_float_is_vector_end(ptr[fmt->subscript]) )
                 kputc('.', str);
             else
-                ksprintf(str, "%g", ptr[fmt->subscript]);
+                kputd(ptr[fmt->subscript], str);
         }
         else if ( fmt->fmt->type != BCF_BT_CHAR )
         {
@@ -316,6 +333,111 @@ static void process_tgt(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isampl
     }
     if (l == 0) kputc('.', str);
 }
+static void destroy_tbcsq(void *usr)
+{
+    if ( !usr ) return;
+    bcsq_t *csq = (bcsq_t*) usr;
+    free(csq->hap1.s);
+    free(csq->hap2.s);
+    if ( csq->n )
+        free(csq->str[0]);
+    free(csq->str);
+    free(csq);
+}
+static void process_tbcsq(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
+{
+    if ( !fmt->ready )
+    {
+        init_format(convert, line, fmt);
+
+        bcsq_t *csq;
+        if ( fmt->usr )
+        {
+            csq = (bcsq_t*) fmt->usr;
+            if ( csq->n )
+                free(csq->str[0]);
+            csq->n = 0;
+        }
+        else
+            csq = (bcsq_t*) calloc(1,sizeof(bcsq_t));
+        fmt->usr = csq;
+
+        int i=0, len = 0;
+        char *tmp = NULL;
+        if ( bcf_get_info_string(convert->header,line,fmt->key,&tmp,&len)<0 )
+        {
+            csq->n = 0;
+            return;
+        }
+        do
+        {
+            csq->n++;
+            hts_expand(char*, csq->n, csq->m, csq->str);
+            csq->str[ csq->n-1 ] = tmp + i;
+            while ( i<len && tmp[i]!=',' ) i++;
+            if ( i<len && tmp[i]==',' ) tmp[i++] = 0;
+        }
+        while ( i<len );
+    }
+
+    bcsq_t *csq = (bcsq_t*)fmt->usr;
+
+    if ( fmt->fmt==NULL || !csq->n ) return;
+
+    csq->hap1.l = 0;
+    csq->hap2.l = 0;
+
+    int mask = fmt->subscript==0 ? 3 : 1;   // merge both haplotypes if subscript==0
+
+    #define BRANCH(type_t, nbits) { \
+        type_t *x = (type_t*)(fmt->fmt->p + isample*fmt->fmt->size); \
+        int i,j; \
+        if ( fmt->subscript<=0 || fmt->subscript==1 ) \
+        { \
+            for (j=0; j < fmt->fmt->n; j++) \
+            { \
+                type_t val = x[j]; \
+                if ( !val ) continue; \
+                for (i=0; i<nbits; i+=2) \
+                    if ( val & (mask<<i) ) { kputs(csq->str[(j*32+i)/2], &csq->hap1); kputc_(',', &csq->hap1); } \
+            } \
+        } \
+        if ( fmt->subscript<0 || fmt->subscript==2 ) \
+        { \
+            for (j=0; j < fmt->fmt->n; j++) \
+            { \
+                type_t val = x[j]; \
+                if ( !val ) continue; \
+                for (i=1; i<nbits; i+=2) \
+                    if ( val & (1<<i) ) { kputs(csq->str[(j*32+i)/2], &csq->hap2); kputc_(',', &csq->hap2); } \
+            } \
+        } \
+    }
+    switch (fmt->fmt->type)
+    {
+        case BCF_BT_INT8:  BRANCH(uint8_t, 8); break;
+        case BCF_BT_INT16: BRANCH(uint16_t,16); break;
+        case BCF_BT_INT32: BRANCH(uint32_t,32); break;
+        default: error("Unexpected type: %d\n", fmt->fmt->type); exit(1); break;
+    }
+    #undef BRANCH
+
+    if ( !csq->hap1.l && !csq->hap2.l ) return;
+
+    if ( csq->hap1.l ) csq->hap1.s[--csq->hap1.l] = 0;
+    if ( csq->hap2.l ) csq->hap2.s[--csq->hap2.l] = 0;
+
+    if ( fmt->subscript<0 )
+    {
+        kputs(csq->hap1.l?csq->hap1.s:".", str);
+        kputc_('\t', str);
+        kputs(csq->hap2.l?csq->hap2.s:".", str);
+    }
+    else if ( fmt->subscript<2 )
+        kputs(csq->hap1.l?csq->hap1.s:".", str);
+    else
+        kputs(csq->hap2.l?csq->hap2.s:".", str);
+}
 static void init_format_iupac(convert_t *convert, bcf1_t *line, fmt_t *fmt)
 {
     init_format(convert, line, fmt);
@@ -409,6 +531,7 @@ static void process_type(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
     if ( line_type & VCF_MNP ) { if (i) kputc(',',str); kputs("MNP", str); i++; }
     if ( line_type & VCF_INDEL ) { if (i) kputc(',',str); kputs("INDEL", str); i++; }
     if ( line_type & VCF_OTHER ) { if (i) kputc(',',str); kputs("OTHER", str); i++; }
+    if ( line_type & VCF_BND ) { if (i) kputc(',',str); kputs("BND", str); i++; }
 }
 static void process_line(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
@@ -597,103 +720,260 @@ static void process_gt_to_hap(convert_t *convert, bcf1_t *line, fmt_t *fmt, int
     // the allele (0/1) and the asterisk (*); e.g., "0* 1*" for a
     // heterozygous genotype of unknown phase.
 
-    int m, n, i;
-
-    m = convert->ndat / sizeof(int32_t);
-    n = bcf_get_genotypes(convert->header, line, &convert->dat, &m);
-    convert->ndat = m * sizeof(int32_t);
-
-    if ( n<=0 )
-    {
-        // Throw an error or silently proceed?
-        //
-        // for (i=0; i<convert->nsamples; i++) kputs(" ...", str);
-        // return;
-
-        error("Error parsing GT tag at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
-    }
-
-    n /= convert->nsamples;
+    int i, gt_id = bcf_hdr_id2int(convert->header, BCF_DT_ID, "GT");
+    if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_FMT,gt_id) )
+        error("FORMAT/GT tag not present at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
+    if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT);
+    bcf_fmt_t *fmt_gt = NULL;
+    for (i=0; i<line->n_fmt; i++)
+        if ( line->d.fmt[i].id==gt_id ) { fmt_gt = &line->d.fmt[i]; break; }
+    if ( !fmt_gt )
+        error("FORMAT/GT tag not present at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
+
+    // Alloc all memory in advance to avoid kput routines. The biggest allowed allele index is 99
+    if ( line->n_allele > 100 )
+        error("Too many alleles (%d) at %s:%d\n", line->n_allele, bcf_seqname(convert->header, line), line->pos+1);
+    if ( ks_resize(str, str->l+convert->nsamples*8) != 0 )
+        error("Could not alloc %d bytes\n", str->l + convert->nsamples*8);
+
+    if ( fmt_gt->type!=BCF_BT_INT8 )    // todo: use BRANCH_INT if the VCF is valid
+        error("Uh, too many alleles (%d) or redundant BCF representation at %s:%d\n", line->n_allele, bcf_seqname(convert->header, line), line->pos+1);
+
+    int8_t *ptr = ((int8_t*) fmt_gt->p) - fmt_gt->n;
     for (i=0; i<convert->nsamples; i++)
     {
-        int32_t *ptr = (int32_t*)convert->dat + i*n;
-        int j;
-        for (j=0; j<n; j++)
-            if ( ptr[j]==bcf_int32_vector_end ) break;
-
-        if (i>0) kputs(" ", str); // no space separation for first column
-        if ( j==2 )
+        ptr += fmt_gt->n;
+        if ( ptr[0]==2 )
         {
-            // diploid
-            if ( bcf_gt_is_missing(ptr[0]) || bcf_gt_is_missing(ptr[1]) ) {
-                kputs("? ?", str);
+            if ( ptr[1]==3 ) /* 0|0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==5 ) /* 0|1 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==bcf_int8_vector_end ) /* 0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '-'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==2 ) /* 0/0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==4 ) /* 0/1 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_missing(ptr[1]) ) /* 0/. */
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
             }
-            else if ( bcf_gt_is_phased(ptr[1])) {
-                ksprintf(str, "%d %d", bcf_gt_allele(ptr[0]), bcf_gt_allele(ptr[1]));
+            else if ( bcf_gt_is_phased(ptr[1]) ) /* 0|x */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = ' ';
             }
-            else {
-                ksprintf(str, "%d* %d*", bcf_gt_allele(ptr[0]), bcf_gt_allele(ptr[1]));
+            else /* 0/x */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = '*'; str->s[str->l++] = ' ';
             }
         }
-        else if ( j==1 )
+        else if ( ptr[0]==4 )
         {
-            // haploid
-            if ( bcf_gt_is_missing(ptr[0]) )
-                kputs("? -", str);
-            else if ( bcf_gt_allele(ptr[0])==1 )
-                kputs("1 -", str);       // first ALT allele
-            else
-                kputs("0 -", str);       // REF or something else than first ALT
+            if ( ptr[1]==3 ) /* 1|0 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==5 ) /* 1|1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==bcf_int8_vector_end ) /* 1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '-'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==2 ) /* 1/0 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==4 ) /* 1/1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_missing(ptr[1]) ) /* 1/. */
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_phased(ptr[1]) )    /* 1|x */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = ' ';
+            }
+            else /* 1/x */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+        }
+        else if ( bcf_gt_is_missing(ptr[0]) )
+        {
+            if ( ptr[1]==bcf_int8_vector_end ) 
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '-'; str->s[str->l++] = ' ';
+            }
+            else 
+            { 
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
+            }
+        }
+        else if ( ptr[1]==bcf_int8_vector_end )
+        {
+            /* use REF for something else than first ALT */
+            str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '-'; str->s[str->l++] = ' ';
+        }
+        else
+        {
+            kputw(bcf_gt_allele(ptr[0]),str);
+            if ( bcf_gt_is_phased(ptr[1]) ) str->s[str->l++] = '*';
+            str->s[str->l++] = ' ';
+            kputw(bcf_gt_allele(ptr[1]),str);
+            if ( bcf_gt_is_phased(ptr[1]) ) str->s[str->l++] = '*';
+            str->s[str->l++] = ' ';
         }
-        else error("FIXME: not ready for ploidy %d\n", j);
     }
+    str->s[--str->l] = 0;     // delete the last space
 }
 static void process_gt_to_hap2(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
     // same as process_gt_to_hap but converts haploid genotypes into diploid
-    int m, n, i;
-
-    m = convert->ndat / sizeof(int32_t);
-    n = bcf_get_genotypes(convert->header, line, &convert->dat, &m);
-    convert->ndat = m * sizeof(int32_t);
-
-    if ( n<=0 )
-        error("Error parsing GT tag at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
 
-    n /= convert->nsamples;
+    int i, gt_id = bcf_hdr_id2int(convert->header, BCF_DT_ID, "GT");
+    if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_FMT,gt_id) )
+        error("FORMAT/GT tag not present at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
+    if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT);
+    bcf_fmt_t *fmt_gt = NULL;
+    for (i=0; i<line->n_fmt; i++)
+        if ( line->d.fmt[i].id==gt_id ) { fmt_gt = &line->d.fmt[i]; break; }
+    if ( !fmt_gt )
+        error("FORMAT/GT tag not present at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
+
+    // Alloc all memory in advance to avoid kput routines. The biggest allowed allele index is 99
+    if ( line->n_allele > 100 )
+        error("Too many alleles (%d) at %s:%d\n", line->n_allele, bcf_seqname(convert->header, line), line->pos+1);
+    if ( ks_resize(str, str->l+convert->nsamples*8) != 0 )
+        error("Could not alloc %d bytes\n", str->l + convert->nsamples*8);
+
+    if ( fmt_gt->type!=BCF_BT_INT8 )    // todo: use BRANCH_INT if the VCF is valid
+        error("Uh, too many alleles (%d) or redundant BCF representation at %s:%d\n", line->n_allele, bcf_seqname(convert->header, line), line->pos+1);
+
+    int8_t *ptr = ((int8_t*) fmt_gt->p) - fmt_gt->n;
     for (i=0; i<convert->nsamples; i++)
     {
-        int32_t *ptr = (int32_t*)convert->dat + i*n;
-        int j;
-        for (j=0; j<n; j++)
-            if ( ptr[j]==bcf_int32_vector_end ) break;
-
-        if (i>0) kputs(" ", str); // no space separation for first column
-        if ( j==2 )
+        ptr += fmt_gt->n;
+        if ( ptr[0]==2 )
         {
-            // diploid
-            if ( bcf_gt_is_missing(ptr[0]) || bcf_gt_is_missing(ptr[1]) ) {
-                kputs("? ?", str);
+            if ( ptr[1]==3 ) /* 0|0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==5 ) /* 0|1 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==bcf_int8_vector_end ) /* 0 -> 0|0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==2 ) /* 0/0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==4 ) /* 0/1 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
             }
-            else if ( bcf_gt_is_phased(ptr[1])) {
-                ksprintf(str, "%d %d", bcf_gt_allele(ptr[0]), bcf_gt_allele(ptr[1]));
+            else if ( bcf_gt_is_missing(ptr[1]) ) /* 0/. */
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
             }
-            else {
-                ksprintf(str, "%d* %d*", bcf_gt_allele(ptr[0]), bcf_gt_allele(ptr[1]));
+            else if ( bcf_gt_is_phased(ptr[1]) ) /* 0|x */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = ' ';
+            }
+            else /* 0/x */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = '*'; str->s[str->l++] = ' ';
             }
         }
-        else if ( j==1 )
+        else if ( ptr[0]==4 )
         {
-            // haploid
-            if ( bcf_gt_is_missing(ptr[0]) )
-                kputs("? ?", str);
-            else if ( bcf_gt_allele(ptr[0])==1 )
-                kputs("1 1", str);       // first ALT allele
-            else
-                kputs("0 0", str);       // REF or something else than first ALT
+            if ( ptr[1]==3 ) /* 1|0 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==5 ) /* 1|1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==bcf_int8_vector_end ) /* 1 -> 1|1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==2 ) /* 1/0 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==4 ) /* 1/1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_missing(ptr[1]) ) /* 1/. */
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_phased(ptr[1]) )    /* 1|x */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = ' ';
+            }
+            else /* 1/x */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+        }
+        else if ( bcf_gt_is_missing(ptr[0]) )
+        {
+            str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
+        }
+        else if ( ptr[1]==bcf_int8_vector_end )
+        {
+            /* use REF for something else than first ALT */
+            str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+        }
+        else
+        {
+            kputw(bcf_gt_allele(ptr[0]),str);
+            if ( bcf_gt_is_phased(ptr[1]) ) str->s[str->l++] = '*';
+            str->s[str->l++] = ' ';
+            kputw(bcf_gt_allele(ptr[1]),str);
+            if ( bcf_gt_is_phased(ptr[1]) ) str->s[str->l++] = '*';
+            str->s[str->l++] = ' ';
         }
-        else error("FIXME: not ready for ploidy %d\n", j);
     }
+    str->s[--str->l] = 0;     // delete the last space
 }
 
 static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
@@ -709,6 +989,8 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
     fmt->key   = key ? strdup(key) : NULL;
     fmt->is_gt_field = is_gtf;
     fmt->subscript = -1;
+    fmt->usr     = NULL;
+    fmt->destroy = NULL;
 
     // Allow non-format tags, such as CHROM, INFO, etc., to appear amongst the format tags.
     if ( key )
@@ -718,6 +1000,9 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
         {
             if ( !strcmp("CHROM",key) ) { fmt->type = T_CHROM; }
             else if ( !strcmp("POS",key) ) { fmt->type = T_POS; }
+            else if ( !strcmp("POS0",key) ) { fmt->type = T_POS0; }
+            else if ( !strcmp("END",key) ) { fmt->type = T_END; }
+            else if ( !strcmp("END0",key) ) { fmt->type = T_END0; }
             else if ( !strcmp("ID",key) ) { fmt->type = T_ID; }
             else if ( !strcmp("REF",key) ) { fmt->type = T_REF; }
             else if ( !strcmp("ALT",key) ) { fmt->type = T_ALT; }
@@ -742,6 +1027,9 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
         case T_GP_TO_PROB3: fmt->handler = &process_gp_to_prob3; break;
         case T_CHROM: fmt->handler = &process_chrom; break;
         case T_POS: fmt->handler = &process_pos; break;
+        case T_POS0: fmt->handler = &process_pos0; break;
+        case T_END: fmt->handler = &process_end; break;
+        case T_END0: fmt->handler = &process_end0; break;
         case T_ID: fmt->handler = &process_id; break;
         case T_REF: fmt->handler = &process_ref; break;
         case T_ALT: fmt->handler = &process_alt; break;
@@ -759,15 +1047,17 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
         case T_IUPAC_GT: fmt->handler = &process_iupac_gt; convert->max_unpack |= BCF_UN_FMT; break;
         case T_GT_TO_HAP: fmt->handler = &process_gt_to_hap; convert->max_unpack |= BCF_UN_FMT; break;
         case T_GT_TO_HAP2: fmt->handler = &process_gt_to_hap2; convert->max_unpack |= BCF_UN_FMT; break;
-        case T_LINE: fmt->handler = &process_line; break;
+        case T_TBCSQ: fmt->handler = &process_tbcsq; fmt->destroy = &destroy_tbcsq; convert->max_unpack |= BCF_UN_FMT; break;
+        case T_LINE: fmt->handler = &process_line; convert->max_unpack |= BCF_UN_FMT; break;
         default: error("TODO: handler for type %d\n", fmt->type);
     }
-    if ( key )
+    if ( key && fmt->type==T_INFO )
     {
-        if ( fmt->type==T_INFO )
+        fmt->id = bcf_hdr_id2int(convert->header, BCF_DT_ID, key);
+        if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_INFO,fmt->id) )
         {
-            fmt->id = bcf_hdr_id2int(convert->header, BCF_DT_ID, key);
-            if ( fmt->id==-1 ) convert->undef_info_tag = strdup(key);
+            fmt->id = -1;
+            convert->undef_info_tag = strdup(key);
         }
     }
     return fmt;
@@ -797,6 +1087,16 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
         if ( !strcmp(str.s, "SAMPLE") ) register_tag(convert, T_SAMPLE, "SAMPLE", is_gtf);
         else if ( !strcmp(str.s, "GT") ) register_tag(convert, T_GT, "GT", is_gtf);
         else if ( !strcmp(str.s, "TGT") ) register_tag(convert, T_TGT, "GT", is_gtf);
+        else if ( !strcmp(str.s, "TBCSQ") ) 
+        {
+            fmt_t *fmt = register_tag(convert, T_TBCSQ, "BCSQ", is_gtf);
+            fmt->subscript = parse_subscript(&q);
+            if ( fmt->subscript==-1 )
+            { 
+                if ( !strncmp(q,"{*}",3) ) { fmt->subscript = 0; q += 3; }
+            }
+            else fmt->subscript++;
+        }
         else if ( !strcmp(str.s, "IUPACGT") ) register_tag(convert, T_IUPAC_GT, "GT", is_gtf);
         else if ( !strcmp(str.s, "INFO") )
         {
@@ -819,6 +1119,9 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
     {
         if ( !strcmp(str.s, "CHROM") ) register_tag(convert, T_CHROM, str.s, is_gtf);
         else if ( !strcmp(str.s, "POS") ) register_tag(convert, T_POS, str.s, is_gtf);
+        else if ( !strcmp(str.s, "POS0") ) register_tag(convert, T_POS0, str.s, is_gtf);
+        else if ( !strcmp(str.s, "END") ) register_tag(convert, T_END, str.s, is_gtf);
+        else if ( !strcmp(str.s, "END0") ) register_tag(convert, T_END0, str.s, is_gtf);
         else if ( !strcmp(str.s, "ID") ) register_tag(convert, T_ID, str.s, is_gtf);
         else if ( !strcmp(str.s, "REF") ) register_tag(convert, T_REF, str.s, is_gtf);
         else if ( !strcmp(str.s, "ALT") ) 
@@ -903,6 +1206,8 @@ convert_t *convert_init(bcf_hdr_t *hdr, int *samples, int nsamples, const char *
             default:  p = parse_sep(convert, p, is_gtf); break;
         }
     }
+    if ( is_gtf )
+        error("Could not parse the format string, missing the square bracket \"]\": %s\n", convert->format_str);
 
     if ( nsamples )
     {
@@ -923,7 +1228,10 @@ void convert_destroy(convert_t *convert)
 {
     int i;
     for (i=0; i<convert->nfmt; i++)
+    {
+        if ( convert->fmt[i].destroy ) convert->fmt[i].destroy(convert->fmt[i].usr);
         free(convert->fmt[i].key);
+    }
     free(convert->fmt);
     free(convert->undef_info_tag);
     free(convert->dat);
@@ -984,7 +1292,7 @@ int convert_header(convert_t *convert, kstring_t *str)
 int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
 {
     if ( !convert->allow_undef_tags && convert->undef_info_tag )
-        error("Error: no such tag defined in the VCF header: INFO/%s\n", convert->undef_info_tag);
+        error("Error: no such tag defined in the VCF header: INFO/%s. FORMAT fields must be in square brackets, e.g. \"[ %s]\"\n", convert->undef_info_tag,convert->undef_info_tag);
 
     int l_ori = str->l;
     bcf_unpack(line, convert->max_unpack);
@@ -993,17 +1301,24 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
     str->l = 0;
     for (i=0; i<convert->nfmt; i++)
     {
-        // Genotype fields
+        // Genotype fields. 
         if ( convert->fmt[i].is_gt_field )
         {
             int j = i, js, k;
-            while ( convert->fmt[j].is_gt_field )
+            while ( j<convert->nfmt && convert->fmt[j].is_gt_field )
             {
                 convert->fmt[j].ready = 0;
                 j++;
             }
             for (js=0; js<convert->nsamples; js++)
             {
+                // Here comes a hack designed for TBCSQ. When running on large files,
+                // such as 1000GP, there are too many empty fields in the output and
+                // it's very very slow. Therefore in case the handler does not add
+                // anything to the string, we trim all genotype fields enclosed in square
+                // brackets here. This may be changed in future, time will show...
+                size_t l_start = str->l;
+            
                 int ks = convert->samples[js];
                 for (k=i; k<j; k++)
                 {
@@ -1013,7 +1328,11 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
                             kputc(bcf_sr_has_line(convert->readers,ir)?'1':'0', str);
                     }
                     else if ( convert->fmt[k].handler )
+                    {
+                        size_t l = str->l;
                         convert->fmt[k].handler(convert, line, &convert->fmt[k], ks, str);
+                        if ( l==str->l ) { str->l = l_start; break; }  // only TBCSQ does this
+                    }
                 }
             }
             i = j-1;
@@ -1027,6 +1346,7 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
         }
         else if ( convert->fmt[i].handler )
             convert->fmt[i].handler(convert, line, &convert->fmt[i], -1, str);
+
     }
     return str->l - l_ori;
 }
diff --git a/bcftools/convert.c.pysam.c b/bcftools/convert.c.pysam.c
index 084ef50..95814b7 100644
--- a/bcftools/convert.c.pysam.c
+++ b/bcftools/convert.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  convert.c -- functions for converting between VCF/BCF and related formats.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2017 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -64,13 +64,19 @@ THE SOFTWARE.  */
 #define T_IUPAC_GT     23
 #define T_GT_TO_HAP    24   // not publicly advertised
 #define T_GT_TO_HAP2   25   // not publicly advertised
+#define T_TBCSQ        26
+#define T_END          27
+#define T_POS0         28
+#define T_END0         29
 
 typedef struct _fmt_t
 {
     int type, id, is_gt_field, ready, subscript;
     char *key;
     bcf_fmt_t *fmt;
+    void *usr;                  // user data (optional)
     void (*handler)(convert_t *, bcf1_t *, struct _fmt_t *, int, kstring_t *);
+    void (*destroy)(void*);     // clean user data (optional)
 }
 fmt_t;
 
@@ -90,9 +96,19 @@ struct _convert_t
     int allow_undef_tags;
 };
 
+typedef struct
+{
+    kstring_t hap1,hap2;
+    char **str;
+    int n, m;
+}
+bcsq_t;
 
 static void process_chrom(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputs(convert->header->id[BCF_DT_CTG][line->rid].key, str); }
 static void process_pos(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputw(line->pos+1, str); }
+static void process_pos0(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputw(line->pos, str); }
+static void process_end(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputw(line->pos+line->rlen, str); }
+static void process_end0(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputw(line->pos+line->rlen-1, str); }
 static void process_id(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputs(line->d.id, str); }
 static void process_ref(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str) { kputs(line->d.allele[0], str); }
 static void process_alt(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
@@ -127,7 +143,7 @@ static void process_first_alt(convert_t *convert, bcf1_t *line, fmt_t *fmt, int
 static void process_qual(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
     if ( bcf_float_is_missing(line->qual) ) kputc('.', str);
-    else ksprintf(str, "%g", line->qual);
+    else kputd(line->qual, str);
 }
 static void process_filter(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
@@ -195,7 +211,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
             case BCF_BT_INT8:  if ( info->v1.i==bcf_int8_missing ) kputc('.', str); else kputw(info->v1.i, str); break;
             case BCF_BT_INT16: if ( info->v1.i==bcf_int16_missing ) kputc('.', str); else kputw(info->v1.i, str); break;
             case BCF_BT_INT32: if ( info->v1.i==bcf_int32_missing ) kputc('.', str); else kputw(info->v1.i, str); break;
-            case BCF_BT_FLOAT: if ( bcf_float_is_missing(info->v1.f) ) kputc('.', str); else ksprintf(str, "%g", info->v1.f); break;
+            case BCF_BT_FLOAT: if ( bcf_float_is_missing(info->v1.f) ) kputc('.', str); else kputd(info->v1.f, str); break;
             case BCF_BT_CHAR:  kputc(info->v1.i, str); break;
             default: fprintf(pysam_stderr,"todo: type %d\n", info->type); exit(1); break;
         }
@@ -217,7 +233,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
             case BCF_BT_INT8:  BRANCH(int8_t,  val==bcf_int8_missing,  val==bcf_int8_vector_end,  kputw(val, str)); break;
             case BCF_BT_INT16: BRANCH(int16_t, val==bcf_int16_missing, val==bcf_int16_vector_end, kputw(val, str)); break;
             case BCF_BT_INT32: BRANCH(int32_t, val==bcf_int32_missing, val==bcf_int32_vector_end, kputw(val, str)); break;
-            case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(val), bcf_float_is_vector_end(val), ksprintf(str, "%g", val)); break;
+            case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(val), bcf_float_is_vector_end(val), kputd(val, str)); break;
             default: fprintf(pysam_stderr,"todo: type %d\n", info->type); exit(1); break;
         }
         #undef BRANCH
@@ -228,6 +244,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
 static void init_format(convert_t *convert, bcf1_t *line, fmt_t *fmt)
 {
     fmt->id = bcf_hdr_id2int(convert->header, BCF_DT_ID, fmt->key);
+    if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_FMT,fmt->id) ) fmt->id = -1;
     fmt->fmt = NULL;
     if ( fmt->id >= 0 )
     {
@@ -263,7 +280,7 @@ static void process_format(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isa
             if ( bcf_float_is_missing(ptr[fmt->subscript]) || bcf_float_is_vector_end(ptr[fmt->subscript]) )
                 kputc('.', str);
             else
-                ksprintf(str, "%g", ptr[fmt->subscript]);
+                kputd(ptr[fmt->subscript], str);
         }
         else if ( fmt->fmt->type != BCF_BT_CHAR )
         {
@@ -318,6 +335,111 @@ static void process_tgt(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isampl
     }
     if (l == 0) kputc('.', str);
 }
+static void destroy_tbcsq(void *usr)
+{
+    if ( !usr ) return;
+    bcsq_t *csq = (bcsq_t*) usr;
+    free(csq->hap1.s);
+    free(csq->hap2.s);
+    if ( csq->n )
+        free(csq->str[0]);
+    free(csq->str);
+    free(csq);
+}
+static void process_tbcsq(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
+{
+    if ( !fmt->ready )
+    {
+        init_format(convert, line, fmt);
+
+        bcsq_t *csq;
+        if ( fmt->usr )
+        {
+            csq = (bcsq_t*) fmt->usr;
+            if ( csq->n )
+                free(csq->str[0]);
+            csq->n = 0;
+        }
+        else
+            csq = (bcsq_t*) calloc(1,sizeof(bcsq_t));
+        fmt->usr = csq;
+
+        int i=0, len = 0;
+        char *tmp = NULL;
+        if ( bcf_get_info_string(convert->header,line,fmt->key,&tmp,&len)<0 )
+        {
+            csq->n = 0;
+            return;
+        }
+        do
+        {
+            csq->n++;
+            hts_expand(char*, csq->n, csq->m, csq->str);
+            csq->str[ csq->n-1 ] = tmp + i;
+            while ( i<len && tmp[i]!=',' ) i++;
+            if ( i<len && tmp[i]==',' ) tmp[i++] = 0;
+        }
+        while ( i<len );
+    }
+
+    bcsq_t *csq = (bcsq_t*)fmt->usr;
+
+    if ( fmt->fmt==NULL || !csq->n ) return;
+
+    csq->hap1.l = 0;
+    csq->hap2.l = 0;
+
+    int mask = fmt->subscript==0 ? 3 : 1;   // merge both haplotypes if subscript==0
+
+    #define BRANCH(type_t, nbits) { \
+        type_t *x = (type_t*)(fmt->fmt->p + isample*fmt->fmt->size); \
+        int i,j; \
+        if ( fmt->subscript<=0 || fmt->subscript==1 ) \
+        { \
+            for (j=0; j < fmt->fmt->n; j++) \
+            { \
+                type_t val = x[j]; \
+                if ( !val ) continue; \
+                for (i=0; i<nbits; i+=2) \
+                    if ( val & (mask<<i) ) { kputs(csq->str[(j*32+i)/2], &csq->hap1); kputc_(',', &csq->hap1); } \
+            } \
+        } \
+        if ( fmt->subscript<0 || fmt->subscript==2 ) \
+        { \
+            for (j=0; j < fmt->fmt->n; j++) \
+            { \
+                type_t val = x[j]; \
+                if ( !val ) continue; \
+                for (i=1; i<nbits; i+=2) \
+                    if ( val & (1<<i) ) { kputs(csq->str[(j*32+i)/2], &csq->hap2); kputc_(',', &csq->hap2); } \
+            } \
+        } \
+    }
+    switch (fmt->fmt->type)
+    {
+        case BCF_BT_INT8:  BRANCH(uint8_t, 8); break;
+        case BCF_BT_INT16: BRANCH(uint16_t,16); break;
+        case BCF_BT_INT32: BRANCH(uint32_t,32); break;
+        default: error("Unexpected type: %d\n", fmt->fmt->type); exit(1); break;
+    }
+    #undef BRANCH
+
+    if ( !csq->hap1.l && !csq->hap2.l ) return;
+
+    if ( csq->hap1.l ) csq->hap1.s[--csq->hap1.l] = 0;
+    if ( csq->hap2.l ) csq->hap2.s[--csq->hap2.l] = 0;
+
+    if ( fmt->subscript<0 )
+    {
+        kputs(csq->hap1.l?csq->hap1.s:".", str);
+        kputc_('\t', str);
+        kputs(csq->hap2.l?csq->hap2.s:".", str);
+    }
+    else if ( fmt->subscript<2 )
+        kputs(csq->hap1.l?csq->hap1.s:".", str);
+    else
+        kputs(csq->hap2.l?csq->hap2.s:".", str);
+}
 static void init_format_iupac(convert_t *convert, bcf1_t *line, fmt_t *fmt)
 {
     init_format(convert, line, fmt);
@@ -411,6 +533,7 @@ static void process_type(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
     if ( line_type & VCF_MNP ) { if (i) kputc(',',str); kputs("MNP", str); i++; }
     if ( line_type & VCF_INDEL ) { if (i) kputc(',',str); kputs("INDEL", str); i++; }
     if ( line_type & VCF_OTHER ) { if (i) kputc(',',str); kputs("OTHER", str); i++; }
+    if ( line_type & VCF_BND ) { if (i) kputc(',',str); kputs("BND", str); i++; }
 }
 static void process_line(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
@@ -599,103 +722,260 @@ static void process_gt_to_hap(convert_t *convert, bcf1_t *line, fmt_t *fmt, int
     // the allele (0/1) and the asterisk (*); e.g., "0* 1*" for a
     // heterozygous genotype of unknown phase.
 
-    int m, n, i;
-
-    m = convert->ndat / sizeof(int32_t);
-    n = bcf_get_genotypes(convert->header, line, &convert->dat, &m);
-    convert->ndat = m * sizeof(int32_t);
-
-    if ( n<=0 )
-    {
-        // Throw an error or silently proceed?
-        //
-        // for (i=0; i<convert->nsamples; i++) kputs(" ...", str);
-        // return;
-
-        error("Error parsing GT tag at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
-    }
-
-    n /= convert->nsamples;
+    int i, gt_id = bcf_hdr_id2int(convert->header, BCF_DT_ID, "GT");
+    if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_FMT,gt_id) )
+        error("FORMAT/GT tag not present at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
+    if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT);
+    bcf_fmt_t *fmt_gt = NULL;
+    for (i=0; i<line->n_fmt; i++)
+        if ( line->d.fmt[i].id==gt_id ) { fmt_gt = &line->d.fmt[i]; break; }
+    if ( !fmt_gt )
+        error("FORMAT/GT tag not present at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
+
+    // Alloc all memory in advance to avoid kput routines. The biggest allowed allele index is 99
+    if ( line->n_allele > 100 )
+        error("Too many alleles (%d) at %s:%d\n", line->n_allele, bcf_seqname(convert->header, line), line->pos+1);
+    if ( ks_resize(str, str->l+convert->nsamples*8) != 0 )
+        error("Could not alloc %d bytes\n", str->l + convert->nsamples*8);
+
+    if ( fmt_gt->type!=BCF_BT_INT8 )    // todo: use BRANCH_INT if the VCF is valid
+        error("Uh, too many alleles (%d) or redundant BCF representation at %s:%d\n", line->n_allele, bcf_seqname(convert->header, line), line->pos+1);
+
+    int8_t *ptr = ((int8_t*) fmt_gt->p) - fmt_gt->n;
     for (i=0; i<convert->nsamples; i++)
     {
-        int32_t *ptr = (int32_t*)convert->dat + i*n;
-        int j;
-        for (j=0; j<n; j++)
-            if ( ptr[j]==bcf_int32_vector_end ) break;
-
-        if (i>0) kputs(" ", str); // no space separation for first column
-        if ( j==2 )
+        ptr += fmt_gt->n;
+        if ( ptr[0]==2 )
         {
-            // diploid
-            if ( bcf_gt_is_missing(ptr[0]) || bcf_gt_is_missing(ptr[1]) ) {
-                kputs("? ?", str);
+            if ( ptr[1]==3 ) /* 0|0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==5 ) /* 0|1 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==bcf_int8_vector_end ) /* 0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '-'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==2 ) /* 0/0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==4 ) /* 0/1 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_missing(ptr[1]) ) /* 0/. */
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
             }
-            else if ( bcf_gt_is_phased(ptr[1])) {
-                ksprintf(str, "%d %d", bcf_gt_allele(ptr[0]), bcf_gt_allele(ptr[1]));
+            else if ( bcf_gt_is_phased(ptr[1]) ) /* 0|x */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = ' ';
             }
-            else {
-                ksprintf(str, "%d* %d*", bcf_gt_allele(ptr[0]), bcf_gt_allele(ptr[1]));
+            else /* 0/x */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = '*'; str->s[str->l++] = ' ';
             }
         }
-        else if ( j==1 )
+        else if ( ptr[0]==4 )
         {
-            // haploid
-            if ( bcf_gt_is_missing(ptr[0]) )
-                kputs("? -", str);
-            else if ( bcf_gt_allele(ptr[0])==1 )
-                kputs("1 -", str);       // first ALT allele
-            else
-                kputs("0 -", str);       // REF or something else than first ALT
+            if ( ptr[1]==3 ) /* 1|0 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==5 ) /* 1|1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==bcf_int8_vector_end ) /* 1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '-'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==2 ) /* 1/0 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==4 ) /* 1/1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_missing(ptr[1]) ) /* 1/. */
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_phased(ptr[1]) )    /* 1|x */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = ' ';
+            }
+            else /* 1/x */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+        }
+        else if ( bcf_gt_is_missing(ptr[0]) )
+        {
+            if ( ptr[1]==bcf_int8_vector_end ) 
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '-'; str->s[str->l++] = ' ';
+            }
+            else 
+            { 
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
+            }
+        }
+        else if ( ptr[1]==bcf_int8_vector_end )
+        {
+            /* use REF for something else than first ALT */
+            str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '-'; str->s[str->l++] = ' ';
+        }
+        else
+        {
+            kputw(bcf_gt_allele(ptr[0]),str);
+            if ( bcf_gt_is_phased(ptr[1]) ) str->s[str->l++] = '*';
+            str->s[str->l++] = ' ';
+            kputw(bcf_gt_allele(ptr[1]),str);
+            if ( bcf_gt_is_phased(ptr[1]) ) str->s[str->l++] = '*';
+            str->s[str->l++] = ' ';
         }
-        else error("FIXME: not ready for ploidy %d\n", j);
     }
+    str->s[--str->l] = 0;     // delete the last space
 }
 static void process_gt_to_hap2(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
     // same as process_gt_to_hap but converts haploid genotypes into diploid
-    int m, n, i;
-
-    m = convert->ndat / sizeof(int32_t);
-    n = bcf_get_genotypes(convert->header, line, &convert->dat, &m);
-    convert->ndat = m * sizeof(int32_t);
-
-    if ( n<=0 )
-        error("Error parsing GT tag at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
 
-    n /= convert->nsamples;
+    int i, gt_id = bcf_hdr_id2int(convert->header, BCF_DT_ID, "GT");
+    if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_FMT,gt_id) )
+        error("FORMAT/GT tag not present at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
+    if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT);
+    bcf_fmt_t *fmt_gt = NULL;
+    for (i=0; i<line->n_fmt; i++)
+        if ( line->d.fmt[i].id==gt_id ) { fmt_gt = &line->d.fmt[i]; break; }
+    if ( !fmt_gt )
+        error("FORMAT/GT tag not present at %s:%d\n", bcf_seqname(convert->header, line), line->pos+1);
+
+    // Alloc all memory in advance to avoid kput routines. The biggest allowed allele index is 99
+    if ( line->n_allele > 100 )
+        error("Too many alleles (%d) at %s:%d\n", line->n_allele, bcf_seqname(convert->header, line), line->pos+1);
+    if ( ks_resize(str, str->l+convert->nsamples*8) != 0 )
+        error("Could not alloc %d bytes\n", str->l + convert->nsamples*8);
+
+    if ( fmt_gt->type!=BCF_BT_INT8 )    // todo: use BRANCH_INT if the VCF is valid
+        error("Uh, too many alleles (%d) or redundant BCF representation at %s:%d\n", line->n_allele, bcf_seqname(convert->header, line), line->pos+1);
+
+    int8_t *ptr = ((int8_t*) fmt_gt->p) - fmt_gt->n;
     for (i=0; i<convert->nsamples; i++)
     {
-        int32_t *ptr = (int32_t*)convert->dat + i*n;
-        int j;
-        for (j=0; j<n; j++)
-            if ( ptr[j]==bcf_int32_vector_end ) break;
-
-        if (i>0) kputs(" ", str); // no space separation for first column
-        if ( j==2 )
+        ptr += fmt_gt->n;
+        if ( ptr[0]==2 )
         {
-            // diploid
-            if ( bcf_gt_is_missing(ptr[0]) || bcf_gt_is_missing(ptr[1]) ) {
-                kputs("? ?", str);
+            if ( ptr[1]==3 ) /* 0|0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==5 ) /* 0|1 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==bcf_int8_vector_end ) /* 0 -> 0|0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==2 ) /* 0/0 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==4 ) /* 0/1 */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
             }
-            else if ( bcf_gt_is_phased(ptr[1])) {
-                ksprintf(str, "%d %d", bcf_gt_allele(ptr[0]), bcf_gt_allele(ptr[1]));
+            else if ( bcf_gt_is_missing(ptr[1]) ) /* 0/. */
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
             }
-            else {
-                ksprintf(str, "%d* %d*", bcf_gt_allele(ptr[0]), bcf_gt_allele(ptr[1]));
+            else if ( bcf_gt_is_phased(ptr[1]) ) /* 0|x */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = ' ';
+            }
+            else /* 0/x */
+            {
+                str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = '*'; str->s[str->l++] = ' ';
             }
         }
-        else if ( j==1 )
+        else if ( ptr[0]==4 )
         {
-            // haploid
-            if ( bcf_gt_is_missing(ptr[0]) )
-                kputs("? ?", str);
-            else if ( bcf_gt_allele(ptr[0])==1 )
-                kputs("1 1", str);       // first ALT allele
-            else
-                kputs("0 0", str);       // REF or something else than first ALT
+            if ( ptr[1]==3 ) /* 1|0 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==5 ) /* 1|1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==bcf_int8_vector_end ) /* 1 -> 1|1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==2 ) /* 1/0 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( ptr[1]==4 ) /* 1/1 */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' '; str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_missing(ptr[1]) ) /* 1/. */
+            {
+                str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
+            }
+            else if ( bcf_gt_is_phased(ptr[1]) )    /* 1|x */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = ' ';
+            }
+            else /* 1/x */
+            {
+                str->s[str->l++] = '1'; str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+                kputw(bcf_gt_allele(ptr[1]),str);
+                str->s[str->l++] = '*'; str->s[str->l++] = ' ';
+            }
+        }
+        else if ( bcf_gt_is_missing(ptr[0]) )
+        {
+            str->s[str->l++] = '?'; str->s[str->l++] = ' '; str->s[str->l++] = '?'; str->s[str->l++] = ' ';
+        }
+        else if ( ptr[1]==bcf_int8_vector_end )
+        {
+            /* use REF for something else than first ALT */
+            str->s[str->l++] = '0'; str->s[str->l++] = ' '; str->s[str->l++] = '0'; str->s[str->l++] = ' ';
+        }
+        else
+        {
+            kputw(bcf_gt_allele(ptr[0]),str);
+            if ( bcf_gt_is_phased(ptr[1]) ) str->s[str->l++] = '*';
+            str->s[str->l++] = ' ';
+            kputw(bcf_gt_allele(ptr[1]),str);
+            if ( bcf_gt_is_phased(ptr[1]) ) str->s[str->l++] = '*';
+            str->s[str->l++] = ' ';
         }
-        else error("FIXME: not ready for ploidy %d\n", j);
     }
+    str->s[--str->l] = 0;     // delete the last space
 }
 
 static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
@@ -711,6 +991,8 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
     fmt->key   = key ? strdup(key) : NULL;
     fmt->is_gt_field = is_gtf;
     fmt->subscript = -1;
+    fmt->usr     = NULL;
+    fmt->destroy = NULL;
 
     // Allow non-format tags, such as CHROM, INFO, etc., to appear amongst the format tags.
     if ( key )
@@ -720,6 +1002,9 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
         {
             if ( !strcmp("CHROM",key) ) { fmt->type = T_CHROM; }
             else if ( !strcmp("POS",key) ) { fmt->type = T_POS; }
+            else if ( !strcmp("POS0",key) ) { fmt->type = T_POS0; }
+            else if ( !strcmp("END",key) ) { fmt->type = T_END; }
+            else if ( !strcmp("END0",key) ) { fmt->type = T_END0; }
             else if ( !strcmp("ID",key) ) { fmt->type = T_ID; }
             else if ( !strcmp("REF",key) ) { fmt->type = T_REF; }
             else if ( !strcmp("ALT",key) ) { fmt->type = T_ALT; }
@@ -744,6 +1029,9 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
         case T_GP_TO_PROB3: fmt->handler = &process_gp_to_prob3; break;
         case T_CHROM: fmt->handler = &process_chrom; break;
         case T_POS: fmt->handler = &process_pos; break;
+        case T_POS0: fmt->handler = &process_pos0; break;
+        case T_END: fmt->handler = &process_end; break;
+        case T_END0: fmt->handler = &process_end0; break;
         case T_ID: fmt->handler = &process_id; break;
         case T_REF: fmt->handler = &process_ref; break;
         case T_ALT: fmt->handler = &process_alt; break;
@@ -761,15 +1049,17 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
         case T_IUPAC_GT: fmt->handler = &process_iupac_gt; convert->max_unpack |= BCF_UN_FMT; break;
         case T_GT_TO_HAP: fmt->handler = &process_gt_to_hap; convert->max_unpack |= BCF_UN_FMT; break;
         case T_GT_TO_HAP2: fmt->handler = &process_gt_to_hap2; convert->max_unpack |= BCF_UN_FMT; break;
-        case T_LINE: fmt->handler = &process_line; break;
+        case T_TBCSQ: fmt->handler = &process_tbcsq; fmt->destroy = &destroy_tbcsq; convert->max_unpack |= BCF_UN_FMT; break;
+        case T_LINE: fmt->handler = &process_line; convert->max_unpack |= BCF_UN_FMT; break;
         default: error("TODO: handler for type %d\n", fmt->type);
     }
-    if ( key )
+    if ( key && fmt->type==T_INFO )
     {
-        if ( fmt->type==T_INFO )
+        fmt->id = bcf_hdr_id2int(convert->header, BCF_DT_ID, key);
+        if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_INFO,fmt->id) )
         {
-            fmt->id = bcf_hdr_id2int(convert->header, BCF_DT_ID, key);
-            if ( fmt->id==-1 ) convert->undef_info_tag = strdup(key);
+            fmt->id = -1;
+            convert->undef_info_tag = strdup(key);
         }
     }
     return fmt;
@@ -799,6 +1089,16 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
         if ( !strcmp(str.s, "SAMPLE") ) register_tag(convert, T_SAMPLE, "SAMPLE", is_gtf);
         else if ( !strcmp(str.s, "GT") ) register_tag(convert, T_GT, "GT", is_gtf);
         else if ( !strcmp(str.s, "TGT") ) register_tag(convert, T_TGT, "GT", is_gtf);
+        else if ( !strcmp(str.s, "TBCSQ") ) 
+        {
+            fmt_t *fmt = register_tag(convert, T_TBCSQ, "BCSQ", is_gtf);
+            fmt->subscript = parse_subscript(&q);
+            if ( fmt->subscript==-1 )
+            { 
+                if ( !strncmp(q,"{*}",3) ) { fmt->subscript = 0; q += 3; }
+            }
+            else fmt->subscript++;
+        }
         else if ( !strcmp(str.s, "IUPACGT") ) register_tag(convert, T_IUPAC_GT, "GT", is_gtf);
         else if ( !strcmp(str.s, "INFO") )
         {
@@ -821,6 +1121,9 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
     {
         if ( !strcmp(str.s, "CHROM") ) register_tag(convert, T_CHROM, str.s, is_gtf);
         else if ( !strcmp(str.s, "POS") ) register_tag(convert, T_POS, str.s, is_gtf);
+        else if ( !strcmp(str.s, "POS0") ) register_tag(convert, T_POS0, str.s, is_gtf);
+        else if ( !strcmp(str.s, "END") ) register_tag(convert, T_END, str.s, is_gtf);
+        else if ( !strcmp(str.s, "END0") ) register_tag(convert, T_END0, str.s, is_gtf);
         else if ( !strcmp(str.s, "ID") ) register_tag(convert, T_ID, str.s, is_gtf);
         else if ( !strcmp(str.s, "REF") ) register_tag(convert, T_REF, str.s, is_gtf);
         else if ( !strcmp(str.s, "ALT") ) 
@@ -905,6 +1208,8 @@ convert_t *convert_init(bcf_hdr_t *hdr, int *samples, int nsamples, const char *
             default:  p = parse_sep(convert, p, is_gtf); break;
         }
     }
+    if ( is_gtf )
+        error("Could not parse the format string, missing the square bracket \"]\": %s\n", convert->format_str);
 
     if ( nsamples )
     {
@@ -925,7 +1230,10 @@ void convert_destroy(convert_t *convert)
 {
     int i;
     for (i=0; i<convert->nfmt; i++)
+    {
+        if ( convert->fmt[i].destroy ) convert->fmt[i].destroy(convert->fmt[i].usr);
         free(convert->fmt[i].key);
+    }
     free(convert->fmt);
     free(convert->undef_info_tag);
     free(convert->dat);
@@ -986,7 +1294,7 @@ int convert_header(convert_t *convert, kstring_t *str)
 int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
 {
     if ( !convert->allow_undef_tags && convert->undef_info_tag )
-        error("Error: no such tag defined in the VCF header: INFO/%s\n", convert->undef_info_tag);
+        error("Error: no such tag defined in the VCF header: INFO/%s. FORMAT fields must be in square brackets, e.g. \"[ %s]\"\n", convert->undef_info_tag,convert->undef_info_tag);
 
     int l_ori = str->l;
     bcf_unpack(line, convert->max_unpack);
@@ -995,17 +1303,24 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
     str->l = 0;
     for (i=0; i<convert->nfmt; i++)
     {
-        // Genotype fields
+        // Genotype fields. 
         if ( convert->fmt[i].is_gt_field )
         {
             int j = i, js, k;
-            while ( convert->fmt[j].is_gt_field )
+            while ( j<convert->nfmt && convert->fmt[j].is_gt_field )
             {
                 convert->fmt[j].ready = 0;
                 j++;
             }
             for (js=0; js<convert->nsamples; js++)
             {
+                // Here comes a hack designed for TBCSQ. When running on large files,
+                // such as 1000GP, there are too many empty fields in the output and
+                // it's very very slow. Therefore in case the handler does not add
+                // anything to the string, we trim all genotype fields enclosed in square
+                // brackets here. This may be changed in future, time will show...
+                size_t l_start = str->l;
+            
                 int ks = convert->samples[js];
                 for (k=i; k<j; k++)
                 {
@@ -1015,7 +1330,11 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
                             kputc(bcf_sr_has_line(convert->readers,ir)?'1':'0', str);
                     }
                     else if ( convert->fmt[k].handler )
+                    {
+                        size_t l = str->l;
                         convert->fmt[k].handler(convert, line, &convert->fmt[k], ks, str);
+                        if ( l==str->l ) { str->l = l_start; break; }  // only TBCSQ does this
+                    }
                 }
             }
             i = j-1;
@@ -1029,6 +1348,7 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
         }
         else if ( convert->fmt[i].handler )
             convert->fmt[i].handler(convert, line, &convert->fmt[i], -1, str);
+
     }
     return str->l - l_ori;
 }
diff --git a/bcftools/csq.c b/bcftools/csq.c
new file mode 100644
index 0000000..b1db103
--- /dev/null
+++ b/bcftools/csq.c
@@ -0,0 +1,3824 @@
+/* The MIT License
+
+   Copyright (c) 2016 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3 at sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+/*
+    Things that would be nice to have
+        - for stop-lost events (also in frameshifts) report the number of truncated aa's
+        - memory could be greatly reduced by indexing gff (but it is quite compact already)
+        - deletions that go beyond transcript boundaries are not checked at sequence level
+            - alloc tscript->ref in hap_finalize, introduce fa_off_beg:16,fa_off_end:16
+            - see test/csq/ENST00000573314/insertion-overlap.vcf #1476288882
+
+    Read about transcript types here
+        http://vega.sanger.ac.uk/info/about/gene_and_transcript_types.html
+        http://www.ensembl.org/info/genome/variation/predicted_data.html
+        http://www.gencodegenes.org/gencode_biotypes.html
+
+    List of supported biotypes
+        antisense
+        IG_C_gene
+        IG_D_gene
+        IG_J_gene
+        IG_LV_gene
+        IG_V_gene
+        lincRNA
+        macro_lncRNA
+        miRNA
+        misc_RNA
+        Mt_rRNA
+        Mt_tRNA
+        polymorphic_pseudogene
+        processed_transcript
+        protein_coding
+        ribozyme
+        rRNA
+        sRNA
+        scRNA
+        scaRNA
+        sense_intronic
+        sense_overlapping
+        snRNA
+        snoRNA
+        TR_C_gene
+        TR_D_gene
+        TR_J_gene
+        TR_V_gene
+
+    The gff parsing logic
+        We collect features such by combining gff lines A,B,C as follows:
+            A .. gene line with a supported biotype
+                    A.ID=~/^gene:/
+
+            B .. transcript line referencing A
+                    B.ID=~/^transcript:/ && B.Parent=~/^gene:A.ID/
+
+            C .. corresponding CDS, exon, and UTR lines:
+                    C[3] in {"CDS","exon","three_prime_UTR","five_prime_UTR"} && C.Parent=~/^transcript:B.ID/ 
+
+        For coding biotypes ("protein_coding" or "polymorphic_pseudogene") the
+        complete chain link C -> B -> A is required. For the rest, link B -> A suffices.
+        
+                
+    The supported consequence types, sorted by impact:
+        splice_acceptor_variant .. end region of an intron changed (2bp at the 3' end of an intron)
+        splice_donor_variant    .. start region of an intron changed (2bp at the 5' end of an intron)
+        stop_gained             .. DNA sequence variant resulting in a stop codon
+        frameshift_variant      .. number of inserted/deleted bases not a multiple of three, disrupted translational frame
+        stop_lost               .. elongated transcript, stop codon changed
+        start_lost              .. the first codon changed
+        inframe_altering        .. combination of indels leading to unchanged reading frame and length
+        inframe_insertion       .. inserted coding sequence, unchanged reading frame
+        inframe_deletion        .. deleted coding sequence, unchanged reading frame
+        missense_variant        .. amino acid (aa) change, unchanged length
+        splice_region_variant   .. change within 1-3 bases of the exon or 3-8 bases of the intron
+        synonymous_variant      .. DNA sequence variant resulting in no amino acid change
+        stop_retained_variant   .. different stop codon
+        non_coding_variant      .. variant in non-coding sequence, such as RNA gene
+        5_prime_UTR_variant
+        3_prime_UTR_variant
+        intron_variant          .. reported only if none of the above
+        intergenic_variant      .. reported only if none of the above
+
+
+    The annotation algorithm.
+        The algorithm checks if the variant falls in a region of a supported type. The
+        search is performed in the following order, until a match is found:
+            1. idx_cds(gf_cds_t) - lookup CDS by position, create haplotypes, call consequences
+            2. idx_utr(gf_utr_t) - check UTR hits
+            3. idx_exon(gf_exon_t) - check for splice variants
+            4. idx_tscript(tscript_t) - check for intronic variants, RNAs, etc.
+
+        These regidx indexes are created by parsing a gff3 file as follows:
+            1.  create the array "ftr" of all UTR, CDS, exons. This will be
+            processed later and pruned based on transcript types we want to keep.
+            In the same go, create the hash "id2tr" of transcripts to keep
+            (based on biotype) which maps from transcript_id to a transcript. At
+            the same time also build the hash "gid2gene" which maps from gene_id to
+            gf_gene_t pointer.
+            
+            2.  build "idx_cds", "idx_tscript", "idx_utr" and "idx_exon" indexes.
+            Use only features from "ftr" which are present in "id2tr".
+
+            3.  clean data that won't be needed anymore: ftr, id2tr, gid2gene.
+        
+    Data structures.
+        idx_cds, idx_utr, idx_exon, idx_tscript:
+            as described above, regidx structures for fast lookup of exons/transcripts
+            overlapping a region, the payload is a pointer to tscript.cds
+*/
+ 
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <math.h>
+#include <htslib/hts.h>
+#include <htslib/vcf.h>
+#include <htslib/synced_bcf_reader.h>
+#include <htslib/khash.h>
+#include <htslib/khash_str2int.h>
+#include <htslib/kseq.h>
+#include <htslib/faidx.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <ctype.h>
+#include "bcftools.h"
+#include "filter.h"
+#include "regidx.h"
+#include "kheap.h"
+#include "smpl_ilist.h"
+#include "rbuf.h"
+
+#ifndef __FUNCTION__
+#  define __FUNCTION__ __func__
+#endif
+
+// Logic of the filters: include or exclude sites which match the filters?
+#define FLT_INCLUDE 1
+#define FLT_EXCLUDE 2
+
+// Definition of splice_region, splice_acceptor and splice_donor
+#define N_SPLICE_DONOR         2      
+#define N_SPLICE_REGION_EXON   3 
+#define N_SPLICE_REGION_INTRON 8 
+
+// Ensembl ID format, e.g. 
+//     ENST00000423372 for human .. ENST%011d
+//  ENSMUST00000120394 for mouse .. ENSMUST%011d
+char  ENSID_BUF[32], *ENSID_FMT = NULL;
+static inline char *ENSID(uint32_t id)
+{
+    sprintf(ENSID_BUF,ENSID_FMT,id);
+    return ENSID_BUF;
+}
+
+
+#define N_REF_PAD 10    // number of bases to avoid boundary effects
+
+#define STRAND_REV 0
+#define STRAND_FWD 1
+
+#define TRIM_NONE   0
+#define TRIM_5PRIME 1
+#define TRIM_3PRIME 2
+
+// How to treat phased/unphased genotypes
+#define PHASE_REQUIRE 0     // --phase r
+#define PHASE_MERGE   1     // --phase m
+#define PHASE_AS_IS   2     // --phase a
+#define PHASE_SKIP    3     // --phase s
+#define PHASE_NON_REF 4     // --phase R
+#define PHASE_DROP_GT 5     // --samples -
+
+// Node types in the haplotype tree
+#define HAP_CDS   0
+#define HAP_ROOT  1 
+#define HAP_SSS   2     // start/stop/splice
+
+#define CSQ_PRINTED_UPSTREAM    (1<<0)
+#define CSQ_SYNONYMOUS_VARIANT  (1<<1)
+#define CSQ_MISSENSE_VARIANT    (1<<2)
+#define CSQ_STOP_LOST           (1<<3)
+#define CSQ_STOP_GAINED         (1<<4)
+#define CSQ_INFRAME_DELETION    (1<<5)
+#define CSQ_INFRAME_INSERTION   (1<<6)
+#define CSQ_FRAMESHIFT_VARIANT  (1<<7)
+#define CSQ_SPLICE_ACCEPTOR     (1<<8)
+#define CSQ_SPLICE_DONOR        (1<<9)
+#define CSQ_START_LOST          (1<<10)
+#define CSQ_SPLICE_REGION       (1<<11)
+#define CSQ_STOP_RETAINED       (1<<12)
+#define CSQ_UTR5                (1<<13)
+#define CSQ_UTR3                (1<<14)
+#define CSQ_NON_CODING          (1<<15)
+#define CSQ_INTRON              (1<<16)
+//#define CSQ_INTERGENIC          (1<<17)
+#define CSQ_INFRAME_ALTERING    (1<<18)
+#define CSQ_UPSTREAM_STOP       (1<<19)     // adds * in front of the csq string
+#define CSQ_INCOMPLETE_CDS      (1<<20)     // to remove START/STOP in incomplete CDS, see ENSG00000173376/synon.vcf
+#define CSQ_CODING_SEQUENCE     (1<<21)     // cannot tell exactly what it is, but it does affect the coding sequence
+
+// Haplotype-aware consequences, printed in one vcf record only, the rest has a reference @12345
+#define CSQ_COMPOUND (CSQ_SYNONYMOUS_VARIANT|CSQ_MISSENSE_VARIANT|CSQ_STOP_LOST|CSQ_STOP_GAINED| \
+                      CSQ_INFRAME_DELETION|CSQ_INFRAME_INSERTION|CSQ_FRAMESHIFT_VARIANT| \
+                      CSQ_START_LOST|CSQ_STOP_RETAINED|CSQ_INFRAME_ALTERING|CSQ_INCOMPLETE_CDS| \
+                      CSQ_UPSTREAM_STOP)
+#define CSQ_START_STOP          (CSQ_STOP_LOST|CSQ_STOP_GAINED|CSQ_STOP_RETAINED|CSQ_START_LOST)
+
+#define CSQ_PRN_STRAND(csq)     ((csq)&CSQ_COMPOUND && !((csq)&(CSQ_SPLICE_ACCEPTOR|CSQ_SPLICE_DONOR|CSQ_SPLICE_REGION)))
+#define CSQ_PRN_TSCRIPT         (~(CSQ_INTRON|CSQ_NON_CODING))
+#define CSQ_PRN_BIOTYPE         CSQ_NON_CODING
+
+// see kput_vcsq()
+const char *csq_strings[] = 
+{
+    NULL, 
+    "synonymous", 
+    "missense", 
+    "stop_lost", 
+    "stop_gained", 
+    "inframe_deletion", 
+    "inframe_insertion", 
+    "frameshift", 
+    "splice_acceptor", 
+    "splice_donor", 
+    "start_lost", 
+    "splice_region", 
+    "stop_retained", 
+    "5_prime_utr", 
+    "3_prime_utr", 
+    "non_coding", 
+    "intron", 
+    "intergenic",
+    "inframe_altering",
+    NULL,
+    NULL,
+    "coding_sequence"
+};
+
+
+// GFF line types
+#define GFF_TSCRIPT_LINE 1
+#define GFF_GENE_LINE    2
+
+
+/* 
+    Genomic features, for fast lookup by position to overlapping features
+*/
+#define GF_coding_bit 6
+#define GF_is_coding(x) ((x) & (1<<GF_coding_bit))
+#define GF_MT_rRNA                       1                      // non-coding: 1, 2, ...
+#define GF_MT_tRNA                       2
+#define GF_lincRNA                       3
+#define GF_miRNA                         4
+#define GF_MISC_RNA                      5
+#define GF_rRNA                          6
+#define GF_snRNA                         7
+#define GF_snoRNA                        8
+#define GF_PROCESSED_TRANSCRIPT          9
+#define GF_ANTISENSE                    10
+#define GF_macro_lncRNA                 11
+#define GF_ribozyme                     12
+#define GF_sRNA                         13
+#define GF_scRNA                        14
+#define GF_scaRNA                       15
+#define GF_SENSE_INTRONIC               16
+#define GF_SENSE_OVERLAPPING            17
+#define GF_PSEUDOGENE                   18
+#define GF_PROCESSED_PSEUDOGENE         19
+#define GF_ARTIFACT                     20
+#define GF_IG_PSEUDOGENE                21
+#define GF_IG_C_PSEUDOGENE              22
+#define GF_IG_J_PSEUDOGENE              23
+#define GF_IG_V_PSEUDOGENE              24
+#define GF_TR_V_PSEUDOGENE              25
+#define GF_TR_J_PSEUDOGENE              26
+#define GF_MT_tRNA_PSEUDOGENE           27
+#define GF_misc_RNA_PSEUDOGENE          28
+#define GF_miRNA_PSEUDOGENE             29
+#define GF_RIBOZYME                     30
+#define GF_RETAINED_INTRON              31
+#define GF_RETROTRANSPOSED              32
+#define GF_tRNA_PSEUDOGENE              33
+#define GF_TRANSCRIBED_PROCESSED_PSEUDOGENE     34
+#define GF_TRANSCRIBED_UNPROCESSED_PSEUDOGENE   35
+#define GF_TRANSCRIBED_UNITARY_PSEUDOGENE       36
+#define GF_TRANSLATED_UNPROCESSED_PSEUDOGENE    37
+#define GF_TRANSLATED_PROCESSED_PSEUDOGENE      38
+#define GF_KNOWN_NCRNA                          39
+#define GF_UNITARY_PSEUDOGENE                   40
+#define GF_UNPROCESSED_PSEUDOGENE               41
+#define GF_LRG_GENE                             42
+#define GF_3PRIME_OVERLAPPING_ncRNA             43
+#define GF_DISRUPTED_DOMAIN                     44
+#define GF_vaultRNA                             45
+#define GF_BIDIRECTIONAL_PROMOTER_lncRNA        46
+#define GF_AMBIGUOUS_ORF                        47
+#define GF_PROTEIN_CODING               (1|(1<<GF_coding_bit))  // coding: 65, 66, ...
+#define GF_POLYMORPHIC_PSEUDOGENE       (2|(1<<GF_coding_bit))
+#define GF_IG_C                         (3|(1<<GF_coding_bit))
+#define GF_IG_D                         (4|(1<<GF_coding_bit))
+#define GF_IG_J                         (5|(1<<GF_coding_bit))
+#define GF_IG_LV                        (6|(1<<GF_coding_bit))
+#define GF_IG_V                         (7|(1<<GF_coding_bit))
+#define GF_TR_C                         (8|(1<<GF_coding_bit))
+#define GF_TR_D                         (9|(1<<GF_coding_bit))
+#define GF_TR_J                        (10|(1<<GF_coding_bit))
+#define GF_TR_V                        (11|(1<<GF_coding_bit))
+#define GF_NMD                         (12|(1<<GF_coding_bit))
+#define GF_NON_STOP_DECAY              (13|(1<<GF_coding_bit))
+#define GF_CDS      ((1<<(GF_coding_bit+1))+1)                  // special types: 129, 130, ...
+#define GF_EXON     ((1<<(GF_coding_bit+1))+2)
+#define GF_UTR3     ((1<<(GF_coding_bit+1))+3)
+#define GF_UTR5     ((1<<(GF_coding_bit+1))+4)
+// GF_MAX = (1<<30)-1, see hap_node_t
+
+typedef struct _tscript_t tscript_t;
+typedef struct
+{
+    tscript_t *tr;      // transcript
+    uint32_t beg;       // the start coordinate of the CDS (on the reference strand, 0-based)
+    uint32_t pos;       // 0-based index of the first exon base within the transcript (only to
+                        //  update hap_node_t.sbeg in hap_init, could be calculated on the fly)
+    uint32_t len;       // exon length
+    uint32_t icds:30,   // exon index within the transcript
+             phase:2;   // offset of the CDS
+}
+gf_cds_t;
+typedef struct
+{
+    char *name;           // human readable name, e.g. ORF45
+    uint8_t iseq;
+}
+gf_gene_t;
+typedef struct
+{
+    uint32_t beg,end;
+    tscript_t *tr;
+}
+gf_exon_t;
+typedef enum { prime3, prime5 } utr_t;
+typedef struct
+{
+    utr_t which;
+    uint32_t beg,end;
+    tscript_t *tr;
+}
+gf_utr_t;
+
+
+/*
+    Structures related to VCF output:
+
+    vcsq_t
+        information required to assemble consequence lines such as "inframe_deletion|XYZ|ENST01|+|5TY>5I|121ACG>A+124TA>T"
+
+    vcrec_t 
+        single VCF record and csq tied to this record. (Haplotype can have multiple
+        consequences in several VCF records. Each record can have multiple consequences
+        from multiple haplotypes.)
+
+    csq_t
+        a top-level consequence tied to a haplotype
+
+    vbuf_t
+    pos2vbuf
+        VCF records with the same position clustered together for a fast lookup via pos2vbuf
+*/
+typedef struct _vbuf_t vbuf_t;
+typedef struct _vcsq_t vcsq_t;
+struct _vcsq_t
+{
+    uint32_t strand:1,
+             type:31;   // one of CSQ_* types
+    uint32_t trid;
+    uint32_t biotype;   // one of GF_* types
+    char *gene;         // gene name
+    bcf1_t *ref;        // if type&CSQ_PRINTED_UPSTREAM, ref consequence "@1234"
+    kstring_t vstr;     // variant string, eg 5TY>5I|121ACG>A+124TA>T
+};
+typedef struct
+{
+    bcf1_t *line;
+    uint32_t *smpl;     // bitmask of sample consequences with first/second haplotype interleaved
+    uint32_t nfmt:4, nvcsq:28, mvcsq;
+    vcsq_t *vcsq;       // there can be multiple consequences for a single VCF record
+}
+vrec_t;
+typedef struct
+{
+    uint32_t pos;
+    vrec_t *vrec;   // vcf line that this csq is tied to; needed when printing haplotypes (hap_stage_vcf)
+    int idx;        // 0-based index of the csq at the VCF line, for FMT/BCSQ
+    vcsq_t type;
+}
+csq_t;
+struct _vbuf_t
+{
+    vrec_t **vrec;   // buffer of VCF lines with the same position
+    int n, m;
+};
+KHASH_MAP_INIT_INT(pos2vbuf, vbuf_t*)
+
+
+/*
+    Structures related to haplotype-aware consequences in coding regions
+
+    hap_node_t
+        node of a haplotype tree. Each transcript has one tree
+
+    tscript_t
+        despite its general name, it is intended for coding transcripts only
+
+    hap_t
+    hstack_t
+        for traversal of the haplotype tree and braking combined
+        consequences into independent parts
+*/
+typedef struct _hap_node_t hap_node_t;
+struct _hap_node_t
+{
+    char *seq;          // cds segment [parent_node,this_node)
+    char *var;          // variant "ref>alt"
+    uint32_t type:2,    // HAP_ROOT or HAP_CDS
+             csq:30;    // this node's consequence
+    int dlen;           // alt minus ref length: <0 del, >0 ins, 0 substitution
+    uint32_t rbeg;      // variant's VCF position (0-based, inclusive)
+    int32_t rlen;       // variant's rlen; alen=rlen+dlen; fake for non CDS types
+    uint32_t sbeg;      // variant's position on the spliced reference transcript (0-based, inclusive, N_REF_PAD not included)
+    uint32_t icds;      // which exon does this node's variant overlaps
+    hap_node_t **child, *prev;  // children haplotypes and previous coding node
+    int nchild, mchild;
+    bcf1_t *cur_rec, *rec;      // current VCF record and node's VCF record
+    uint32_t nend;              // number of haplotypes ending in this node
+    int *cur_child, mcur_child; // mapping from the allele to the currently active child
+    csq_t *csq_list;            // list of haplotype's consequences, broken by position
+    int ncsq_list, mcsq_list;
+};
+struct _tscript_t
+{
+    uint32_t id;        // transcript id
+    uint32_t beg,end;   // transcript's beg and end coordinate (ref strand, 0-based, inclusive)
+    uint32_t strand:1,  // STRAND_REV or STRAND_FWD
+             ncds:31,   // number of exons
+             mcds;
+    gf_cds_t **cds;     // ordered list of exons
+    char *ref;          // reference sequence, padded with N_REF_PAD bases on both ends
+    char *sref;         // spliced reference sequence, padded with N_REF_PAD bases on both ends
+    hap_node_t *root;   // root of the haplotype tree
+    hap_node_t **hap;   // pointer to haplotype leaves, two for each sample
+    int nhap, nsref;    // number of haplotypes and length of sref, including 2*N_REF_PAD
+    uint32_t trim:2,    // complete, 5' or 3' trimmed, see TRIM_* types
+             type:30;   // one of GF_* types
+    gf_gene_t *gene;
+};
+static inline int cmp_tscript(tscript_t **a, tscript_t **b)
+{
+    return ( (*a)->end  < (*b)->end ) ? 1 : 0;
+}
+KHEAP_INIT(trhp, tscript_t*, cmp_tscript)
+typedef khp_trhp_t tr_heap_t;
+typedef struct
+{
+    hap_node_t *node;   // current node
+    int ichild;         // current child in the active node
+    int dlen;           // total dlen, from the root to the active node
+    size_t slen;        // total sequence length, from the root to the active node
+}
+hstack_t;
+typedef struct
+{
+    int mstack;
+    hstack_t *stack;
+    tscript_t *tr;      // tr->ref: spliced transcript on ref strand
+    kstring_t sseq;     // spliced haplotype sequence on ref strand
+    kstring_t tseq;     // the variable part of translated haplotype transcript, coding strand
+    kstring_t tref;     // the variable part of translated reference transcript, coding strand
+    uint32_t sbeg;      // stack's sbeg, for cases first node's type is HAP_SSS
+    int upstream_stop;
+}
+hap_t;
+
+
+/*
+    Helper structures, only for initialization
+    
+    ftr_t
+        temporary list of all exons, CDS, UTRs 
+*/
+KHASH_MAP_INIT_INT(int2tscript, tscript_t*)
+KHASH_MAP_INIT_INT(int2int, int)
+KHASH_MAP_INIT_INT(int2gene, gf_gene_t*)
+typedef struct
+{
+    int type;       // GF_CDS, GF_EXON, GF_5UTR, GF_3UTR
+    uint32_t beg;
+    uint32_t end;
+    uint32_t trid;
+    uint32_t strand:1;   // STRAND_REV,STRAND_FWD
+    uint32_t phase:2;    // 0, 1 or 2
+    uint32_t iseq:29;
+}
+ftr_t;
+typedef struct
+{
+    // all exons, CDS, UTRs
+    ftr_t *ftr;
+    int nftr, mftr;
+
+    // mapping from transcript ensembl id to gene id
+    kh_int2gene_t *gid2gene;
+
+    // mapping from transcript id to tscript, for quick CDS anchoring
+    kh_int2tscript_t *id2tr;
+
+    // sequences
+    void *seq2int;
+    char **seq;
+    int nseq, mseq;
+
+    // ignored biotypes
+    void *ignored_biotypes;
+}
+aux_t;
+
+typedef struct _args_t
+{
+    // the main regidx lookups, from chr:beg-end to overlapping features and
+    // index iterator
+    regidx_t *idx_cds, *idx_utr, *idx_exon, *idx_tscript;
+    regitr_t *itr;
+
+    // temporary structures, deleted after initializtion
+    aux_t init;
+
+    // text tab-delimited output (out) or vcf/bcf output (out_fh)
+    FILE *out;
+    htsFile *out_fh;
+
+    // vcf
+    bcf_srs_t *sr;
+    bcf_hdr_t *hdr;
+    int hdr_nsmpl;          // actual number of samples in the vcf, for bcf_update_format_values()
+
+    // include or exclude sites which match the filters
+    filter_t *filter;
+    char *filter_str;
+    int filter_logic;       // FLT_INCLUDE or FLT_EXCLUDE
+
+    // samples to process
+    int sample_is_file;
+    char *sample_list;
+    smpl_ilist_t *smpl;
+
+    char *outdir, **argv, *fa_fname, *gff_fname, *output_fname;
+    char *bcsq_tag;
+    int argc, output_type;
+    int phase, quiet, local_csq;
+    int ncsq_max, nfmt_bcsq;    // maximum number of csq per site that can be accessed from FORMAT/BCSQ
+    int ncsq_small_warned;
+    
+    int rid;                    // current chromosome
+    tr_heap_t *active_tr;       // heap of active transcripts for quick flushing
+    hap_t *hap;                 // transcript haplotype recursion
+    vbuf_t **vcf_buf;           // buffered VCF lines to annotate with CSQ and flush
+    rbuf_t vcf_rbuf;            // round buffer indexes to vcf_buf
+    kh_pos2vbuf_t *pos2vbuf;    // fast lookup of buffered lines by position
+    tscript_t **rm_tr;          // buffer of transcripts to clean
+    int nrm_tr, mrm_tr;
+    csq_t *csq_buf;             // pool of csq not managed by hap_node_t, i.e. non-CDS csqs
+    int ncsq_buf, mcsq_buf;
+
+    faidx_t *fai;
+    kstring_t str, str2;
+    int32_t *gt_arr, mgt_arr;
+}
+args_t;
+
+// AAA, AAC, ...
+const char *gencode = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF";
+const uint8_t nt4[] =
+{
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,0,4,1, 4,4,4,2, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 3,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,0,4,1, 4,4,4,2, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 3
+};
+const uint8_t cnt4[] =
+{
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,3,4,2, 4,4,4,1, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 0,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,3,4,2, 4,4,4,1, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 0
+};
+#define dna2aa(x)  gencode[  nt4[(uint8_t)(x)[0]]<<4 |  nt4[(uint8_t)(x)[1]]<<2 |  nt4[(uint8_t)(x)[2]] ]
+#define cdna2aa(x) gencode[ cnt4[(uint8_t)(x)[2]]<<4 | cnt4[(uint8_t)(x)[1]]<<2 | cnt4[(uint8_t)(x)[0]] ]
+
+static const char *gf_strings_noncoding[] = 
+{ 
+    "MT_rRNA", "MT_tRNA", "lincRNA", "miRNA", "misc_RNA", "rRNA", "snRNA", "snoRNA", "processed_transcript",
+    "antisense", "macro_lncRNA", "ribozyme", "sRNA", "scRNA", "scaRNA", "sense_intronic", "sense_overlapping",
+    "pseudogene", "processed_pseudogene", "artifact", "IG_pseudogene", "IG_C_pseudogene", "IG_J_pseudogene", 
+    "IG_V_pseudogene", "TR_V_pseudogene", "TR_J_pseudogene", "MT_tRNA_pseudogene", "misc_RNA_pseudogene", 
+    "miRNA_pseudogene", "ribozyme", "retained_intron", "retrotransposed", "Trna_pseudogene", "transcribed_processed_pseudogene", 
+    "transcribed_unprocessed_pseudogene", "transcribed_unitary_pseudogene",    "translated_unprocessed_pseudogene",
+    "translated_processed_pseudogene", "known_ncRNA", "unitary_pseudogene", "unprocessed_pseudogene",
+    "LRG_gene", "3_prime_overlapping_ncRNA", "disrupted_domain", "vaultRNA", "bidirectional_promoter_lncRNA", "ambiguous_orf"
+};
+static const char *gf_strings_coding[] = { "protein_coding", "polymorphic_pseudogene", "IG_C", "IG_D", "IG_J", "IG_LV", "IG_V", "TR_C", "TR_D", "TR_J", "TR_V", "NMD", "non_stop_decay"};
+static const char *gf_strings_special[] = { "CDS", "exon", "3_prime_UTR", "5_prime_UTR" };
+
+const char *gf_type2gff_string(int type)
+{
+    if ( !GF_is_coding(type) )
+    {
+        if ( type < (1<<GF_coding_bit) ) return gf_strings_noncoding[type-1];
+        type &= (1<<(GF_coding_bit+1)) - 1;
+        return gf_strings_special[type - 1];
+    }
+    type &= (1<<GF_coding_bit) - 1;
+    return gf_strings_coding[type - 1];
+}
+
+/*
+    gff parsing functions
+*/
+static inline int feature_set_seq(args_t *args, char *chr_beg, char *chr_end)
+{
+    aux_t *aux = &args->init;
+    char c = chr_end[1];
+    chr_end[1] = 0;
+    int iseq;
+    if ( khash_str2int_get(aux->seq2int, chr_beg, &iseq)!=0 )
+    {
+        hts_expand(char*, aux->nseq+1, aux->mseq, aux->seq);
+        aux->seq[aux->nseq] = strdup(chr_beg);
+        iseq = khash_str2int_inc(aux->seq2int, aux->seq[aux->nseq]);
+        aux->nseq++;
+        assert( aux->nseq < 256 );  // see gf_gene_t.iseq
+    }
+    chr_end[1] = c;
+    return iseq;
+}
+static inline char *gff_skip(const char *line, char *ss)
+{
+    while ( *ss && *ss!='\t' ) ss++;
+    if ( !*ss ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__,line);
+    return ss+1;
+}
+static inline void gff_parse_chr(const char *line, char **chr_beg, char **chr_end)
+{
+    char *se = (char*) line;
+    while ( *se && *se!='\t' ) se++;
+    if ( !*se ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__,line);
+    *chr_beg = (char*) line;
+    *chr_end = se-1;
+}
+static inline char *gff_parse_beg_end(const char *line, char *ss, uint32_t *beg, uint32_t *end)
+{
+    char *se = ss;
+    *beg = strtol(ss, &se, 10) - 1;
+    if ( ss==se ) error("[%s:%d %s] Could not parse the line:\n\t%s\n\t%s\n",__FILE__,__LINE__,__FUNCTION__,line,ss);
+    ss = se+1;
+    *end = strtol(ss, &se, 10) - 1;
+    if ( ss==se ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__,line);
+    return se+1;
+}
+static inline uint32_t gff_parse_id(const char *line, const char *needle, char *ss)
+{
+    ss = strstr(ss,needle);
+    if ( !ss ) error("[%s:%d %s] Could not parse the line, \"%s\" not present: %s\n",__FILE__,__LINE__,__FUNCTION__,needle,line);
+    ss += strlen(needle);
+    while ( *ss && !isdigit(*ss) ) ss++;
+    if ( !ss ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__, line);
+    char *se;
+    uint32_t id = strtol(ss, &se, 10);
+    if ( ss==se ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__, line);
+    if ( *se && *se!=';' && *se!='\t' ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__,line);
+    assert( id <= 0xffffff );   // see gf_gene_t.id. Ensembl IDs are never that big in practice
+    return id;
+}
+static void gff_parse_ensid_fmt(const char *line, const char *needle, char *ss)
+{
+    ss = strstr(ss,needle);
+    if ( !ss ) error("[%s:%d %s] Could not parse the line, \"%s\" not present: %s\n",__FILE__,__LINE__,__FUNCTION__,needle,line);
+    ss += strlen(needle);
+    char *se = ss;
+    while ( *se && !isdigit(*se) ) se++;
+    kstring_t str = {0,0,0};
+    kputsn(ss,se-ss,&str);
+    ss = se;
+    while ( *se && isdigit(*se) ) se++;
+    ksprintf(&str,"%%0%dd",(int)(se-ss));
+    ENSID_FMT = str.s;
+}
+static inline int gff_parse_type(char *line)
+{
+    line = strstr(line,"ID=");
+    if ( !line ) return -1;
+    line += 3;
+    if ( !strncmp(line,"transcript:",11) ) return GFF_TSCRIPT_LINE;
+    else if ( !strncmp(line,"gene:",5) ) return GFF_GENE_LINE;
+    return -1;
+}
+static inline int gff_parse_biotype(char *_line)
+{
+    char *line = strstr(_line,"biotype=");
+    if ( !line ) return -1;
+
+    line += 8;
+    switch (*line)
+    {
+        case 'p': 
+            if ( !strncmp(line,"protein_coding",14) ) return GF_PROTEIN_CODING;
+            else if ( !strncmp(line,"pseudogene",10) ) return GF_PSEUDOGENE;
+            else if ( !strncmp(line,"processed_transcript",20) ) return GF_PROCESSED_TRANSCRIPT;
+            else if ( !strncmp(line,"processed_pseudogene",20) ) return GF_PROCESSED_PSEUDOGENE;
+            else if ( !strncmp(line,"polymorphic_pseudogene",22) ) return GF_POLYMORPHIC_PSEUDOGENE;
+            break;
+        case 'a':
+            if ( !strncmp(line,"artifact",8) ) return GF_ARTIFACT;
+            else if ( !strncmp(line,"antisense",9) ) return GF_ANTISENSE;
+            else if ( !strncmp(line,"ambiguous_orf",13) ) return GF_AMBIGUOUS_ORF;
+            break;
+        case 'I':
+            if ( !strncmp(line,"IG_C_gene",9) ) return GF_IG_C;
+            else if ( !strncmp(line,"IG_D_gene",9) ) return GF_IG_D;
+            else if ( !strncmp(line,"IG_J_gene",9) ) return GF_IG_J;
+            else if ( !strncmp(line,"IG_LV_gene",10) ) return GF_IG_LV;
+            else if ( !strncmp(line,"IG_V_gene",9) ) return GF_IG_V;
+            else if ( !strncmp(line,"IG_pseudogene",13) ) return GF_IG_PSEUDOGENE;
+            else if ( !strncmp(line,"IG_C_pseudogene",15) ) return GF_IG_C_PSEUDOGENE;
+            else if ( !strncmp(line,"IG_J_pseudogene",15) ) return GF_IG_J_PSEUDOGENE;
+            else if ( !strncmp(line,"IG_V_pseudogene",15) ) return GF_IG_V_PSEUDOGENE;
+            break;
+        case 'T':
+            if ( !strncmp(line,"TR_C_gene",9) ) return GF_TR_C;
+            else if ( !strncmp(line,"TR_D_gene",9) ) return GF_TR_D;
+            else if ( !strncmp(line,"TR_J_gene",9) ) return GF_TR_J;
+            else if ( !strncmp(line,"TR_V_gene",9) ) return GF_TR_V;
+            else if ( !strncmp(line,"TR_V_pseudogene",15) ) return GF_TR_V_PSEUDOGENE;
+            else if ( !strncmp(line,"TR_J_pseudogene",15) ) return GF_TR_J_PSEUDOGENE;
+            break;
+        case 'M':
+            if ( !strncmp(line,"Mt_tRNA_pseudogene",18) ) return GF_MT_tRNA_PSEUDOGENE;
+            else if ( !strncmp(line,"Mt_tRNA",7) ) return GF_MT_tRNA;
+            else if ( !strncmp(line,"Mt_rRNA",7) ) return GF_MT_tRNA;
+            break;
+        case 'l':
+            if ( !strncmp(line,"lincRNA",7) ) return GF_lincRNA;
+            break;
+        case 'm':
+            if ( !strncmp(line,"macro_lncRNA",12) ) return GF_macro_lncRNA;
+            else if ( !strncmp(line,"misc_RNA_pseudogene",19) ) return GF_misc_RNA_PSEUDOGENE;
+            else if ( !strncmp(line,"miRNA_pseudogene",16) ) return GF_miRNA_PSEUDOGENE;
+            else if ( !strncmp(line,"miRNA",5) ) return GF_miRNA;
+            else if ( !strncmp(line,"misc_RNA",8) ) return GF_MISC_RNA;
+            break;
+        case 'r':
+            if ( !strncmp(line,"rRNA",4) ) return GF_rRNA;
+            else if ( !strncmp(line,"ribozyme",8) ) return GF_RIBOZYME;
+            else if ( !strncmp(line,"retained_intron",15) ) return GF_RETAINED_INTRON;
+            else if ( !strncmp(line,"retrotransposed",15) ) return GF_RETROTRANSPOSED;
+            break;
+        case 's':
+            if ( !strncmp(line,"snRNA",5) ) return GF_snRNA;
+            else if ( !strncmp(line,"sRNA",4) ) return GF_sRNA;
+            else if ( !strncmp(line,"scRNA",5) ) return GF_scRNA;
+            else if ( !strncmp(line,"scaRNA",6) ) return GF_scaRNA;
+            else if ( !strncmp(line,"snoRNA",6) ) return GF_snoRNA;
+            else if ( !strncmp(line,"sense_intronic",14) ) return GF_SENSE_INTRONIC;
+            else if ( !strncmp(line,"sense_overlapping",17) ) return GF_SENSE_OVERLAPPING;
+            break;
+        case 't':
+            if ( !strncmp(line,"tRNA_pseudogene",15) ) return GF_tRNA_PSEUDOGENE;
+            else if ( !strncmp(line,"transcribed_processed_pseudogene",32) ) return GF_TRANSCRIBED_PROCESSED_PSEUDOGENE;
+            else if ( !strncmp(line,"transcribed_unprocessed_pseudogene",34) ) return GF_TRANSCRIBED_UNPROCESSED_PSEUDOGENE; 
+            else if ( !strncmp(line,"transcribed_unitary_pseudogene",30) ) return GF_TRANSCRIBED_UNITARY_PSEUDOGENE;
+            else if ( !strncmp(line,"translated_unprocessed_pseudogene",33) ) return GF_TRANSLATED_UNPROCESSED_PSEUDOGENE;
+            else if ( !strncmp(line,"translated_processed_pseudogene",31) ) return GF_TRANSLATED_PROCESSED_PSEUDOGENE;
+            break;
+        case 'n':
+            if ( !strncmp(line,"nonsense_mediated_decay",23) ) return GF_NMD;
+            else if ( !strncmp(line,"non_stop_decay",14) ) return GF_NON_STOP_DECAY;
+            break;
+        case 'k':
+            if ( !strncmp(line,"known_ncrna",11) ) return GF_KNOWN_NCRNA;
+            break;
+        case 'u':
+            if ( !strncmp(line,"unitary_pseudogene",18) ) return GF_UNITARY_PSEUDOGENE;
+            else if ( !strncmp(line,"unprocessed_pseudogene",22) ) return GF_UNPROCESSED_PSEUDOGENE;
+            break;
+        case 'L':
+            if ( !strncmp(line,"LRG_gene",8) ) return GF_LRG_GENE;
+            break;
+        case '3':
+            if ( !strncmp(line,"3prime_overlapping_ncRNA",24) ) return GF_3PRIME_OVERLAPPING_ncRNA;
+            break;
+        case 'd':
+            if ( !strncmp(line,"disrupted_domain",16) ) return GF_DISRUPTED_DOMAIN;
+            break;
+        case 'v':
+            if ( !strncmp(line,"vaultRNA",8) ) return GF_vaultRNA;
+            break;
+        case 'b':
+            if ( !strncmp(line,"bidirectional_promoter_lncRNA",29) ) return GF_BIDIRECTIONAL_PROMOTER_lncRNA;
+            break;
+    }
+    return 0;
+}
+static inline int gff_ignored_biotype(args_t *args, char *ss)
+{
+    ss = strstr(ss,"biotype=");
+    if ( !ss ) return 0;
+
+    ss += 8;
+    char *se = ss, tmp;
+    while ( *se && *se!=';' ) se++;
+    tmp = *se;
+    *se = 0;
+
+    char *key = ss;
+    int n = 0;
+    if ( khash_str2int_get(args->init.ignored_biotypes, ss, &n)!=0 ) key = strdup(ss);
+    khash_str2int_set(args->init.ignored_biotypes, key, n+1);
+
+    *se = tmp;
+    return 1;
+}
+gf_gene_t *gene_init(aux_t *aux, uint32_t gene_id)
+{
+    khint_t k = kh_get(int2gene, aux->gid2gene, (int)gene_id);
+    gf_gene_t *gene = (k == kh_end(aux->gid2gene)) ? NULL : kh_val(aux->gid2gene, k);
+    if ( !gene )
+    {
+        gene = (gf_gene_t*) calloc(1,sizeof(gf_gene_t));
+        int ret;
+        k = kh_put(int2gene, aux->gid2gene, (int)gene_id, &ret);
+        kh_val(aux->gid2gene,k) = gene;
+    }
+    return gene;
+}
+void gff_parse_transcript(args_t *args, const char *line, char *ss, ftr_t *ftr)
+{
+    aux_t *aux = &args->init;
+    int biotype = gff_parse_biotype(ss);
+    if ( biotype <= 0 )
+    {
+        if ( !gff_ignored_biotype(args, ss) && args->quiet<2 ) fprintf(stderr,"ignored transcript: %s\n",line);
+        return;
+    }
+
+    // create a mapping from transcript_id to gene_id
+    uint32_t trid = gff_parse_id(line, "ID=transcript:", ss);
+    uint32_t gene_id = gff_parse_id(line, "Parent=gene:", ss);
+
+    if ( !ENSID_FMT ) gff_parse_ensid_fmt(line, "ID=transcript:", ss);      // id prefix different across species
+
+    tscript_t *tr = (tscript_t*) calloc(1,sizeof(tscript_t));
+    tr->id     = trid;
+    tr->strand = ftr->strand;
+    tr->gene   = gene_init(aux, gene_id);
+    tr->type   = biotype;
+    tr->beg    = ftr->beg;
+    tr->end    = ftr->end;
+
+    khint_t k;
+    int ret;
+    k = kh_put(int2tscript, aux->id2tr, (int)trid, &ret);
+    kh_val(aux->id2tr,k) = tr;
+}
+void gff_parse_gene(args_t *args, const char *line, char *ss, char *chr_beg, char *chr_end, ftr_t *ftr)
+{
+    int biotype = gff_parse_biotype(ss);
+    if ( biotype <= 0 )
+    {
+        if ( !gff_ignored_biotype(args, ss) && args->quiet<2 ) fprintf(stderr,"ignored gene: %s\n",line);
+        return;
+    }
+
+    aux_t *aux = &args->init;
+
+    // substring search for "ID=gene:ENSG00000437963"
+    uint32_t gene_id = gff_parse_id(line, "ID=gene:", ss);
+    gf_gene_t *gene = gene_init(aux, gene_id);
+    assert( !gene->name );      // the gene_id should be unique
+
+    gene->iseq = feature_set_seq(args, chr_beg,chr_end);
+
+    // substring search for "Name=OR4F5"
+    ss = strstr(chr_end+2,"Name=");
+    if ( !ss ) error("Could not parse the line, \"Name=\" not present: %s\n", line);
+    ss += 5;
+    char *se = ss;
+    while ( *se && *se!=';' && !isspace(*se) ) se++;
+    gene->name = (char*) malloc(se-ss+1);
+    memcpy(gene->name,ss,se-ss);
+    gene->name[se-ss] = 0;
+}
+int gff_parse(args_t *args, char *line, ftr_t *ftr)
+{
+    // - skip empty lines and commented lines
+    // - columns 
+    //      1.      chr
+    //      2.      <skip>
+    //      3.      CDS, transcript, gene, ...
+    //      4-5.    beg,end
+    //      6.      <skip>
+    //      7.      strand
+    //      8.      phase
+    //      9.      Parent=transcript:ENST(\d+);ID=... etc
+
+    char *ss = line;
+    if ( !*ss ) return -1;      // skip blank lines
+    if ( *ss=='#' ) return -1;  // skip comments
+
+    char *chr_beg, *chr_end;
+    gff_parse_chr(line, &chr_beg, &chr_end);
+    ss = gff_skip(line, chr_end + 2);
+
+    // 3. column: is this a CDS, transcript, gene, etc.
+    if ( !strncmp("exon\t",ss,5) ) { ftr->type = GF_EXON; ss += 5; }
+    else if ( !strncmp("CDS\t",ss,4) ) { ftr->type = GF_CDS; ss += 4; }
+    else if ( !strncmp("three_prime_UTR\t",ss,16) ) { ftr->type = GF_UTR3; ss += 16; }
+    else if ( !strncmp("five_prime_UTR\t",ss,15) ) { ftr->type = GF_UTR5; ss += 15; }
+    else
+    {
+        ss = gff_skip(line, ss);
+        ss = gff_parse_beg_end(line, ss, &ftr->beg,&ftr->end);
+        ss = gff_skip(line, ss);
+        int type = gff_parse_type(ss);
+        if ( type!=GFF_TSCRIPT_LINE && type!=GFF_GENE_LINE ) 
+        {
+            // we ignore these, debug print to see new types:
+            ss = strstr(ss,"ID=");
+            if ( !ss ) return -1;   // no ID, ignore the line
+            if ( !strncmp("chromosome",ss+3,10) ) return -1;
+            if ( !strncmp("supercontig",ss+3,11) ) return -1;
+            if ( args->quiet<2 ) fprintf(stderr,"ignored: %s\n", line);
+            return -1;
+        }
+
+        // 7. column: strand
+        if ( *ss == '+' ) ftr->strand = STRAND_FWD;
+        else if ( *ss == '-' ) ftr->strand = STRAND_REV;
+        else error("Unknown strand: %c .. %s\n", *ss,ss);
+
+        if ( type==GFF_TSCRIPT_LINE )
+            gff_parse_transcript(args, line, ss, ftr);
+        else
+            gff_parse_gene(args, line, ss, chr_beg, chr_end, ftr);
+
+        return -1;
+    }
+    ss = gff_parse_beg_end(line, ss, &ftr->beg,&ftr->end);
+    ss = gff_skip(line, ss);
+
+    // 7. column: strand
+    if ( *ss == '+' ) ftr->strand = STRAND_FWD;
+    else if ( *ss == '-' ) ftr->strand = STRAND_REV;
+    else { if ( args->quiet<2 ) fprintf(stderr,"Skipping unknown strand: %c\n", *ss); return -1; }
+    ss += 2;
+
+    // 8. column: phase (codon offset)
+    if ( *ss == '0' ) ftr->phase = 0;
+    else if ( *ss == '1' ) ftr->phase = 1;
+    else if ( *ss == '2' ) ftr->phase = 2;
+    else if ( *ss == '.' ) ftr->phase = 0;      // exons do not have phase
+    else { if ( args->quiet<2 ) fprintf(stderr,"Skipping unknown phase: %c, %s\n", *ss, line); return -1; }
+    ss += 2;
+
+    // substring search for "Parent=transcript:ENST00000437963"
+    ftr->trid = gff_parse_id(line, "Parent=transcript:", ss);
+    ftr->iseq = feature_set_seq(args, chr_beg,chr_end);
+    return 0;
+}
+
+static int cmp_cds_ptr(const void *a, const void *b)
+{
+    // comparison function for qsort of transcripts's CDS
+    if ( (*((gf_cds_t**)a))->beg < (*((gf_cds_t**)b))->beg ) return -1;
+    if ( (*((gf_cds_t**)a))->beg > (*((gf_cds_t**)b))->beg ) return 1;
+    return 0;
+}
+
+static inline void chr_beg_end(aux_t *aux, int iseq, char **chr_beg, char **chr_end)
+{
+    *chr_beg = *chr_end = aux->seq[iseq];
+    while ( (*chr_end)[1] ) (*chr_end)++;
+}
+tscript_t *tscript_init(aux_t *aux, uint32_t trid)
+{
+    khint_t k = kh_get(int2tscript, aux->id2tr, (int)trid);
+    tscript_t *tr = (k == kh_end(aux->id2tr)) ? NULL : kh_val(aux->id2tr, k);
+    assert( tr );
+    return tr;
+}
+void register_cds(args_t *args, ftr_t *ftr)
+{
+    // Make the CDS searchable via idx_cds. Note we do not malloc tr->cds just yet.
+    //  ftr is the result of parsing a gff CDS line
+    aux_t *aux = &args->init;
+
+    tscript_t *tr = tscript_init(aux, ftr->trid);
+    if ( tr->strand != ftr->strand ) error("Conflicting strand in transcript %"PRIu32" .. %d vs %d\n",ftr->trid,tr->strand,ftr->strand);
+    
+    gf_cds_t *cds = (gf_cds_t*) malloc(sizeof(gf_cds_t));
+    cds->tr    = tr;
+    cds->beg   = ftr->beg;
+    cds->len   = ftr->end - ftr->beg + 1;
+    cds->icds  = 0;     // to keep valgrind on mac happy
+    cds->phase = ftr->phase;
+    
+    hts_expand(gf_cds_t*,tr->ncds+1,tr->mcds,tr->cds);
+    tr->cds[tr->ncds++] = cds;
+}
+void register_utr(args_t *args, ftr_t *ftr)
+{
+    aux_t *aux = &args->init;
+    gf_utr_t *utr = (gf_utr_t*) malloc(sizeof(gf_utr_t));
+    utr->which = ftr->type==GF_UTR3 ? prime3 : prime5;
+    utr->beg   = ftr->beg;
+    utr->end   = ftr->end;
+    utr->tr    = tscript_init(aux, ftr->trid);
+
+    char *chr_beg, *chr_end;
+    chr_beg_end(&args->init, utr->tr->gene->iseq, &chr_beg, &chr_end);
+    regidx_push(args->idx_utr, chr_beg,chr_end, utr->beg,utr->end, &utr);
+}
+void register_exon(args_t *args, ftr_t *ftr)
+{
+    aux_t *aux = &args->init;
+    gf_exon_t *exon = (gf_exon_t*) malloc(sizeof(gf_exon_t));
+    exon->beg = ftr->beg;
+    exon->end = ftr->end;
+    exon->tr  = tscript_init(aux, ftr->trid);
+
+    char *chr_beg, *chr_end;
+    chr_beg_end(&args->init, exon->tr->gene->iseq, &chr_beg, &chr_end);
+    regidx_push(args->idx_exon, chr_beg,chr_end, exon->beg - N_SPLICE_REGION_INTRON, exon->end + N_SPLICE_REGION_INTRON, &exon);
+}
+
+void tscript_init_cds(args_t *args)
+{
+    aux_t *aux = &args->init;
+
+    // Sort CDS in all transcripts, set offsets, check their phase, length, create index (idx_cds)
+    khint_t k;
+    for (k=0; k<kh_end(aux->id2tr); k++)
+    {
+        if ( !kh_exist(aux->id2tr, k) ) continue;
+        tscript_t *tr = (tscript_t*) kh_val(aux->id2tr, k);
+
+        // position-to-tscript lookup
+        char *chr_beg, *chr_end;
+        chr_beg_end(aux, tr->gene->iseq, &chr_beg, &chr_end);
+        regidx_push(args->idx_tscript, chr_beg, chr_end, tr->beg, tr->end, &tr);
+
+        if ( !tr->ncds ) continue;      // transcript with no CDS
+
+        // sort CDs
+        qsort(tr->cds, tr->ncds, sizeof(gf_cds_t*), cmp_cds_ptr);
+
+        // trim non-coding start
+        int i, len = 0;
+        if ( tr->strand==STRAND_FWD )
+        {
+            if ( tr->cds[0]->phase ) tr->trim |= TRIM_5PRIME;
+            tr->cds[0]->beg += tr->cds[0]->phase;
+            tr->cds[0]->len -= tr->cds[0]->phase;
+            tr->cds[0]->phase = 0;
+
+            // sanity check phase
+            for (i=0; i<tr->ncds; i++)
+            {
+                int phase = tr->cds[i]->phase ? 3 - tr->cds[i]->phase : 0;
+                if ( phase!=len%3)
+                    error("GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d)\n",ENSID(tr->id),tr->cds[i]->beg+1,phase,len);
+                assert( phase == len%3 );
+                len += tr->cds[i]->len; 
+            }
+        }
+        else
+        {
+            // Check that the phase is not bigger than CDS length. Curiously, this can really happen,
+            // see Mus_musculus.GRCm38.85.gff3.gz, transcript:ENSMUST00000163141
+            // todo: the same for the fwd strand
+            i = tr->ncds - 1;
+            int phase = tr->cds[i]->phase;
+            if ( phase ) tr->trim |= TRIM_5PRIME;
+            while ( i>=0 && phase > tr->cds[i]->len )
+            {
+                phase -= tr->cds[i]->len;
+                tr->cds[i]->phase = 0;
+                tr->cds[i]->len   = 0;
+                i--;
+            }
+            tr->cds[i]->len  -= tr->cds[i]->phase;
+            tr->cds[i]->phase = 0;
+
+            // sanity check phase
+            for (i=tr->ncds-1; i>=0; i--)
+            {
+                int phase = tr->cds[i]->phase ? 3 - tr->cds[i]->phase : 0;
+                if ( phase!=len%3)
+                    error("GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d)\n",ENSID(tr->id),tr->cds[i]->beg+1,phase,len);
+                len += tr->cds[i]->len;
+            }
+        }
+
+        // set len. At the same check that CDS within a transcript do not overlap
+        len = 0;
+        for (i=0; i<tr->ncds; i++)
+        {
+            tr->cds[i]->icds = i;
+            len += tr->cds[i]->len; 
+            if ( !i ) continue;
+
+            gf_cds_t *a = tr->cds[i-1];
+            gf_cds_t *b = tr->cds[i];
+            if ( a->beg + a->len - 1 >= b->beg ) 
+                error("Error: CDS overlap in the transcript %"PRIu32": %"PRIu32"-%"PRIu32" and %"PRIu32"-%"PRIu32"\n", 
+                    kh_key(aux->id2tr, k), a->beg+1,a->beg+a->len, b->beg+1,b->beg+b->len);
+        }
+        if ( len%3 != 0 )
+        {
+            // There are 13k transcripts with incomplete 3' CDS. See for example ENST00000524289
+            //  http://sep2015.archive.ensembl.org/Homo_sapiens/Transcript/Sequence_cDNA?db=core;g=ENSG00000155868;r=5:157138846-157159019;t=ENST00000524289
+            // Also, the incomplete CDS can be too short (1 or 2bp), so it is not enough to trim the last one.
+
+            tr->trim |= TRIM_3PRIME;
+            if ( tr->strand==STRAND_FWD )
+            {
+                i = tr->ncds - 1;
+                while ( i>=0 && len%3 )
+                {
+                    int dlen = tr->cds[i]->len >= len%3 ? len%3 : tr->cds[i]->len;
+                    tr->cds[i]->len -= dlen;
+                    len -= dlen;
+                    i--;
+                }
+            }
+            else
+            {
+                i = 0;
+                while ( i<tr->ncds && len%3 )
+                {
+                    int dlen = tr->cds[i]->len >= len%3 ? len%3 : tr->cds[i]->len;
+                    tr->cds[i]->len -= dlen;
+                    tr->cds[i]->beg += dlen;
+                    len -= dlen;
+                    i++;
+                }
+            }
+        }
+
+        // set CDS offsets and insert into regidx
+        len=0;
+        for (i=0; i<tr->ncds; i++)
+        {
+            tr->cds[i]->pos = len;
+            len += tr->cds[i]->len;
+            regidx_push(args->idx_cds, chr_beg,chr_end, tr->cds[i]->beg,tr->cds[i]->beg+tr->cds[i]->len-1, &tr->cds[i]);
+        }
+    }
+}
+
+void regidx_free_gf(void *payload) { free(*((gf_cds_t**)payload)); }
+void regidx_free_tscript(void *payload) { tscript_t *tr = *((tscript_t**)payload); free(tr->cds); free(tr); }
+
+void init_gff(args_t *args)
+{
+    aux_t *aux = &args->init;
+    aux->seq2int   = khash_str2int_init();   // chrom's numeric id
+    aux->gid2gene  = kh_init(int2gene);      // gene id to gf_gene_t, for idx_gene
+    aux->id2tr     = kh_init(int2tscript);   // transcript id to tscript_t
+    args->idx_tscript = regidx_init(NULL, NULL, regidx_free_tscript, sizeof(tscript_t*), NULL);
+    aux->ignored_biotypes = khash_str2int_init();
+
+    // parse gff
+    kstring_t str = {0,0,0};
+    htsFile *fp = hts_open(args->gff_fname,"r");
+    if ( !fp ) error("Failed to read %s\n", args->gff_fname);
+    while ( hts_getline(fp, KS_SEP_LINE, &str) > 0 )
+    {
+        hts_expand(ftr_t, aux->nftr+1, aux->mftr, aux->ftr);
+        int ret = gff_parse(args, str.s, aux->ftr + aux->nftr);
+        if ( !ret ) aux->nftr++;
+    }
+    free(str.s);
+    if ( hts_close(fp)!=0 ) error("Close failed: %s\n", args->gff_fname);
+
+
+    // process gff information: connect CDS and exons to transcripts
+    args->idx_cds  = regidx_init(NULL, NULL, regidx_free_gf, sizeof(gf_cds_t*), NULL);
+    args->idx_utr  = regidx_init(NULL, NULL, regidx_free_gf, sizeof(gf_utr_t*), NULL);
+    args->idx_exon = regidx_init(NULL, NULL, regidx_free_gf, sizeof(gf_exon_t*), NULL);
+    args->itr      = regitr_init(NULL);
+
+    int i;
+    for (i=0; i<aux->nftr; i++)
+    {
+        ftr_t *ftr = &aux->ftr[i];
+
+        // check whether to keep this feature: is there a mapping trid -> gene_id -> gene?
+        khint_t k = kh_get(int2tscript, aux->id2tr, (int)ftr->trid);
+        if ( k==kh_end(aux->id2tr) ) continue;       // no such transcript
+
+        tscript_t *tr = kh_val(aux->id2tr,k);
+        if ( !tr->gene->name )
+        {
+            // not a supported biotype (e.g. gene:pseudogene, transcript:processed_transcript)
+            regidx_free_tscript(&tr);
+            kh_del(int2tscript, aux->id2tr,k);
+            continue;
+        }
+
+        // populate regidx by category: 
+        //      ftr->type   .. GF_CDS, GF_EXON, GF_UTR3, GF_UTR5
+        //      gene->type  .. GF_PROTEIN_CODING, GF_MT_rRNA, GF_IG_C, ...
+        if ( ftr->type==GF_CDS ) register_cds(args, ftr);
+        else if ( ftr->type==GF_EXON ) register_exon(args, ftr);
+        else if ( ftr->type==GF_UTR5 ) register_utr(args, ftr);
+        else if ( ftr->type==GF_UTR3 ) register_utr(args, ftr);
+        else
+            error("something: %s\t%d\t%d\t%s\t%s\n", aux->seq[ftr->iseq],ftr->beg+1,ftr->end+1,ENSID(ftr->trid),gf_type2gff_string(ftr->type));
+    }
+    tscript_init_cds(args);
+
+    if ( !args->quiet )
+    {
+        fprintf(stderr,"Indexed %d transcripts, %d exons, %d CDSs, %d UTRs\n", 
+                regidx_nregs(args->idx_tscript),
+                regidx_nregs(args->idx_exon),
+                regidx_nregs(args->idx_cds),
+                regidx_nregs(args->idx_utr));
+    }
+
+    free(aux->ftr);
+    khash_str2int_destroy_free(aux->seq2int);
+    // keeping only to destroy the genes at the end: kh_destroy(int2gene,aux->gid2gene);
+    kh_destroy(int2tscript,aux->id2tr);
+    free(aux->seq);
+
+    if ( args->quiet<2 && khash_str2int_size(aux->ignored_biotypes) )
+    {
+        khash_t(str2int) *ign = (khash_t(str2int)*)aux->ignored_biotypes;
+        fprintf(stderr,"Ignored the following biotypes:\n");
+        for (i = kh_begin(ign); i < kh_end(ign); i++)
+        {
+            if ( !kh_exist(ign,i)) continue;
+            fprintf(stderr,"\t%dx\t.. %s\n", kh_value(ign,i), kh_key(ign,i));
+        }
+    }
+    khash_str2int_destroy_free(aux->ignored_biotypes);
+}
+
+void init_data(args_t *args)
+{
+    args->nfmt_bcsq = 1 + (args->ncsq_max - 1) / 32; 
+
+    if ( !args->quiet ) fprintf(stderr,"Parsing %s ...\n", args->gff_fname);
+    init_gff(args);
+
+    args->rid = -1;
+
+    if ( args->filter_str )
+        args->filter = filter_init(args->hdr, args->filter_str);
+
+    args->fai = fai_load(args->fa_fname);
+    if ( !args->fai ) error("Failed to load the fai index: %s\n", args->fa_fname);
+
+    args->pos2vbuf  = kh_init(pos2vbuf);
+    args->active_tr = khp_init(trhp);
+    args->hap = (hap_t*) calloc(1,sizeof(hap_t));
+
+    // init samples
+    if ( !bcf_hdr_nsamples(args->hdr) ) args->phase = PHASE_DROP_GT;
+    if ( args->sample_list && !strcmp("-",args->sample_list) )
+    {
+        // ignore all samples
+        if ( args->output_type==FT_TAB_TEXT ) 
+        {
+            // significant speedup for plain VCFs
+            bcf_hdr_set_samples(args->hdr,NULL,0);
+        }
+        args->phase = PHASE_DROP_GT;
+    }
+    else
+        args->smpl = smpl_ilist_init(args->hdr, args->sample_list, args->sample_is_file, SMPL_STRICT);
+    args->hdr_nsmpl = args->phase==PHASE_DROP_GT ? 0 : bcf_hdr_nsamples(args->hdr);
+
+    if ( args->output_type==FT_TAB_TEXT )
+    {
+        args->out = args->output_fname ? fopen(args->output_fname,"w") : stdout;
+        if ( !args->out ) error("Failed to open %s: %s\n", args->output_fname,strerror(errno));
+
+        fprintf(args->out,"# This file was produced by: bcftools +csq(%s+htslib-%s)\n", bcftools_version(),hts_version());
+        fprintf(args->out,"# The command line was:\tbcftools +%s", args->argv[0]);
+        int i;
+        for (i=1; i<args->argc; i++)
+            fprintf(args->out," %s",args->argv[i]);
+        fprintf(args->out,"\n");
+        fprintf(args->out,"# LOG\t[2]Message\n");
+        fprintf(args->out,"# CSQ"); i = 1;
+        fprintf(args->out,"\t[%d]Sample", ++i);
+        fprintf(args->out,"\t[%d]Haplotype", ++i);
+        fprintf(args->out,"\t[%d]Chromosome", ++i);
+        fprintf(args->out,"\t[%d]Position", ++i);
+        fprintf(args->out,"\t[%d]Consequence", ++i);
+        fprintf(args->out,"\n");
+    }
+    else
+    {
+        args->out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
+        if ( args->out_fh == NULL ) error("Can't write to %s: %s\n", args->output_fname? args->output_fname : "standard output", strerror(errno));
+        bcf_hdr_append_version(args->hdr,args->argc,args->argv,"bcftools/csq");
+        bcf_hdr_printf(args->hdr,"##INFO=<ID=%s,Number=.,Type=String,Description=\"%s consequence annotation from BCFtools/csq. Format: '[*]consequence|gene|transcript|biotype[|strand|amino_acid_change|dna_change]' or, for consequences of variants split across multiple sites, a pointer to the record storing the consequences '@position'. '*' prefix indicates a consequence downstream from a stop \">",args->bcsq_tag, args->local_csq ? "Local" : "Haplotype-aware");
+        if ( args->hdr_nsmpl ) 
+            bcf_hdr_printf(args->hdr,"##FORMAT=<ID=%s,Number=.,Type=Integer,Description=\"Bitmask of indexes to INFO/BCSQ, with interleaved first/second haplotype. Use \\\"bcftools query -f'[%%CHROM\\t%%POS\\t%%SAMPLE\\t%%TBCSQ\\n]'\\\" to translate.\">",args->bcsq_tag);
+        bcf_hdr_write(args->out_fh, args->hdr);
+    }
+    if ( !args->quiet ) fprintf(stderr,"Calling...\n");
+}
+
+void destroy_data(args_t *args)
+{
+    regidx_destroy(args->idx_cds);
+    regidx_destroy(args->idx_utr);
+    regidx_destroy(args->idx_exon);
+    regidx_destroy(args->idx_tscript);
+    regitr_destroy(args->itr);
+
+    khint_t k,i,j;
+    for (k=0; k<kh_end(args->init.gid2gene); k++)
+    {
+        if ( !kh_exist(args->init.gid2gene, k) ) continue;
+        gf_gene_t *gene = (gf_gene_t*) kh_val(args->init.gid2gene, k);
+        free(gene->name);
+        free(gene);
+    }
+    kh_destroy(int2gene,args->init.gid2gene);
+
+    if ( args->filter )
+        filter_destroy(args->filter);
+
+    khp_destroy(trhp,args->active_tr);
+    kh_destroy(pos2vbuf,args->pos2vbuf);
+    if ( args->smpl ) smpl_ilist_destroy(args->smpl);
+    int ret;
+    if ( args->out_fh )
+        ret = hts_close(args->out_fh);
+    else
+        ret = fclose(args->out);
+    if ( ret ) error("Error: close failed .. %s\n", args->output_fname?args->output_fname:"stdout");
+    for (i=0; i<args->vcf_rbuf.m; i++)
+    {
+        vbuf_t *vbuf = args->vcf_buf[i];
+        if ( !vbuf ) continue;
+        for (j=0; j<vbuf->m; j++)
+        {
+            if ( !vbuf->vrec[j] ) continue;
+            if ( vbuf->vrec[j]->line ) bcf_destroy(vbuf->vrec[j]->line);
+            free(vbuf->vrec[j]->smpl);
+            free(vbuf->vrec[j]->vcsq);
+            free(vbuf->vrec[j]);
+        }
+        free(vbuf->vrec);
+        free(vbuf);
+    }
+    free(args->vcf_buf);
+    free(args->rm_tr);
+    free(args->csq_buf);
+    free(args->hap->stack);
+    free(args->hap->sseq.s);
+    free(args->hap->tseq.s);
+    free(args->hap->tref.s);
+    free(args->hap);
+    fai_destroy(args->fai);
+    free(args->gt_arr);
+    free(args->str.s);
+    free(args->str2.s);
+    free(ENSID_FMT);
+}
+
+/*
+    The splice_* functions are for consquences around splice sites: start,stop,splice_*
+ */
+#define SPLICE_VAR_REF 0   // ref: ACGT>ACGT, csq not applicable, skip completely
+#define SPLICE_OUTSIDE 1   // splice acceptor or similar; csq set and is done, does not overlap the region
+#define SPLICE_INSIDE  2   // overlaps coding region; csq can be set but coding prediction is needed 
+#define SPLICE_OVERLAP 3   // indel overlaps region boundary, csq set but could not determine csq
+typedef struct
+{
+    tscript_t *tr;
+    struct {
+        int32_t pos, rlen, alen;
+        char *ref, *alt;
+        bcf1_t *rec;
+    } vcf;
+    uint16_t check_acceptor:1,  // check distance from exon start (fwd) or end (rev)
+             check_start:1,     // this is the first coding exon (relative to transcript orientation), check first (fwd) or last (rev) codon 
+             check_stop:1,      // this is the last coding exon (relative to transcript orientation), check last (fwd) or first (rev) codon
+             check_donor:1,     // as with check_acceptor
+             check_region_beg:1,    // do/don't check for splices at this end, eg. in the first or last exon
+             check_region_end:1,    // 
+             check_utr:1,           // check splice sites (acceptor/donor/region_*) only if not in utr
+             set_refalt:1;          // set kref,kalt, if set, check also for synonymous events
+    uint32_t csq;
+    int tbeg, tend;             // number of trimmed bases from beg and end of ref,alt allele
+    uint32_t ref_beg,           // ref coordinates with spurious bases removed, ACC>AC can become AC>A or CC>C, whichever gives 
+             ref_end;           // a more conservative csq (the first and last base in kref.s)
+    kstring_t kref, kalt;       // trimmed alleles, set only with SPLICE_OLAP
+}
+splice_t;
+void splice_init(splice_t *splice, bcf1_t *rec)
+{
+    memset(splice,0,sizeof(*splice));
+    splice->vcf.rec  = rec;
+    splice->vcf.pos  = rec->pos;
+    splice->vcf.rlen = rec->rlen;
+    splice->vcf.ref  = rec->d.allele[0];
+}
+static inline void splice_build_hap(splice_t *splice, uint32_t beg, int len)
+{
+    // len>0 .. beg is the first base, del filled from right
+    // len<0 .. beg is the last base, del filled from left
+
+    int rlen, alen, rbeg, abeg;     // first base to include (ref coordinates)
+    if ( len<0 )
+    {
+        rlen = alen = -len;
+        rbeg = beg - rlen + 1;
+        int dlen = splice->vcf.alen - splice->vcf.rlen;
+        if ( dlen<0 && beg < splice->ref_end ) // incomplete del, beg is in the middle
+            dlen += splice->ref_end - beg;
+        abeg = rbeg + dlen;
+    }
+    else
+    {
+        rbeg = abeg = beg;
+        rlen = alen = len;
+        // check for incomplete del as above??
+    }
+
+#define XDBG 0
+#if XDBG
+fprintf(stderr,"build_hap:  rbeg=%d + %d    abeg=%d \n",rbeg,rlen,abeg);
+#endif 
+    splice->kref.l = 0;
+    splice->kalt.l = 0;
+
+    // add the part before vcf.ref, in the vcf.ref and after vcf.ref
+    int roff;   // how many vcf.ref bases already used
+    if ( rbeg < splice->vcf.pos )
+    {
+        assert( splice->tr->beg <= rbeg );  // this can be extended thanks to N_REF_PAD
+        kputsn(splice->tr->ref + N_REF_PAD + rbeg - splice->tr->beg, splice->vcf.pos - rbeg, &splice->kref);
+        roff = 0;
+    }
+    else
+        roff = rbeg - splice->vcf.pos;
+#if XDBG
+fprintf(stderr,"r1: %s  roff=%d\n",splice->kref.s,roff);
+#endif
+
+    if ( roff < splice->vcf.rlen && splice->kref.l < rlen )
+    {
+        int len = splice->vcf.rlen - roff;  // len still available in vcf.ref
+        if ( len > rlen - splice->kref.l ) len = rlen - splice->kref.l; // how much of ref allele is still needed
+        kputsn(splice->vcf.ref + roff, len, &splice->kref);
+    }
+#if XDBG
+fprintf(stderr,"r2: %s\n",splice->kref.s);
+#endif
+
+    uint32_t end = splice->vcf.pos + splice->vcf.rlen;    // position just after the ref allele
+    if ( splice->kref.l < rlen )
+    {
+        if ( end + rlen - splice->kref.l - 1 > splice->tr->end ) // trim, the requested sequence is too long (could be extended, see N_REF_PAD)
+            rlen -= end + rlen - splice->kref.l - 1 - splice->tr->end;
+        if ( splice->kref.l < rlen )
+            kputsn(splice->tr->ref + N_REF_PAD + end - splice->tr->beg, rlen - splice->kref.l, &splice->kref);
+    }
+#if XDBG
+fprintf(stderr,"r3: %s\n",splice->kref.s);
+#endif
+
+
+    int aoff;
+    if ( abeg < splice->vcf.pos )
+    {
+        assert( splice->tr->beg <= abeg );
+        kputsn(splice->tr->ref + N_REF_PAD + abeg - splice->tr->beg, splice->vcf.pos - abeg, &splice->kalt);
+        aoff = 0;
+    }
+    else
+        aoff = abeg - splice->vcf.pos;
+#if XDBG
+fprintf(stderr,"a1: %s  aoff=%d\n",splice->kalt.s,aoff);
+#endif
+
+    if ( aoff < splice->vcf.alen && splice->kalt.l < alen )
+    {
+        int len = splice->vcf.alen - aoff;  // len still available in vcf.alt
+        if ( len > alen - splice->kalt.l ) len = alen - splice->kalt.l; // how much of alt allele is still needed
+        kputsn(splice->vcf.alt + aoff, len, &splice->kalt);
+        aoff -= len;
+    }
+    if ( aoff < 0 ) aoff = 0;
+    else aoff--;
+#if XDBG
+fprintf(stderr,"a2: %s  aoff=%d\n",splice->kalt.s,aoff);
+#endif
+
+    end = splice->vcf.pos + splice->vcf.rlen;    // position just after the ref allele
+    if ( splice->kalt.l < alen )
+    {
+        if ( end + alen + aoff - splice->kalt.l - 1 > splice->tr->end ) // trim, the requested sequence is too long
+            alen -= end + alen + aoff - splice->kalt.l - 1 - splice->tr->end;
+        if ( alen > 0 && alen > splice->kalt.l )
+            kputsn(splice->tr->ref + aoff + N_REF_PAD + end - splice->tr->beg, alen - splice->kalt.l, &splice->kalt);
+    }
+#if XDBG
+fprintf(stderr,"a3: %s\n",splice->kalt.s);
+fprintf(stderr," [%s]\n [%s]\n\n",splice->kref.s,splice->kalt.s);
+#endif
+}
+void csq_stage(args_t *args, csq_t *csq, bcf1_t *rec);
+static inline int csq_stage_utr(args_t *args, regitr_t *itr, bcf1_t *rec, uint32_t trid)
+{
+    while ( regitr_overlap(itr) )
+    {
+        gf_utr_t *utr = regitr_payload(itr, gf_utr_t*);
+        tscript_t *tr = utr->tr;
+        if ( tr->id != trid ) continue;
+        csq_t csq; 
+        memset(&csq, 0, sizeof(csq_t));
+        csq.pos          = rec->pos;
+        csq.type.type    = utr->which==prime5 ? CSQ_UTR5 : CSQ_UTR3;
+        csq.type.biotype = tr->type;
+        csq.type.strand  = tr->strand;
+        csq.type.trid    = tr->id;
+        csq.type.gene    = tr->gene->name;
+        csq_stage(args, &csq, rec);
+        return csq.type.type;
+    }
+    return 0;
+}
+static inline void csq_stage_splice(args_t *args, bcf1_t *rec, tscript_t *tr, uint32_t type)
+{
+#if XDBG
+fprintf(stderr,"csq_stage_splice %d: type=%d\n",rec->pos+1,type);
+#endif
+    if ( !type ) return;
+    csq_t csq; 
+    memset(&csq, 0, sizeof(csq_t));
+    csq.pos          = rec->pos;
+    csq.type.type    = type;
+    csq.type.biotype = tr->type;
+    csq.type.strand  = tr->strand;
+    csq.type.trid    = tr->id;
+    csq.type.gene    = tr->gene->name;
+    csq_stage(args, &csq, rec);
+}
+static inline int splice_csq_ins(args_t *args, splice_t *splice, uint32_t ex_beg, uint32_t ex_end)
+{
+    // coordinates that matter for consequences, eg AC>ACG trimmed to C>CG, 1bp
+    // before and after the inserted bases
+    if ( splice->tbeg || splice->vcf.ref[0]!=splice->vcf.alt[0] )
+    {
+        splice->ref_beg = splice->vcf.pos + splice->tbeg - 1;
+        splice->ref_end = splice->vcf.pos + splice->vcf.rlen - splice->tend;
+    }
+    else
+    {
+        if ( splice->tend ) splice->tend--;
+        splice->ref_beg = splice->vcf.pos;
+        splice->ref_end = splice->vcf.pos + splice->vcf.rlen - splice->tend;
+    }
+#if XDBG
+fprintf(stderr,"ins: %s>%s .. ex=%d,%d  beg,end=%d,%d  tbeg,tend=%d,%d  check_utr=%d start,stop,beg,end=%d,%d,%d,%d\n", splice->vcf.ref,splice->vcf.alt,ex_beg,ex_end,splice->ref_beg,splice->ref_end,splice->tbeg,splice->tend,splice->check_utr,splice->check_start,splice->check_stop,splice->check_region_beg,splice->check_region_end);
+#endif
+
+    int ret;
+    if ( splice->ref_beg >= ex_end )   // fully outside, beyond the exon
+    {
+        if ( splice->check_utr )
+        {
+            regitr_t *itr = regitr_init(NULL);
+            const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+            if ( regidx_overlap(args->idx_utr,chr,splice->ref_beg+1,splice->ref_beg+1, itr) )     // adjacent utr
+            {
+                ret = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                if ( ret!=0 ) 
+                {
+                    regitr_destroy(itr);
+                    return SPLICE_OUTSIDE; // overlaps utr
+                }
+            }
+            regitr_destroy(itr);
+        }
+        if ( !splice->check_region_end ) return SPLICE_OUTSIDE;
+        char *ref = NULL, *alt = NULL;
+        if ( splice->set_refalt )   // seq identity is checked only when tr->ref is available
+        {
+            splice_build_hap(splice, ex_end+1, N_SPLICE_REGION_INTRON);
+            ref = splice->kref.s, alt = splice->kalt.s;
+        }
+        if ( splice->ref_beg < ex_end + N_SPLICE_REGION_INTRON && splice->ref_end > ex_end + N_SPLICE_DONOR )
+        {
+            splice->csq |= CSQ_SPLICE_REGION;
+            if ( ref && !strncmp(ref,alt,N_SPLICE_REGION_INTRON) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+        }
+        if ( splice->ref_beg < ex_end + N_SPLICE_DONOR )
+        {
+            if ( splice->check_donor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_DONOR;
+            if ( splice->check_acceptor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+            if ( ref && !strncmp(ref,alt,N_SPLICE_DONOR) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+        }
+        csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+        return SPLICE_OUTSIDE;
+    }
+    if ( splice->ref_end < ex_beg || (splice->ref_end == ex_beg && !splice->check_region_beg) )    // fully outside, before the exon
+    {
+        if ( splice->check_utr )
+        {
+            regitr_t *itr = regitr_init(NULL);
+            const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+            if ( regidx_overlap(args->idx_utr,chr,splice->ref_end-1,splice->ref_end-1, itr) )     // adjacent utr
+            {
+                ret = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                if ( ret!=0 )
+                {
+                    regitr_destroy(itr);
+                    return SPLICE_OUTSIDE; // overlaps utr
+                }
+            }
+            regitr_destroy(itr);
+        }
+        if ( !splice->check_region_beg ) return SPLICE_OUTSIDE;
+        char *ref = NULL, *alt = NULL;
+        if ( splice->set_refalt )   // seq identity is checked only when tr->ref is available
+        {
+            splice_build_hap(splice, ex_beg - N_SPLICE_REGION_INTRON, N_SPLICE_REGION_INTRON);
+            ref = splice->kref.s, alt = splice->kalt.s;
+        }
+        if ( splice->ref_end > ex_beg - N_SPLICE_REGION_INTRON && splice->ref_beg < ex_beg - N_SPLICE_DONOR )
+        {
+            splice->csq |= CSQ_SPLICE_REGION;
+            if ( ref && !strncmp(ref,alt,N_SPLICE_REGION_INTRON) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+        }
+        if ( splice->ref_end > ex_beg - N_SPLICE_DONOR )
+        {
+            if ( splice->check_donor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_DONOR;
+            if ( splice->check_acceptor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+            if ( ref && !strncmp(ref+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,alt+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,N_SPLICE_DONOR) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+        }
+        csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+        return SPLICE_OUTSIDE;
+    }
+    // overlaps the exon or inside the exon
+    // possible todo: find better alignment for frameshifting variants?
+    if ( splice->ref_beg <= ex_beg + 2 )    // in the first 3bp
+    {
+        if ( splice->check_region_beg ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_FWD ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->ref_end > ex_end - 2 )
+    {
+        if ( splice->check_region_end ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_REV ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->set_refalt )
+    {
+        // Make sure the variant will not end up left aligned to avoid overlapping vcf records
+        //      splice_build_hap(splice, splice->ref_beg, splice->vcf.alen - splice->tend - splice->tbeg + 1);
+        //      splice->vcf.rlen -= splice->tbeg + splice->tend - 1;
+        //      if ( splice->kref.l > splice->vcf.rlen ) { splice->kref.l = splice->vcf.rlen;  splice->kref.s[splice->kref.l] = 0; }
+        if ( splice->ref_beg < splice->vcf.pos )    // this must have been caused by too much trimming from right
+        {
+            int dlen = splice->vcf.pos - splice->ref_beg;
+            assert( dlen==1 );
+            splice->tbeg += dlen;
+            if ( splice->tbeg + splice->tend == splice->vcf.rlen ) splice->tend -= dlen;
+            splice->ref_beg = splice->vcf.pos;
+        }
+        if ( splice->ref_end==ex_beg ) splice->tend--;  // prevent zero-length ref allele
+        splice_build_hap(splice, splice->ref_beg, splice->vcf.alen - splice->tend - splice->tbeg + 1);
+        splice->vcf.rlen -= splice->tbeg + splice->tend - 1;
+        if ( splice->kref.l > splice->vcf.rlen ) { splice->kref.l = splice->vcf.rlen;  splice->kref.s[splice->kref.l] = 0; }
+    }
+    csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+    return SPLICE_INSIDE;
+}
+
+static inline int splice_csq_del(args_t *args, splice_t *splice, uint32_t ex_beg, uint32_t ex_end)
+{
+    // coordinates that matter for consequences, eg AC>ACG trimmed to C>CG
+    splice->ref_beg = splice->vcf.pos + splice->tbeg - 1;                       // 1b before the deleted base
+    splice->ref_end = splice->vcf.pos + splice->vcf.rlen - splice->tend - 1;    // the last deleted base
+
+#if XDBG
+fprintf(stderr,"del: %s>%s .. ex=%d,%d  beg,end=%d,%d  tbeg,tend=%d,%d  check_utr=%d start,stop,beg,end=%d,%d,%d,%d\n", splice->vcf.ref,splice->vcf.alt,ex_beg,ex_end,splice->ref_beg,splice->ref_end,splice->tbeg,splice->tend,splice->check_utr,splice->check_start,splice->check_stop,splice->check_region_beg,splice->check_region_end);
+#endif
+
+    if ( splice->ref_beg + 1 < ex_beg )     // the part before the exon; ref_beg is off by -1
+    {
+        if ( splice->check_region_beg )
+        {
+            int csq = 0;
+            if ( splice->check_utr )
+            {
+                regitr_t *itr = regitr_init(NULL);
+                const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+                if ( regidx_overlap(args->idx_utr,chr,splice->ref_beg,ex_beg-1, itr) )     // adjacent utr
+                    csq = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                regitr_destroy(itr);
+            }
+            if ( !csq )
+            {
+                char *ref = NULL, *alt = NULL;
+                if ( splice->set_refalt )   // seq identity is checked only when tr->ref is available
+                {
+                    // filling from the left does not work for ENST00000341065/frame3.vcf
+                    //    CAG.GTGGCCAG      CAG.GTGGCCAG
+                    //    CA-.--GGCCAG  vs  CAG.---GCCAG
+                    //  splice_build_hap(splice, ex_beg-1, -N_SPLICE_REGION_INTRON);
+                    //
+                    // filling from the right:
+                    splice_build_hap(splice, ex_beg - N_SPLICE_REGION_INTRON, N_SPLICE_REGION_INTRON);
+                    ref = splice->kref.s, alt = splice->kalt.s;
+                }
+                if ( splice->ref_end >= ex_beg - N_SPLICE_REGION_INTRON && splice->ref_beg < ex_beg - N_SPLICE_DONOR )
+                {
+                    splice->csq |= CSQ_SPLICE_REGION;
+                    if ( ref && alt && !strncmp(ref,alt,N_SPLICE_REGION_INTRON) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+                }
+                if ( splice->ref_end >= ex_beg - N_SPLICE_DONOR )
+                {
+                    if ( splice->check_donor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_DONOR;
+                    if ( splice->check_acceptor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+                    if ( ref && alt && !strncmp(ref+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,alt+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,N_SPLICE_DONOR) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+                }
+            }
+        }
+        if ( splice->ref_end >= ex_beg ) 
+        {
+            splice->tbeg = splice->ref_beg - splice->vcf.pos + 1;
+            splice->ref_beg = ex_beg - 1;
+            if ( splice->tbeg + splice->tend == splice->vcf.alen )
+            {
+                // the deletion overlaps ex_beg and cannot be easily realigned to the right
+                if ( !splice->tend )
+                {
+                    splice->csq |= CSQ_CODING_SEQUENCE;
+                    return SPLICE_OVERLAP;
+                }
+                splice->tend--;
+            }
+        }
+    }
+    if ( ex_end < splice->ref_end )     // the part after the exon
+    {
+        if ( splice->check_region_end )
+        {
+            int csq = 0;
+            if ( splice->check_utr )
+            {
+                regitr_t *itr = regitr_init(NULL);
+                const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+                if ( regidx_overlap(args->idx_utr,chr,ex_end+1,splice->ref_end, itr) )     // adjacent utr
+                    csq = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                regitr_destroy(itr);
+            }
+            if ( !csq )
+            {
+                char *ref = NULL, *alt = NULL;
+                if ( splice->set_refalt )   // seq identity is checked only when tr->ref is available
+                {
+                    splice_build_hap(splice, ex_end+1, N_SPLICE_REGION_INTRON);  // ref,alt positioned at the first intron base
+                    ref = splice->kref.s, alt = splice->kalt.s;
+                }
+                if ( splice->ref_beg < ex_end + N_SPLICE_REGION_INTRON && splice->ref_end > ex_end + N_SPLICE_DONOR )
+                {
+                    splice->csq |= CSQ_SPLICE_REGION;
+                    if ( ref && alt && !strncmp(ref,alt,N_SPLICE_REGION_INTRON) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+                }
+                if ( splice->ref_beg < ex_end + N_SPLICE_DONOR )
+                {
+                    if ( splice->check_donor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_DONOR;
+                    if ( splice->check_acceptor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+                    if ( ref && alt && !strncmp(ref+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,alt+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,N_SPLICE_DONOR) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+                }
+            }
+        }
+        if ( splice->ref_beg < ex_end ) 
+        {
+            splice->tend = splice->vcf.rlen - (splice->ref_end - splice->vcf.pos + 1);
+            splice->ref_end = ex_end;
+        }
+    }
+    if ( splice->ref_end < ex_beg || splice->ref_beg >= ex_end )
+    {
+        csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+        return SPLICE_OUTSIDE;
+    }
+
+    if ( splice->ref_beg < ex_beg + 2 ) // ref_beg is off by -1
+    {
+        if ( splice->check_region_beg ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_FWD ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->ref_end > ex_end - 3 )
+    {
+        if ( splice->check_region_end ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_REV ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->set_refalt )
+    {
+        if ( splice->tbeg>0 ) splice->tbeg--;  //why is this?
+        if ( splice->vcf.rlen > splice->tbeg + splice->tend && splice->vcf.alen > splice->tbeg + splice->tend )
+        {
+            splice->vcf.rlen -= splice->tbeg + splice->tend;
+            splice->vcf.alen -= splice->tbeg + splice->tend;
+        }
+        splice->kref.l = 0; kputsn(splice->vcf.ref + splice->tbeg, splice->vcf.rlen, &splice->kref); 
+        splice->kalt.l = 0; kputsn(splice->vcf.alt + splice->tbeg, splice->vcf.alen, &splice->kalt); 
+        if ( (splice->ref_beg+1 < ex_beg && splice->ref_end >= ex_beg) || (splice->ref_beg+1 < ex_end && splice->ref_end >= ex_end) ) // ouch, ugly ENST00000409523/long-overlapping-del.vcf
+        {
+            splice->csq |= (splice->ref_end - splice->ref_beg + 1)%3 ? CSQ_FRAMESHIFT_VARIANT : CSQ_INFRAME_DELETION;
+            return SPLICE_OVERLAP;
+        }
+    }
+    csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+    return SPLICE_INSIDE;
+}
+
+static inline int splice_csq_mnp(args_t *args, splice_t *splice, uint32_t ex_beg, uint32_t ex_end)
+{
+    // not a real variant, can be ignored: eg ACGT>ACGT
+    if ( splice->tbeg + splice->tend == splice->vcf.rlen ) return SPLICE_VAR_REF;
+
+    splice->ref_beg = splice->vcf.pos + splice->tbeg;
+    splice->ref_end = splice->vcf.pos + splice->vcf.rlen - splice->tend - 1;
+
+#if XDBG
+fprintf(stderr,"mnp: %s>%s .. ex=%d,%d  beg,end=%d,%d  tbeg,tend=%d,%d  check_utr=%d start,stop,beg,end=%d,%d,%d,%d\n", splice->vcf.ref,splice->vcf.alt,ex_beg,ex_end,splice->ref_beg,splice->ref_end,splice->tbeg,splice->tend,splice->check_utr,splice->check_start,splice->check_stop,splice->check_region_beg,splice->check_region_end);
+#endif
+
+    if ( splice->ref_beg < ex_beg )     // the part before the exon
+    {
+        if ( splice->check_region_beg )
+        {
+            int csq = 0;
+            if ( splice->check_utr )
+            {
+                regitr_t *itr = regitr_init(NULL);
+                const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+                if ( regidx_overlap(args->idx_utr,chr,splice->ref_beg,ex_beg-1, itr) )     // adjacent utr
+                    csq = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                regitr_destroy(itr);
+            }
+            if ( !csq )
+            {
+                if ( splice->ref_end >= ex_beg - N_SPLICE_REGION_INTRON && splice->ref_beg < ex_beg - N_SPLICE_DONOR )
+                    splice->csq |= CSQ_SPLICE_REGION;
+                if ( splice->ref_end >= ex_beg - N_SPLICE_DONOR )
+                {
+                    if ( splice->check_donor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_DONOR;
+                    if ( splice->check_acceptor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+                }
+            }
+        }
+        if ( splice->ref_end >= ex_beg ) 
+        {
+            splice->tbeg = splice->ref_beg - splice->vcf.pos;
+            splice->ref_beg = ex_beg;
+        }
+    }
+    if ( ex_end < splice->ref_end )     // the part after the exon
+    {
+        if ( splice->check_region_end )
+        {
+            int csq = 0;
+            if ( splice->check_utr )
+            {
+                regitr_t *itr = regitr_init(NULL);
+                const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+                if ( regidx_overlap(args->idx_utr,chr,ex_end+1,splice->ref_end, itr) )     // adjacent utr
+                    csq = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                regitr_destroy(itr);
+            }
+            if ( !csq )
+            {
+                if ( splice->ref_beg <= ex_end + N_SPLICE_REGION_INTRON && splice->ref_end > ex_end + N_SPLICE_DONOR )
+                    splice->csq |= CSQ_SPLICE_REGION;
+                if ( splice->ref_beg <= ex_end + N_SPLICE_DONOR )
+                {
+                    if ( splice->check_donor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_DONOR;
+                    if ( splice->check_acceptor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+                }
+            }
+        }
+        if ( splice->ref_beg <= ex_end ) 
+        {
+            splice->tend = splice->vcf.rlen - (splice->ref_end - splice->vcf.pos + 1);
+            splice->ref_end = ex_end;
+        }
+    }
+    if ( splice->ref_end < ex_beg || splice->ref_beg > ex_end )
+    {
+        csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+        return SPLICE_OUTSIDE;
+    }
+
+    if ( splice->ref_beg < ex_beg + 3 )
+    {
+        if ( splice->check_region_beg ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_FWD ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->ref_end > ex_end - 3 )
+    {
+        if ( splice->check_region_end ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_REV ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->set_refalt )
+    {
+        splice->vcf.rlen -= splice->tbeg + splice->tend;
+        splice->kref.l = 0; kputsn(splice->vcf.ref + splice->tbeg, splice->vcf.rlen, &splice->kref); 
+        splice->kalt.l = 0; kputsn(splice->vcf.alt + splice->tbeg, splice->vcf.rlen, &splice->kalt); 
+    }
+    csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+    return SPLICE_INSIDE;
+}
+static inline int splice_csq(args_t *args, splice_t *splice, uint32_t ex_beg, uint32_t ex_end)
+{
+    splice->csq = 0;
+    splice->vcf.alen = strlen(splice->vcf.alt);
+
+    int rlen1 = splice->vcf.rlen - 1, alen1 = splice->vcf.alen - 1, i = 0;
+    splice->tbeg = 0, splice->tend = 0;
+
+    // trim from the right, then from the left
+    while ( i<=rlen1 && i<=alen1 )
+    {
+        if ( splice->vcf.ref[rlen1-i] != splice->vcf.alt[alen1-i] ) break;
+        i++;
+    }
+    splice->tend = i;
+    rlen1 -= i, alen1 -= i, i = 0;
+    while ( i<=rlen1 && i<=alen1 )
+    {
+        if ( splice->vcf.ref[i] != splice->vcf.alt[i] ) break;
+        i++;
+    }
+    splice->tbeg = i;
+
+    // The mnp, ins and del code was split into near-identical functions for clarity and debugging;
+    // possible todo: generalize once stable
+    if ( splice->vcf.rlen==splice->vcf.alen ) return splice_csq_mnp(args, splice, ex_beg, ex_end);
+    if ( splice->vcf.rlen < splice->vcf.alen ) return splice_csq_ins(args, splice, ex_beg, ex_end);
+    if ( splice->vcf.rlen > splice->vcf.alen ) return splice_csq_del(args, splice, ex_beg, ex_end);
+
+    return 0;
+}
+
+// return value: 0 added, 1 overlapping variant, 2 silent discard (intronic,alt=ref)
+int hap_init(args_t *args, hap_node_t *parent, hap_node_t *child, gf_cds_t *cds, bcf1_t *rec, int ial)
+{
+    int i;
+    kstring_t str = {0,0,0};
+    tscript_t *tr = cds->tr;
+    child->icds = cds->icds;     // index of cds in the tscript's list of exons
+
+    splice_t splice;
+    splice_init(&splice, rec);
+    splice.tr = tr;
+    splice.vcf.alt  = rec->d.allele[ial];
+    splice.check_acceptor = splice.check_donor = splice.set_refalt = splice.check_utr = 1;
+    if ( !(tr->trim & TRIM_5PRIME) )
+    {
+        if ( tr->strand==STRAND_FWD ) { if ( child->icds==0 ) splice.check_start = 1; }
+        else { if ( child->icds==tr->ncds-1 ) splice.check_start = 1; }
+    }
+    if ( !(tr->trim & TRIM_3PRIME) )
+    {
+        if ( tr->strand==STRAND_FWD ) { if ( child->icds==tr->ncds-1 ) splice.check_stop = 1; }
+        else { if ( child->icds==0 ) splice.check_stop = 1; }
+    }
+    if ( splice.check_start )   // do not check starts in incomplete CDS, defined as not starting with M
+    {
+        if ( tr->strand==STRAND_FWD ) { if ( dna2aa(tr->ref+N_REF_PAD+cds->beg-tr->beg) != 'M' ) splice.check_start = 0; }
+        else { if ( cdna2aa(tr->ref+N_REF_PAD+cds->beg-tr->beg+cds->len-3) != 'M' ) splice.check_start = 0; }
+    }
+    if ( child->icds!=0 ) splice.check_region_beg = 1;
+    if ( child->icds!=tr->ncds-1 ) splice.check_region_end = 1;
+
+#if XDBG
+fprintf(stderr,"\n%d [%s][%s]   check start:%d,stop:%d\n",splice.vcf.pos+1,splice.vcf.ref,splice.vcf.alt,splice.check_start,splice.check_stop);
+#endif
+    int ret = splice_csq(args, &splice, cds->beg, cds->beg + cds->len - 1);
+#if XDBG
+fprintf(stderr,"cds splice_csq: %d [%s][%s] .. beg,end=%d %d, ret=%d, csq=%d\n\n",splice.vcf.pos+1,splice.kref.s,splice.kalt.s,splice.ref_beg+1,splice.ref_end+1,ret,splice.csq);
+#endif
+
+    if ( ret==SPLICE_VAR_REF ) return 2;  // not a variant, eg REF=CA ALT=CA
+    if ( ret==SPLICE_OUTSIDE || ret==SPLICE_OVERLAP )  // not a coding csq
+    {
+        free(splice.kref.s);
+        free(splice.kalt.s);
+
+        if ( !splice.csq ) return 2;        // fully intronic, no csq
+
+        // splice_region/acceptor/donor
+        child->seq  = NULL;
+        child->sbeg = 0;
+        child->rbeg = rec->pos;
+        child->rlen = 0;
+        child->dlen = 0;
+        kputs(rec->d.allele[0],&str);
+        kputc('>',&str);
+        kputs(rec->d.allele[ial],&str);
+        child->var  = str.s;
+        child->type = HAP_SSS;
+        child->csq  = splice.csq;
+        child->prev = parent->type==HAP_SSS ? parent->prev : parent;
+        child->rec  = rec;
+        return 0;
+    }
+    if ( splice.csq & CSQ_SYNONYMOUS_VARIANT ) splice.csq &= ~CSQ_SYNONYMOUS_VARIANT;   // synonymous&splice,frame could become synonymous&frame,splice
+
+    int dbeg = 0;
+    if ( splice.ref_beg < cds->beg )
+    {
+        // The vcf record overlaps the exon boundary, but the variant itself
+        // should fit inside since we are here. This will need more work.
+        // #1475227917
+        dbeg = cds->beg - splice.ref_beg;
+        splice.kref.l -= dbeg;
+        splice.ref_beg = cds->beg;
+        assert( dbeg <= splice.kalt.l );
+    }
+
+    if ( parent->type==HAP_SSS ) parent = parent->prev;
+    if ( parent->type==HAP_CDS )    
+    {
+        i = parent->icds;
+        if ( i!=cds->icds )
+        {
+            // the variant is on a new exon, finish up the previous
+            int len = tr->cds[i]->len - parent->rbeg - parent->rlen + tr->cds[i]->beg;
+            if ( len > 0 )
+                kputsn_(tr->ref + N_REF_PAD + parent->rbeg + parent->rlen - tr->beg, len, &str);
+        }
+
+        // append any skipped non-variant exons
+        while ( ++i < cds->icds )
+            kputsn_(tr->ref + N_REF_PAD + tr->cds[i]->beg - tr->beg, tr->cds[i]->len, &str);
+
+        if ( parent->icds==child->icds )
+        {
+            int len = splice.ref_beg - parent->rbeg - parent->rlen;
+            if ( len < 0 )   // overlapping variants
+            {
+                free(str.s);
+                return 1;
+            }
+            kputsn_(tr->ref + N_REF_PAD + parent->rbeg + parent->rlen - tr->beg, len, &str);
+        }
+        else
+            kputsn_(tr->ref + N_REF_PAD + cds->beg - tr->beg, splice.ref_beg - cds->beg, &str);
+    }
+    kputs(splice.kalt.s + dbeg, &str);
+
+    child->seq  = str.s;
+    child->sbeg = cds->pos + (splice.ref_beg - cds->beg);
+    child->rbeg = splice.ref_beg;
+    child->rlen = splice.kref.l;
+    child->type = HAP_CDS;
+    child->prev = parent;
+    child->rec  = rec;
+    child->csq  = splice.csq;
+
+    // set vlen and the "ref>alt" string
+    {
+        int rlen = strlen(rec->d.allele[0]);
+        int alen = strlen(rec->d.allele[ial]);
+        child->dlen = alen - rlen;
+        child->var  = (char*) malloc(rlen+alen+2);
+        memcpy(child->var,rec->d.allele[0],rlen);
+        child->var[rlen] = '>';
+        memcpy(child->var+rlen+1,rec->d.allele[ial],alen);
+        child->var[rlen+alen+1] = 0;
+    }
+
+    // yuck, the whole CDS is modified/deleted, not ready for this, todo.
+    if ( child->rbeg + child->rlen > cds->beg + cds->len )
+    {
+        child->type = HAP_SSS;
+        if ( !child->csq ) child->csq |= CSQ_CODING_SEQUENCE;  // hack, specifically for ENST00000390520/deletion-overlap.vcf
+    }
+
+    free(splice.kref.s);
+    free(splice.kalt.s);
+    return 0;
+}
+void hap_destroy(hap_node_t *hap)
+{
+    int i;
+    for (i=0; i<hap->nchild; i++)
+        if ( hap->child[i] ) hap_destroy(hap->child[i]);
+    for (i=0; i<hap->mcsq_list; i++) free(hap->csq_list[i].type.vstr.s);
+    free(hap->csq_list);
+    free(hap->child);
+    free(hap->cur_child);
+    free(hap->seq);
+    free(hap->var);
+    free(hap);
+}
+
+
+/*
+    ref:    spliced reference and its length (ref.l)
+    seq:    part of the spliced query transcript on the reference strand to translate, its 
+                length (seq.l) and the total length of the complete transcript (seq.m)
+    sbeg:   seq offset within the spliced query transcript
+    rbeg:   seq offset within ref, 0-based
+    rend:   last base of seq within ref, plus one. If seq does not contain indels, it is rend=rbeg+seq->l
+    strand: coding strand - 0:rev, 1:fwd
+    tseq:   translated sequence (aa)
+    fill:   frameshift, fill until the end (strand=fwd) or from the start (strand=rev)
+ */
+void cds_translate(kstring_t *_ref, kstring_t *_seq, uint32_t sbeg, uint32_t rbeg, uint32_t rend, int strand, kstring_t *tseq, int fill)
+{
+#if XDBG
+fprintf(stderr,"translate: %d %d %d  fill=%d  seq.l=%d\n",sbeg,rbeg,rend,fill,(int)_seq->l);
+#endif
+    char tmp[3], *codon, *end;
+    int i, len, npad;
+
+    kstring_t ref = *_ref;
+    kstring_t seq = *_seq;
+
+    tseq->l = 0;
+    if ( !seq.l )
+    {
+        kputc('?', tseq);
+        return;
+    }
+
+#define DBG 0
+#if DBG
+ fprintf(stderr,"translate: sbeg,rbeg,rend=%d %d %d  fill=%d  seq.l=%d\n",sbeg,rbeg,rend,fill,(int)_seq->l);
+ fprintf(stderr,"    ref: l=%d %s\n", (int)ref.l,ref.s);
+ fprintf(stderr,"    seq: l=%d m=%d ", (int)seq.l,(int)seq.m);
+ for (i=0; i<seq.l; i++) fprintf(stderr,"%c",seq.s[i]); fprintf(stderr,"\n");
+ fprintf(stderr,"    sbeg,rbeg,rend: %d,%d,%d\n", sbeg,rbeg,rend);
+ fprintf(stderr,"    strand,fill: %d,%d\n", strand,fill);
+#endif
+
+    if ( strand==STRAND_FWD )
+    {
+        // left padding
+        npad = sbeg % 3;
+#if DBG>1
+        fprintf(stderr,"    npad: %d\n",npad);
+#endif
+        assert( npad<=rbeg );
+
+        for (i=0; i<npad; i++)
+            tmp[i] = ref.s[rbeg+i-npad+N_REF_PAD];
+        for (; i<3 && i-npad<seq.l; i++)
+            tmp[i] = seq.s[i-npad];
+        len = seq.l - i + npad;    // the remaining length of padded sseq
+#if DBG>1
+        fprintf(stderr,"\t i=%d\n", i);
+#endif
+        if ( i==3 )
+        {
+            kputc_(dna2aa(tmp), tseq);
+#if DBG>1
+            fprintf(stderr,"[1]%c%c%c\n",tmp[0],tmp[1],tmp[2]);
+#endif
+            codon = seq.s + 3 - npad;        // next codon
+            end   = codon + len - 1 - (len % 3);    // last position of a valid codon
+            while ( codon < end )
+            {
+                kputc_(dna2aa(codon), tseq);
+#if DBG>1
+                fprintf(stderr,"[2]%c%c%c\n",codon[0],codon[1],codon[2]);
+#endif
+                codon += 3;
+            }
+            end = seq.s + seq.l - 1;
+            for (i=0; codon+i<=end; i++) tmp[i] = codon[i];
+        }
+
+        // right padding
+        codon = ref.s + rend + N_REF_PAD;
+        if ( i>0 )
+        {
+#if DBG>1
+            if(i==1)fprintf(stderr,"[3]%c\n",tmp[0]);
+            if(i==2)fprintf(stderr,"[3]%c%c\n",tmp[0],tmp[1]);
+#endif
+            for (; i<3; i++)
+            {
+                tmp[i] = *codon;
+                codon++;
+            }
+            kputc_(dna2aa(tmp), tseq);
+#if DBG>1
+            fprintf(stderr,"[4]%c%c%c\n",tmp[0],tmp[1],tmp[2]);
+#endif
+        }
+        if ( fill!=0 )
+        {
+            end = ref.s + ref.l - N_REF_PAD;
+            while ( codon+3 <= end )
+            {
+                kputc_(dna2aa(codon), tseq);
+#if DBG>1
+                fprintf(stderr,"[5]%c%c%c\t%c\n",codon[0],codon[1],codon[2],dna2aa(codon));
+#endif
+                codon += 3;
+            }
+        }
+    }
+    else    // STRAND_REV
+    {
+        // right padding - number of bases to take from ref
+        npad = (seq.m - (sbeg + seq.l)) % 3; 
+#if DBG>1
+        fprintf(stderr,"    npad: %d\n",npad);
+#endif
+if ( !(npad>=0 && sbeg+seq.l+npad<=seq.m) ) fprintf(stderr,"sbeg=%d  seq.l=%d seq.m=%d\n",sbeg,(int)seq.l,(int)seq.m);
+        assert( npad>=0 && sbeg+seq.l+npad<=seq.m );  // todo: first codon on the rev strand
+
+        if ( npad==2 )
+        {
+            tmp[1] = ref.s[rend+N_REF_PAD];
+            tmp[2] = ref.s[rend+N_REF_PAD+1];
+            i = 0;
+        }
+        else if ( npad==1 )
+        {
+            tmp[2] = ref.s[rend+N_REF_PAD];
+            i = 1;
+        }
+        else
+            i = 2;
+
+        end = seq.s + seq.l;
+        for (; i>=0 && end>seq.s; i--) tmp[i] = *(--end);
+#if DBG>1
+        fprintf(stderr,"\t i=%d\n", i);
+        if(i==1)fprintf(stderr,"[0]    %c\n",tmp[2]);
+        if(i==0)fprintf(stderr,"[0]  %c%c\n",tmp[1],tmp[2]);
+#endif
+        if ( i==-1 )
+        {
+#if DBG>1
+            fprintf(stderr,"[1]%c%c%c\t%c\n",tmp[0],tmp[1],tmp[2], cdna2aa(tmp));
+#endif
+            kputc_(cdna2aa(tmp), tseq);
+            codon = end - 3;
+            while ( codon >= seq.s )
+            {
+                kputc_(cdna2aa(codon), tseq);
+#if DBG>1
+                fprintf(stderr,"[2]%c%c%c\t%c\n",codon[0],codon[1],codon[2], cdna2aa(codon));
+#endif
+                codon -= 3;
+            }
+            if ( seq.s-codon==2 )
+            {
+                tmp[2] = seq.s[0]; 
+                i = 1;
+            }
+            else if ( seq.s-codon==1 )
+            {
+                tmp[1] = seq.s[0]; 
+                tmp[2] = seq.s[1];
+                i = 0;
+            }
+            else
+                i = -1;
+#if DBG>1
+            if(i==1)fprintf(stderr,"[3]   %c\n",tmp[2]);
+            if(i==0)fprintf(stderr,"[3] %c%c\n",tmp[1],tmp[2]);
+#endif
+        }
+        // left padding
+        end = ref.s + N_REF_PAD + rbeg;
+        if ( i>=0 )
+        {
+            for (; i>=0 && end>=ref.s; i--) tmp[i] = *(--end);
+            kputc_(cdna2aa(tmp), tseq);
+#if DBG>1
+            fprintf(stderr,"[4]%c%c%c\t%c\n",tmp[0],tmp[1],tmp[2],cdna2aa(tmp));
+#endif
+        }
+        if ( fill!=0 )
+        {
+            codon = end - 3;
+            while ( codon >= ref.s + N_REF_PAD )
+            {
+                kputc_(cdna2aa(codon), tseq);
+#if DBG>1
+                fprintf(stderr,"[5]%c%c%c\t%c\n",codon[0],codon[1],codon[2],cdna2aa(codon));
+#endif
+                codon -= 3;
+            }
+        }
+    }
+    kputc_(0,tseq); tseq->l--;
+#if DBG
+ fprintf(stderr,"    tseq: %s\n", tseq->s);
+#endif
+}
+
+void tscript_splice_ref(tscript_t *tr)
+{
+    int i, len = 0;
+    for (i=0; i<tr->ncds; i++) 
+        len += tr->cds[i]->len;
+
+    tr->nsref = len + 2*N_REF_PAD;
+    tr->sref  = (char*) malloc(len + 1 + 2*N_REF_PAD);
+    len = 0;
+
+    memcpy(tr->sref, tr->ref + tr->cds[0]->beg - tr->beg, N_REF_PAD);
+    len += N_REF_PAD;
+
+    for (i=0; i<tr->ncds; i++)
+    {
+        memcpy(tr->sref + len, tr->ref + N_REF_PAD + tr->cds[i]->beg - tr->beg, tr->cds[i]->len);
+        len += tr->cds[i]->len;
+    }
+    memcpy(tr->sref + len, tr->ref + N_REF_PAD + tr->cds[tr->ncds-1]->beg - tr->beg, N_REF_PAD);
+    len += N_REF_PAD;
+
+    tr->sref[len] = 0;
+}
+
+// returns: 0 if consequence was added, 1 if it already exists or could not be added
+int csq_push(args_t *args, csq_t *csq, bcf1_t *rec)
+{
+#if XDBG
+fprintf(stderr,"csq_push: %d .. %d\n",rec->pos+1,csq->type.type);
+#endif
+    khint_t k = kh_get(pos2vbuf, args->pos2vbuf, (int)csq->pos);
+    vbuf_t *vbuf = (k == kh_end(args->pos2vbuf)) ? NULL : kh_val(args->pos2vbuf, k);
+    if ( !vbuf ) error("This should not happen. %s:%d  %s\n",bcf_seqname(args->hdr,rec),csq->pos+1,csq->type.vstr);
+
+    int i;
+    for (i=0; i<vbuf->n; i++)
+        if ( vbuf->vrec[i]->line==rec ) break;
+    if ( i==vbuf->n ) error("This should not happen.. %s:%d  %s\n", bcf_seqname(args->hdr,rec),csq->pos+1,csq->type.vstr);
+    vrec_t *vrec = vbuf->vrec[i];
+
+    // if the variant overlaps donor/acceptor and also splice region, report only donor/acceptor
+    if ( csq->type.type & CSQ_SPLICE_REGION && csq->type.type & (CSQ_SPLICE_DONOR|CSQ_SPLICE_ACCEPTOR) ) 
+        csq->type.type &= ~CSQ_SPLICE_REGION;
+
+    if ( csq->type.type & CSQ_PRINTED_UPSTREAM )
+    {
+        for (i=0; i<vrec->nvcsq; i++)
+        {
+            // Same as below, to avoid records like
+            //      3630 .. @3632,stop_lost|AL627309.1|ENST00000423372|protein_coding|-
+            //      3632 .. stop_lost|AL627309.1|ENST00000423372|protein_coding|-|260*>260G|3630T>A+3632A>C
+            if ( csq->type.type&CSQ_START_STOP && vrec->vcsq[i].type&CSQ_START_STOP )
+            {
+                vrec->vcsq[i] = csq->type;
+                goto exit_duplicate;
+            }
+            if ( !(vrec->vcsq[i].type & CSQ_PRINTED_UPSTREAM) ) continue;
+            if ( csq->type.ref != vrec->vcsq[i].ref ) continue;
+            goto exit_duplicate;
+        }
+    }
+    else if ( csq->type.type & CSQ_COMPOUND )
+    {
+        for (i=0; i<vrec->nvcsq; i++)
+        {
+            if ( csq->type.trid != vrec->vcsq[i].trid && (csq->type.type|vrec->vcsq[i].type)&CSQ_PRN_TSCRIPT ) continue;
+            if ( csq->type.biotype != vrec->vcsq[i].biotype ) continue;
+            if ( csq->type.gene != vrec->vcsq[i].gene ) continue;
+            if ( csq->type.vstr.s || vrec->vcsq[i].vstr.s ) 
+            {
+                // This is a bit hacky, but we want a simpler and more predictable output. The splice_csq() function
+                // can trigger stop/start events based on indel overlap, then another stop/start event can be triggered
+                // from add_csq() or test_cds_local() based on sequence comparison, and on output we could find two
+                // consequences:
+                //      stop_lost|AL627309.1|ENST00000423372|protein_coding|-
+                //      stop_lost&inframe_insertion|AL627309.1|ENST00000423372|protein_coding|-|260*>260CL|3630T>TAAA
+                if ( !csq->type.vstr.s || !vrec->vcsq[i].vstr.s ) 
+                {
+                    if ( csq->type.type&CSQ_START_STOP && vrec->vcsq[i].type&CSQ_START_STOP )
+                    {
+                        vrec->vcsq[i].type |= csq->type.type;
+
+                        // remove stop_lost&synonymous if stop_retained set
+                        if ( vrec->vcsq[i].type&CSQ_STOP_RETAINED ) 
+                            vrec->vcsq[i].type &= ~(CSQ_STOP_LOST|CSQ_SYNONYMOUS_VARIANT);
+
+                        if ( !vrec->vcsq[i].vstr.s ) vrec->vcsq[i].vstr = csq->type.vstr;
+                        goto exit_duplicate;
+                    }
+                    continue;
+                }
+                if ( strcmp(csq->type.vstr.s,vrec->vcsq[i].vstr.s) ) continue;
+            }
+            vrec->vcsq[i].type |= csq->type.type; 
+            goto exit_duplicate;
+        }
+    }
+    else
+    {
+        for (i=0; i<vrec->nvcsq; i++)
+        {
+            if ( csq->type.trid != vrec->vcsq[i].trid && (csq->type.type|vrec->vcsq[i].type)&CSQ_PRN_TSCRIPT) continue;
+            if ( csq->type.biotype != vrec->vcsq[i].biotype ) continue;
+            if ( !(vrec->vcsq[i].type & CSQ_COMPOUND) ) 
+            {
+                vrec->vcsq[i].type |= csq->type.type;
+                goto exit_duplicate;
+            }
+            if ( vrec->vcsq[i].type==(vrec->vcsq[i].type|csq->type.type) ) goto exit_duplicate;
+        }
+    }
+    // no such csq yet in this vcf record
+    csq->vrec = vrec;
+    csq->idx  = i;
+    vrec->nvcsq++;
+    hts_expand0(vcsq_t, vrec->nvcsq, vrec->mvcsq, vrec->vcsq);
+    vrec->vcsq[i] = csq->type;
+    return 0;
+
+exit_duplicate:
+    csq->vrec = vrec;
+    csq->idx  = i;
+    return 1;
+}
+
+//  soff .. position of the variant within the trimmed query transcript
+//  sbeg .. position of the variant within the query transcript
+//  rbeg .. position on the reference transcript (if there are no indels, then rbeg=send)
+//  rpos .. VCF position
+#define node2soff(i) (hap->stack[i].slen - (hap->stack[i].node->rlen + hap->stack[i].node->dlen))
+#define node2sbeg(i) (hap->sbeg + node2soff(i))
+#define node2send(i) (hap->sbeg + hap->stack[i].slen)
+#define node2rbeg(i) (hap->stack[i].node->sbeg)
+#define node2rend(i) (hap->stack[i].node->sbeg + hap->stack[i].node->rlen)
+#define node2rpos(i) (hap->stack[i].node->rec->pos)
+
+void kput_vcsq(vcsq_t *csq, kstring_t *str)
+{
+    // Remove start/stop from incomplete CDS, but only if there is another
+    // consequence as something must be reported
+    if ( csq->type & CSQ_INCOMPLETE_CDS && (csq->type & ~(CSQ_START_STOP|CSQ_INCOMPLETE_CDS|CSQ_UPSTREAM_STOP)) ) csq->type &= ~(CSQ_START_STOP|CSQ_INCOMPLETE_CDS);
+
+    // Remove missense from start/stops
+    if ( csq->type & CSQ_START_STOP && csq->type & CSQ_MISSENSE_VARIANT ) csq->type &= ~CSQ_MISSENSE_VARIANT;
+
+    if ( csq->type & CSQ_PRINTED_UPSTREAM && csq->ref )
+    {
+        kputc_('@',str);
+        kputw(csq->ref->pos+1, str);
+        return;
+    }
+    if ( csq->type & CSQ_UPSTREAM_STOP )
+        kputc_('*',str);
+
+    int i, n = sizeof(csq_strings)/sizeof(char*);
+    for (i=1; i<n; i++)
+        if ( csq_strings[i] && csq->type&(1<<i) ) { kputs(csq_strings[i],str); break; }
+    i++;
+    for (; i<n; i++)
+        if ( csq_strings[i] && csq->type&(1<<i) ) { kputc_('&',str); kputs(csq_strings[i],str); }
+
+    kputc_('|', str);
+    if ( csq->gene ) kputs(csq->gene , str);
+
+    kputc_('|', str);
+    if ( csq->type & CSQ_PRN_TSCRIPT ) ksprintf(str, "%s",ENSID(csq->trid));
+
+    kputc_('|', str);
+    kputs(gf_type2gff_string(csq->biotype), str);
+
+    if ( CSQ_PRN_STRAND(csq->type) || csq->vstr.l )
+        kputs(csq->strand==STRAND_FWD ? "|+" : "|-", str);
+
+    if ( csq->vstr.l )
+        kputs(csq->vstr.s, str);
+}
+
+void hap_add_csq(args_t *args, hap_t *hap, hap_node_t *node, int tlen, int ibeg, int iend, int dlen, int indel)
+{
+    int i;
+    tscript_t *tr = hap->tr;
+    int ref_node = tr->strand==STRAND_FWD ? ibeg : iend;
+
+    int icsq = node->ncsq_list++;
+    hts_expand0(csq_t,node->ncsq_list,node->mcsq_list,node->csq_list);
+    csq_t *csq = &node->csq_list[icsq];
+    csq->pos  = hap->stack[ref_node].node->rec->pos;
+    csq->type.trid    = tr->id;
+    csq->type.gene    = tr->gene->name;
+    csq->type.strand  = tr->strand;
+    csq->type.biotype = tr->type;
+
+    // only now we see the translated sequence and can determine if the stop/start changes are real
+    int rm_csq = 0; 
+    csq->type.type = 0;
+    for (i=ibeg; i<=iend; i++)
+        csq->type.type |= hap->stack[i].node->csq & CSQ_COMPOUND;
+    if ( dlen==0 && indel ) csq->type.type |= CSQ_INFRAME_ALTERING;
+
+    int has_upstream_stop = hap->upstream_stop;
+    if ( hap->stack[ibeg].node->type != HAP_SSS )
+    {
+        // check for truncating stops
+        for (i=0; i<hap->tref.l; i++)
+            if ( hap->tref.s[i]=='*' ) break;
+        if ( i!=hap->tref.l )
+        {
+            hap->tref.l = i+1;
+            hap->tref.s[i+1] = 0;
+        }
+        for (i=0; i<hap->tseq.l; i++)
+            if ( hap->tseq.s[i]=='*' ) break;
+        if ( i!=hap->tseq.l )
+        {
+            hap->tseq.l = i+1;
+            hap->tseq.s[i+1] = 0;
+            hap->upstream_stop = 1;
+        }
+        if ( csq->type.type & CSQ_STOP_LOST )
+        {
+            if ( hap->tref.s[hap->tref.l-1]=='*' && hap->tref.s[hap->tref.l-1] == hap->tseq.s[hap->tseq.l-1] ) 
+            {
+                rm_csq |= CSQ_STOP_LOST;
+                csq->type.type |= CSQ_STOP_RETAINED;
+            }
+            else if ( hap->tref.s[hap->tref.l-1]!='*' )
+            {
+                // This is CDS 3' incomplete ENSG00000173376/synon.vcf, can also be missense
+                // We observe in real data a change to a stop, ENST00000528237/retained-stop-incomplete-cds.vcf
+                if ( hap->tseq.s[hap->tseq.l-1] == '*' )
+                {
+                    rm_csq |= CSQ_STOP_GAINED;
+                    csq->type.type |= CSQ_STOP_RETAINED;
+                }
+                else
+                    csq->type.type |= CSQ_INCOMPLETE_CDS;
+            }
+        }
+        if ( csq->type.type & CSQ_START_LOST && hap->tref.s[0]!='M' )
+        {
+            rm_csq |= CSQ_START_LOST;
+            csq->type.type &= ~CSQ_START_LOST;
+        }
+        if ( dlen!=0 )
+        {
+            if ( dlen%3 )
+                csq->type.type |= CSQ_FRAMESHIFT_VARIANT;
+            else if ( dlen<0 )
+                csq->type.type |= CSQ_INFRAME_DELETION;
+            else
+                csq->type.type |= CSQ_INFRAME_INSERTION;
+        }
+        else
+        {
+            for (i=0; i<hap->tref.l; i++) 
+                if ( hap->tref.s[i] != hap->tseq.s[i] ) break;
+            if ( i==hap->tref.l )
+                csq->type.type |= CSQ_SYNONYMOUS_VARIANT;
+            else if ( hap->tref.s[i] ==  '*' )
+                csq->type.type |= CSQ_STOP_LOST;
+            else if ( hap->tseq.s[i] ==  '*' )
+                csq->type.type |= CSQ_STOP_GAINED;
+            else
+                csq->type.type |= CSQ_MISSENSE_VARIANT;
+        }
+    }
+    if ( has_upstream_stop ) csq->type.type |= CSQ_UPSTREAM_STOP;
+    csq->type.type &= ~rm_csq;
+
+    if ( hap->stack[ibeg].node->type == HAP_SSS  )
+    {
+        node->csq_list[icsq].type.type   |= hap->stack[ibeg].node->csq & ~rm_csq;
+        node->csq_list[icsq].type.ref     = hap->stack[ibeg].node->rec;
+        node->csq_list[icsq].type.biotype = tr->type;
+        csq_push(args, node->csq_list+icsq, hap->stack[ibeg].node->rec);
+        return;
+    }
+
+    kstring_t str = node->csq_list[icsq].type.vstr;
+    str.l = 0;
+
+    // create the aa variant string
+    int aa_rbeg = tr->strand==STRAND_FWD ? node2rbeg(ibeg)/3+1 : (hap->tr->nsref - 2*N_REF_PAD - node2rend(iend))/3+1;
+    int aa_sbeg = tr->strand==STRAND_FWD ? node2sbeg(ibeg)/3+1 : (tlen - node2send(iend))/3+1;
+    kputc_('|', &str);
+    kputw(aa_rbeg, &str);
+    kputs(hap->tref.s, &str);
+    if ( !(csq->type.type & CSQ_SYNONYMOUS_VARIANT) )
+    {
+        kputc_('>', &str);
+        kputw(aa_sbeg, &str);
+        kputs(hap->tseq.s, &str);
+    }
+    kputc_('|', &str);
+
+    // create the dna variant string and, in case of combined variants,
+    // insert silent CSQ_PRINTED_UPSTREAM variants
+    for (i=ibeg; i<=iend; i++)
+    {
+        if ( i>ibeg ) kputc_('+', &str);
+        kputw(node2rpos(i)+1, &str);
+        kputs(hap->stack[i].node->var, &str);
+    }
+    node->csq_list[icsq].type.vstr = str;
+    csq_push(args, node->csq_list+icsq, hap->stack[ref_node].node->rec);
+
+    for (i=ibeg; i<=iend; i++)
+    {
+        // csq are printed at one position only for combined variants, the rest is
+        // silent and references the first
+        if ( hap->stack[i].node->csq & ~CSQ_COMPOUND )
+        {
+            node->ncsq_list++;
+            hts_expand0(csq_t,node->ncsq_list,node->mcsq_list,node->csq_list);
+            csq_t *tmp_csq = &node->csq_list[node->ncsq_list - 1];
+            tmp_csq->pos  = hap->stack[i].node->rec->pos;
+            tmp_csq->type.trid    = tr->id;
+            tmp_csq->type.gene    = tr->gene->name;
+            tmp_csq->type.strand  = tr->strand;
+            tmp_csq->type.type    = hap->stack[i].node->csq & ~CSQ_COMPOUND & ~rm_csq;
+            tmp_csq->type.biotype = tr->type;
+            tmp_csq->type.vstr.l  = 0;
+            kputs(str.s,&tmp_csq->type.vstr);
+            csq_push(args, tmp_csq, hap->stack[i].node->rec);
+        }
+        if ( i!=ref_node && (node->csq_list[icsq].type.type & CSQ_COMPOUND || !(hap->stack[i].node->csq & ~CSQ_COMPOUND)) )
+        {
+            node->ncsq_list++;
+            hts_expand0(csq_t,node->ncsq_list,node->mcsq_list,node->csq_list);
+            csq_t *tmp_csq = &node->csq_list[node->ncsq_list - 1];
+            tmp_csq->pos  = hap->stack[i].node->rec->pos;
+            tmp_csq->type.trid    = tr->id;
+            tmp_csq->type.gene    = tr->gene->name;
+            tmp_csq->type.strand  = tr->strand;
+            tmp_csq->type.type    = CSQ_PRINTED_UPSTREAM | hap->stack[i].node->csq;
+            tmp_csq->type.biotype = tr->type;
+            tmp_csq->type.ref     = hap->stack[ref_node].node->rec;
+            tmp_csq->type.vstr.l  = 0;
+            csq_push(args, tmp_csq, hap->stack[i].node->rec);
+        }
+    }
+}
+
+void hap_finalize(args_t *args, hap_t *hap)
+{
+    tscript_t *tr = hap->tr;
+    if ( !tr->sref )
+        tscript_splice_ref(tr);
+
+    kstring_t sref;
+    sref.s = tr->sref;
+    sref.l = tr->nsref;
+    sref.m = sref.l;
+
+    int istack = 0;
+    hts_expand(hstack_t,1,hap->mstack,hap->stack);
+
+    hap->sseq.l = 0;
+    hap->tseq.l = 0;
+    hap->stack[0].node = tr->root;
+    hap->stack[0].ichild = -1;
+    hap->stack[0].slen = 0;
+    hap->stack[0].dlen = 0;
+
+    while ( istack>=0 )
+    {
+        hstack_t *stack  = &hap->stack[istack];
+        hap_node_t *node = hap->stack[istack].node;
+        while ( ++hap->stack[istack].ichild < node->nchild )
+        {
+            if ( node->child[stack->ichild] ) break;
+        }
+        if ( stack->ichild == node->nchild ) { istack--; continue; }
+
+        node = node->child[stack->ichild];
+
+        istack++;
+        hts_expand(hstack_t,istack+1,hap->mstack,hap->stack);
+        stack = &hap->stack[istack-1];
+
+        hap->stack[istack].node = node;
+        hap->stack[istack].ichild = -1;
+
+        hap->sseq.l = stack->slen;
+        if ( node->type==HAP_CDS ) kputs(node->seq, &hap->sseq);
+        hap->stack[istack].slen = hap->sseq.l;
+        hap->stack[istack].dlen = hap->stack[istack-1].dlen + node->dlen;
+
+        if ( !node->nend ) continue;    // not a leaf node
+
+        // The spliced sequence has been built for the current haplotype and stored
+        // in hap->sseq. Now we break it and output as independent parts
+        
+        kstring_t sseq;
+        sseq.m = sref.m - 2*N_REF_PAD + hap->stack[istack].dlen;  // total length of the spliced query transcript
+        hap->upstream_stop = 0;
+
+        int i = 1, dlen = 0, ibeg, indel = 0;
+        while ( i<istack && hap->stack[i].node->type == HAP_SSS ) i++;
+        hap->sbeg = hap->stack[i].node->sbeg;
+
+        if ( tr->strand==STRAND_FWD )
+        {
+            i = 0, ibeg = -1;
+            while ( ++i <= istack )
+            {
+                if ( hap->stack[i].node->type == HAP_SSS )
+                {
+                    // start/stop/splice site overlap: don't know how to build the haplotypes correctly, skipping
+                    hap_add_csq(args,hap,node,0,i,i,0,0);
+                    continue;
+                }
+                dlen += hap->stack[i].node->dlen;
+                if ( hap->stack[i].node->dlen ) indel = 1;
+                if ( i<istack )
+                {
+                    if ( dlen%3 )   // frameshift
+                    {
+                        if ( ibeg==-1 ) ibeg = i;
+                        continue;
+                    }
+                    int icur  = node2sbeg(i);
+                    int inext = node2sbeg(i+1);
+                    if ( icur/3 == inext/3 )    // in the same codon, can't be flushed yet
+                    {
+                        if ( ibeg==-1 ) ibeg = i;
+                        continue;
+                    }
+                }
+                if ( ibeg<0 ) ibeg = i;
+
+                int ioff = node2soff(ibeg);
+                int icur = node2sbeg(ibeg);
+                int rbeg = node2rbeg(ibeg);
+                int rend = node2rend(i);
+                int fill = dlen%3;
+
+                // alt
+                if ( hap->sseq.l )
+                {
+                    sseq.l = hap->stack[i].slen - ioff;
+                    sseq.s = hap->sseq.s + ioff;
+                }
+                else    // splice site overlap, see #1475227917
+                    sseq.l = fill = 0;
+                cds_translate(&sref, &sseq, icur,rbeg,rend, tr->strand, &hap->tseq, fill);
+
+                // ref
+                sseq.l = node2rend(i) - rbeg;
+                sseq.s = sref.s + N_REF_PAD + rbeg;
+                sseq.m = sref.m - 2*N_REF_PAD;
+                cds_translate(&sref, &sseq, rbeg,rbeg,rend, tr->strand, &hap->tref, fill);
+                sseq.m = sref.m - 2*N_REF_PAD + hap->stack[istack].dlen;
+
+                hap_add_csq(args,hap,node,0, ibeg,i,dlen,indel);
+                ibeg = -1;
+                dlen = 0;
+                indel = 0;
+            }
+        }
+        else
+        {
+            i = istack + 1, ibeg = -1;
+            while ( --i > 0 )
+            {
+                if ( hap->stack[i].node->type == HAP_SSS )
+                {
+                    hap_add_csq(args,hap,node,0,i,i,0,0);
+                    continue;
+                }
+                dlen += hap->stack[i].node->dlen;
+                if ( hap->stack[i].node->dlen ) indel = 1;
+                if ( i>1 && hap->stack[i-1].node->type != HAP_SSS )
+                {
+                    if ( dlen%3 )
+                    {
+                        if ( ibeg==-1 ) ibeg = i;
+                        continue;
+                    }
+                    int icur  = sseq.m - 1 - node2sbeg(i);
+                    int inext = sseq.m - 1 - node2sbeg(i-1);
+                    if ( icur/3 == inext/3 )
+                    {
+                        if ( ibeg==-1 ) ibeg = i;
+                        continue;
+                    }
+                }
+                if ( ibeg<0 ) ibeg = i;
+                int ioff = node2soff(i);
+                int icur = node2sbeg(i);
+                int rbeg = node2rbeg(i);
+                int rend = node2rend(ibeg);
+                int fill = dlen%3;
+
+                // alt
+                if ( hap->sseq.l )
+                {
+                    sseq.l = hap->stack[ibeg].slen - ioff;
+                    sseq.s = hap->sseq.s + ioff;
+                }
+                else    // splice site overlap, see #1475227917
+                    sseq.l = fill = 0;
+                cds_translate(&sref, &sseq, icur,rbeg,rend, tr->strand, &hap->tseq, fill);
+
+                // ref
+                sseq.l = node2rend(ibeg) - rbeg;
+                sseq.s = sref.s + N_REF_PAD + rbeg;
+                sseq.m = sref.m - 2*N_REF_PAD;
+                cds_translate(&sref, &sseq, rbeg,rbeg,rend, tr->strand, &hap->tref, fill);
+                sseq.m = sref.m - 2*N_REF_PAD + hap->stack[istack].dlen;
+
+                hap_add_csq(args,hap,node,sseq.m, i,ibeg,dlen,indel);
+                ibeg = -1;
+                dlen = 0;
+                indel = 0;
+            }
+        }
+    }
+}
+
+static inline void csq_print_text(args_t *args, csq_t *csq, int ismpl, int ihap)
+{
+    if ( csq->type.type & CSQ_PRINTED_UPSTREAM ) return;
+
+    char *smpl = ismpl >= 0 ? args->hdr->samples[ismpl] : "-";
+    const char *chr = bcf_hdr_id2name(args->hdr,args->rid);
+
+    fprintf(args->out,"CSQ\t%s\t", smpl);
+    if ( ihap>0 )
+        fprintf(args->out,"%d", ihap);
+    else
+        fprintf(args->out,"-");
+
+    args->str.l = 0;
+    kput_vcsq(&csq->type, &args->str);
+    fprintf(args->out,"\t%s\t%d\t%s\n",chr,csq->pos+1,args->str.s);
+}
+static inline void hap_print_text(args_t *args, tscript_t *tr, int ismpl, int ihap, hap_node_t *node)
+{
+    if ( !node || !node->ncsq_list ) return;
+
+    char *smpl = ismpl >= 0 ? args->hdr->samples[ismpl] : "-";
+    const char *chr = bcf_hdr_id2name(args->hdr,args->rid);
+
+    int i;
+    for (i=0; i<node->ncsq_list; i++)
+    {
+        csq_t *csq = node->csq_list + i;
+        if ( csq->type.type & CSQ_PRINTED_UPSTREAM ) continue;
+        assert( csq->type.vstr.l );
+
+        fprintf(args->out,"CSQ\t%s\t", smpl);
+        if ( ihap>0 )
+            fprintf(args->out,"%d", ihap);
+        else
+            fprintf(args->out,"-");
+
+        args->str.l = 0;
+        kput_vcsq(&csq->type, &args->str);
+        fprintf(args->out,"\t%s\t%d\t%s\n",chr,csq->pos+1,args->str.s);
+    }
+}
+
+static inline void hap_stage_vcf(args_t *args, tscript_t *tr, int ismpl, int ihap, hap_node_t *node)
+{
+    if ( !node || !node->ncsq_list || ismpl<0 ) return;
+
+    int i;
+    for (i=0; i<node->ncsq_list; i++)
+    {
+        csq_t *csq = node->csq_list + i;
+        vrec_t *vrec = csq->vrec;
+        int icsq = 2*csq->idx + ihap;
+        if ( icsq >= args->ncsq_max ) // more than ncsq_max consequences, so can't fit it in FMT
+        {
+            int print_warning = 1;
+            if ( args->quiet )
+            {
+                if ( args->quiet > 1 || args->ncsq_small_warned ) print_warning = 0;
+                args->ncsq_small_warned = 1;
+            }
+            if ( print_warning )
+            {
+                fprintf(stderr,"Warning: --ncsq %d is too small to annotate %s at %s:%d with %d-th csq\n",
+                        args->ncsq_max/2,args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),vrec->line->pos+1,csq->idx+1);
+                if ( args->quiet ) fprintf(stderr,"(This warning is printed only once)\n");
+            }
+            break;
+        }
+        if ( vrec->nfmt < 1 + icsq/32 ) vrec->nfmt = 1 + icsq/32;
+        vrec->smpl[ismpl*args->nfmt_bcsq + icsq/32] |= 1 << (icsq % 32);
+    }
+}
+
+void hap_flush(args_t *args, uint32_t pos)
+{
+    int i,j;
+    tr_heap_t *heap = args->active_tr;
+
+    while ( heap->ndat && heap->dat[0]->end<=pos )
+    {
+        tscript_t *tr = heap->dat[0];
+        khp_delete(trhp, heap);
+
+        args->hap->tr = tr;
+        if ( tr->root && tr->root->nchild ) // normal, non-localized calling
+        {
+            hap_finalize(args, args->hap);
+
+            if ( args->output_type==FT_TAB_TEXT )   // plain text output, not a vcf
+            {
+                if ( args->phase==PHASE_DROP_GT )
+                    hap_print_text(args, tr, -1,0, tr->hap[0]);
+                else
+                {
+                    for (i=0; i<args->smpl->n; i++)
+                    {
+                        for (j=0; j<2; j++)
+                            hap_print_text(args, tr, args->smpl->idx[i],j+1, tr->hap[i*2+j]);
+                    }
+                }
+            }
+            else if ( args->phase!=PHASE_DROP_GT )
+            {
+                for (i=0; i<args->smpl->n; i++)
+                {
+                    for (j=0; j<2; j++)
+                        hap_stage_vcf(args, tr, args->smpl->idx[i],j, tr->hap[i*2+j]);
+                }
+            }
+        }
+
+        // mark the transcript for deletion. Cannot delete it immediately because
+        // by-position VCF output will need them when flushed by vcf_buf_push
+        args->nrm_tr++;
+        hts_expand(tscript_t*,args->nrm_tr,args->mrm_tr,args->rm_tr);
+        args->rm_tr[args->nrm_tr-1] = tr;
+    }
+}
+
+#define SWAP(type_t, a, b) { type_t t = a; a = b; b = t; }
+
+void vbuf_push(args_t *args, bcf1_t **rec_ptr)
+{
+    int i;
+
+    assert(rec_ptr);
+    bcf1_t *rec = *rec_ptr;
+
+    // check for duplicate records
+    i = args->vcf_rbuf.n ? rbuf_last(&args->vcf_rbuf) : -1;
+    if ( i<0 || args->vcf_buf[i]->vrec[0]->line->pos!=rec->pos ) 
+    {
+        // vcf record with a new pos
+        rbuf_expand0(&args->vcf_rbuf, vbuf_t*, args->vcf_rbuf.n+1, args->vcf_buf);
+        i = rbuf_append(&args->vcf_rbuf);
+        if ( !args->vcf_buf[i] ) args->vcf_buf[i] = (vbuf_t*) calloc(1,sizeof(vbuf_t));
+        args->vcf_buf[i]->n = 0;
+    }
+    vbuf_t *vbuf = args->vcf_buf[i];
+    vbuf->n++;
+    hts_expand0(vrec_t*, vbuf->n, vbuf->m, vbuf->vrec);
+    if ( !vbuf->vrec[vbuf->n - 1] )
+        vbuf->vrec[vbuf->n - 1] = (vrec_t*) calloc(1,sizeof(vrec_t));
+
+    vrec_t *vrec = vbuf->vrec[vbuf->n - 1];
+    if ( args->phase!=PHASE_DROP_GT && args->smpl->n )
+    {
+        if ( !vrec->smpl ) vrec->smpl = (uint32_t*) calloc(args->hdr_nsmpl,sizeof(*vrec->smpl) * args->nfmt_bcsq);
+        else memset(vrec->smpl,0,args->hdr_nsmpl*sizeof(*vrec->smpl) * args->nfmt_bcsq);
+    }
+    if ( !vrec->line ) vrec->line = bcf_init1();
+    SWAP(bcf1_t*, (*rec_ptr), vrec->line);
+
+    int ret;
+    khint_t k = kh_put(pos2vbuf, args->pos2vbuf, (int)rec->pos, &ret);
+    kh_val(args->pos2vbuf,k) = vbuf;
+}
+
+void vbuf_flush(args_t *args)
+{
+    if ( args->active_tr->ndat ) return; // cannot output buffered VCF lines (args.vbuf) until all active transcripts are gone
+
+    int i,j;
+    while ( (i=rbuf_shift(&args->vcf_rbuf))>=0 )
+    {
+        vbuf_t *vbuf = args->vcf_buf[i];
+        for (i=0; i<vbuf->n; i++)
+        {
+            vrec_t *vrec = vbuf->vrec[i];
+            if ( !args->out_fh ) // not a VCF output
+            {
+                vrec->nvcsq = 0;
+                continue;
+            }
+            if ( !vrec->nvcsq )
+            {
+                bcf_write(args->out_fh, args->hdr, vrec->line);
+                continue;
+            }
+            
+            args->str.l = 0;
+            kput_vcsq(&vrec->vcsq[0], &args->str);
+            for (j=1; j<vrec->nvcsq; j++)
+            {
+                kputc_(',', &args->str);
+                kput_vcsq(&vrec->vcsq[j], &args->str);
+            }
+            bcf_update_info_string(args->hdr, vrec->line, args->bcsq_tag, args->str.s);
+            if ( args->hdr_nsmpl )
+            {
+                if ( vrec->nfmt < args->nfmt_bcsq )
+                    for (j=1; j<args->hdr_nsmpl; j++) memcpy(vrec->smpl+j*vrec->nfmt, vrec->smpl+j*args->nfmt_bcsq, vrec->nfmt*sizeof(*vrec->smpl));
+                bcf_update_format_int32(args->hdr, vrec->line, args->bcsq_tag, vrec->smpl, args->hdr_nsmpl*vrec->nfmt);
+            }
+            vrec->nvcsq = 0;
+            bcf_write(args->out_fh, args->hdr, vrec->line);
+        }
+        if ( vbuf->n )
+        {
+            khint_t k = kh_get(pos2vbuf, args->pos2vbuf, vbuf->vrec[0]->line->pos);
+            if ( k != kh_end(args->pos2vbuf) ) kh_del(pos2vbuf, args->pos2vbuf, k);
+        }
+        vbuf->n = 0;
+    }
+
+    for (i=0; i<args->nrm_tr; i++)
+    {
+        tscript_t *tr = args->rm_tr[i];
+        if ( tr->root ) hap_destroy(tr->root);
+        tr->root = NULL;
+        free(tr->hap);
+        free(tr->ref);
+        free(tr->sref);
+    }
+    args->nrm_tr = 0;
+    args->ncsq_buf = 0;
+}
+
+void tscript_init_ref(args_t *args, tscript_t *tr, const char *chr)
+{
+    int i, len;
+    int pad_beg = tr->beg >= N_REF_PAD ? N_REF_PAD : tr->beg;
+
+    tr->ref = faidx_fetch_seq(args->fai, chr, tr->beg - pad_beg, tr->end + N_REF_PAD, &len);
+    if ( !tr->ref )
+        error("faidx_fetch_seq failed %s:%d-%d\n", chr,tr->beg+1,tr->end+1);
+
+    int pad_end = len - (tr->end - tr->beg + 1 + pad_beg);
+    if ( pad_beg + pad_end != 2*N_REF_PAD )
+    {
+        char *ref = (char*) malloc(tr->end - tr->beg + 1 + 2*N_REF_PAD);
+        for (i=0; i < N_REF_PAD - pad_beg; i++) ref[i] = 'N';
+        memcpy(ref+i, tr->ref, len);
+        for (i=0; i < N_REF_PAD - pad_end; i++) ref[i+len] = 'N';
+        free(tr->ref);
+        tr->ref = ref;
+    }
+}
+
+static void sanity_check_ref(args_t *args, tscript_t *tr, bcf1_t *rec)
+{
+    char *ref = tr->ref + (rec->pos + N_REF_PAD >= tr->beg ? rec->pos - tr->beg + N_REF_PAD : 0);
+    char *vcf = rec->d.allele[0] + (rec->pos + N_REF_PAD >= tr->beg ? 0 : tr->beg - N_REF_PAD - rec->pos);
+    assert( vcf - rec->d.allele[0] < strlen(rec->d.allele[0]) );
+    while ( *ref && *vcf )
+    {
+        if ( *ref!=*vcf && toupper(*ref)!=toupper(*vcf) ) 
+            error("Error: the fasta reference does not match the VCF REF allele at %s:%d .. %s\n", bcf_seqname(args->hdr,rec),rec->pos+1,rec->d.allele[0]);
+        ref++;
+        vcf++;
+    }
+}
+
+int test_cds_local(args_t *args, bcf1_t *rec)
+{
+    int i,j, ret = 0;
+    const char *chr = bcf_seqname(args->hdr,rec);
+    // note that the off-by-one extension of rlen is deliberate to account for insertions
+    if ( !regidx_overlap(args->idx_cds,chr,rec->pos,rec->pos+rec->rlen, args->itr) ) return 0;
+
+    // structures to fake the normal test_cds machinery
+    hap_node_t root, node;
+    root.type  = HAP_ROOT;
+    kstring_t *tref = &args->hap->tref, *tseq = &args->hap->tseq;
+
+    while ( regitr_overlap(args->itr) )
+    {
+        gf_cds_t *cds = regitr_payload(args->itr,gf_cds_t*);
+        tscript_t *tr = cds->tr;
+        if ( !GF_is_coding(tr->type) ) continue;
+        ret = 1;
+
+        if ( !tr->ref )
+        {
+            tscript_init_ref(args, tr, chr);
+            tscript_splice_ref(tr);
+            khp_insert(trhp, args->active_tr, &tr);     // only to clean the reference afterwards
+        }
+
+        sanity_check_ref(args, tr, rec);
+
+        kstring_t sref;
+        sref.s = tr->sref;
+        sref.l = tr->nsref;
+        sref.m = sref.l;
+
+        for (i=1; i<rec->n_allele; i++)
+        {
+            if ( hap_init(args, &root, &node, cds, rec, i)!=0 ) continue;
+
+            csq_t csq; 
+            memset(&csq, 0, sizeof(csq_t));
+            csq.pos          = rec->pos;
+            csq.type.biotype = tr->type;
+            csq.type.strand  = tr->strand;
+            csq.type.trid    = tr->id;
+            csq.type.gene    = tr->gene->name;
+
+            int csq_type = node.csq;
+
+            // code repetition: it would be nice to reuse the code from hap_add_csq, needs have refactoring though
+            if ( node.type == HAP_SSS )
+            {
+                csq.type.type = csq_type;
+                csq_stage(args, &csq, rec);
+            }
+            else
+            {
+                kstring_t sseq;
+                sseq.m = sref.m - 2*N_REF_PAD + node.dlen;
+                sseq.s = node.seq;
+                int alen = sseq.l = strlen(sseq.s);
+                int fill = node.dlen%3 && alen ? 1 : 0; // see #1475227917
+                cds_translate(&sref, &sseq, node.sbeg,node.sbeg,node.sbeg+node.rlen, tr->strand, tseq, fill);
+
+                sseq.m = sref.m - 2*N_REF_PAD;
+                sseq.s = sref.s + N_REF_PAD + node.sbeg;
+                sseq.l = node.rlen;
+                cds_translate(&sref, &sseq, node.sbeg,node.sbeg,node.sbeg+node.rlen, tr->strand, tref, fill);
+
+                // check for truncating stops
+                for (j=0; j<tref->l; j++)
+                    if ( tref->s[j]=='*' ) break;
+                if ( j!=tref->l )
+                {
+                    tref->l = j+1;
+                    tref->s[j+1] = 0;
+                }
+                for (j=0; j<tseq->l; j++)
+                    if ( tseq->s[j]=='*' ) break;
+                if ( j!=tseq->l )
+                {
+                    tseq->l = j+1;
+                    tseq->s[j+1] = 0;
+                }
+                if ( csq_type & CSQ_STOP_LOST )
+                {
+                    if ( tref->s[tref->l-1]=='*' && tref->s[tref->l-1] == tseq->s[tseq->l-1] ) 
+                    {
+                        csq_type &= ~CSQ_STOP_LOST;
+                        csq_type |= CSQ_STOP_RETAINED;
+                    }
+                    else if (tref->s[tref->l-1]!='*' )
+                    {
+                        // This is CDS 3' incomplete ENSG00000173376/synon.vcf, can also be missense
+                        // We observe in real data a change to a stop, ENST00000528237/retained-stop-incomplete-cds.vcf
+                        if ( tseq->s[tseq->l-1] == '*' )
+                        {
+                            csq_type &= ~CSQ_STOP_GAINED;
+                            csq_type |= CSQ_STOP_RETAINED;
+                        }
+                        else
+                            csq_type |= CSQ_INCOMPLETE_CDS;
+                    }
+                }
+                if ( csq_type & CSQ_START_LOST && tref->s[0]!='M' )
+                    csq_type &= ~CSQ_START_LOST;
+                if ( node.dlen!=0 )
+                {
+                    if ( node.dlen%3 )
+                        csq_type |= CSQ_FRAMESHIFT_VARIANT;
+                    else if ( node.dlen<0 )
+                        csq_type |= CSQ_INFRAME_DELETION;
+                    else
+                        csq_type |= CSQ_INFRAME_INSERTION;
+                }
+                else
+                {
+                    for (j=0; j<tref->l; j++) 
+                        if ( tref->s[j] != tseq->s[j] ) break;
+                    if ( j==tref->l )
+                        csq_type |= CSQ_SYNONYMOUS_VARIANT;
+                    else if ( tref->s[j] ==  '*' )
+                        csq_type |= CSQ_STOP_LOST;
+                    else if ( tseq->s[j] ==  '*' )
+                        csq_type |= CSQ_STOP_GAINED;
+                    else
+                        csq_type |= CSQ_MISSENSE_VARIANT;
+                }
+                if ( csq_type & CSQ_COMPOUND )
+                {
+                    // create the aa variant string
+                    kstring_t str = {0,0,0};
+                    int aa_rbeg = tr->strand==STRAND_FWD ? node.sbeg/3+1 : (tr->nsref - 2*N_REF_PAD - node.sbeg - node.rlen)/3+1;
+                    int aa_sbeg = tr->strand==STRAND_FWD ? node.sbeg/3+1 : (tr->nsref - 2*N_REF_PAD + node.dlen - node.sbeg - alen)/3+1;
+                    kputc_('|', &str);
+                    kputw(aa_rbeg, &str);
+                    kputs(tref->s, &str);
+                    if ( !(csq_type & CSQ_SYNONYMOUS_VARIANT) )
+                    {
+                        kputc_('>', &str);
+                        kputw(aa_sbeg, &str);
+                        kputs(tseq->s, &str);
+                    }
+                    kputc_('|', &str);
+                    kputw(rec->pos+1, &str);
+                    kputs(node.var, &str);
+                    csq.type.vstr = str;
+                    csq.type.type = csq_type & CSQ_COMPOUND;
+                    csq_stage(args, &csq, rec);
+
+                    // all this only to clean vstr when vrec is flushed
+                    if ( !tr->root )
+                        tr->root = (hap_node_t*) calloc(1,sizeof(hap_node_t));
+                    tr->root->ncsq_list++;
+                    hts_expand0(csq_t,tr->root->ncsq_list,tr->root->mcsq_list,tr->root->csq_list);
+                    csq_t *rm_csq = tr->root->csq_list + tr->root->ncsq_list - 1;
+                    rm_csq->type.vstr = str;            
+                }
+                if ( csq_type & ~CSQ_COMPOUND )
+                {
+                    csq.type.type = csq_type & ~CSQ_COMPOUND;
+                    csq.type.vstr.l = 0;
+                    csq_stage(args, &csq, rec);
+                }
+            }
+            free(node.seq);
+            free(node.var);
+        }
+    }
+    return ret;
+}
+
+int test_cds(args_t *args, bcf1_t *rec)
+{
+    int i, ret = 0, hap_ret;
+    const char *chr = bcf_seqname(args->hdr,rec);
+    // note that the off-by-one extension of rlen is deliberate to account for insertions
+    if ( !regidx_overlap(args->idx_cds,chr,rec->pos,rec->pos+rec->rlen, args->itr) ) return 0;
+    while ( regitr_overlap(args->itr) )
+    {
+        gf_cds_t *cds = regitr_payload(args->itr,gf_cds_t*);
+        tscript_t *tr = cds->tr;
+        if ( !GF_is_coding(tr->type) ) continue;
+        ret = 1;
+        if ( !tr->root )
+        {
+            // initialize the transcript and its haplotype tree, fetch the reference sequence
+            tscript_init_ref(args, tr, chr);
+
+            tr->root = (hap_node_t*) calloc(1,sizeof(hap_node_t));
+            tr->nhap = args->phase==PHASE_DROP_GT ? 1 : 2*args->smpl->n;     // maximum ploidy = diploid
+            tr->hap  = (hap_node_t**) malloc(tr->nhap*sizeof(hap_node_t*));
+            for (i=0; i<tr->nhap; i++) tr->hap[i] = NULL;
+            tr->root->nend = tr->nhap;
+            tr->root->type = HAP_ROOT;
+
+            khp_insert(trhp, args->active_tr, &tr);
+        }
+
+        sanity_check_ref(args, tr, rec);
+
+        if ( args->phase==PHASE_DROP_GT )
+        {
+            if ( rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*' ) { continue; }
+            hap_node_t *parent = tr->hap[0] ? tr->hap[0] : tr->root;
+            hap_node_t *child  = (hap_node_t*)calloc(1,sizeof(hap_node_t));
+            if ( (hap_ret=hap_init(args, parent, child, cds, rec, 1))!=0 )
+            {
+                // overlapping or intron variant, cannot apply
+                if ( hap_ret==1 )
+                {
+                    if ( !args->quiet )
+                        fprintf(stderr,"Warning: Skipping overlapping variants at %s:%d\t%s>%s\n", chr,rec->pos+1,rec->d.allele[0],rec->d.allele[1]);
+                    if ( args->out ) 
+                        fprintf(args->out,"LOG\tWarning: Skipping overlapping variants at %s:%d\t%s>%s\n", chr,rec->pos+1,rec->d.allele[0],rec->d.allele[1]);
+                }
+                else ret = 1;   // prevent reporting as intron in test_tscript
+                free(child);
+                continue;
+            }
+            parent->nend--;
+            parent->nchild = 1;
+            parent->mchild = 1;
+            parent->child  = (hap_node_t**) malloc(sizeof(hap_node_t*));
+            parent->child[0] = child;
+            tr->hap[0] = child;
+            tr->hap[0]->nend = 1;
+            continue;
+        }
+
+        // apply the VCF variants and extend the haplotype tree
+        int j, ismpl, ihap, ngts = bcf_get_genotypes(args->hdr, rec, &args->gt_arr, &args->mgt_arr);
+        ngts /= bcf_hdr_nsamples(args->hdr);
+        if ( ngts!=1 && ngts!=2 ) 
+        {
+            if ( !args->quiet )
+                fprintf(stderr,"Warning: Skipping site with non-diploid/non-haploid genotypes at %s:%d\t%s>%s\n", chr,rec->pos+1,rec->d.allele[0],rec->d.allele[1]);
+            if ( args->out ) 
+                fprintf(args->out,"LOG\tWarning: Skipping site with non-diploid/non-haploid genotypes at %s:%d\t%s>%s\n", chr,rec->pos+1,rec->d.allele[0],rec->d.allele[1]);
+            continue;
+        }
+        for (ismpl=0; ismpl<args->smpl->n; ismpl++)
+        {
+            int32_t *gt = args->gt_arr + args->smpl->idx[ismpl]*ngts;
+            if ( gt[0]==bcf_gt_missing ) continue;
+
+            if ( ngts>1 && gt[0]!=gt[1] && gt[1]!=bcf_gt_missing && gt[1]!=bcf_int32_vector_end )
+            {
+                if ( args->phase==PHASE_MERGE )
+                {
+                    if ( !bcf_gt_allele(gt[0]) ) gt[0] = gt[1];
+                }
+                if ( !bcf_gt_is_phased(gt[0]) && !bcf_gt_is_phased(gt[1]) )
+                {
+                    if ( args->phase==PHASE_REQUIRE )
+                        error("Unphased genotype at %s:%d, sample %s. See the --phase option.\n", chr,rec->pos+1,args->hdr->samples[args->smpl->idx[ismpl]]);
+                    if ( args->phase==PHASE_SKIP )
+                        continue;
+                    if ( args->phase==PHASE_NON_REF )
+                    {
+                        if ( !bcf_gt_allele(gt[0]) ) gt[0] = gt[1];
+                        else if ( !bcf_gt_allele(gt[1]) ) gt[1] = gt[0];
+                    }
+                }
+            }
+
+            for (ihap=0; ihap<ngts; ihap++)
+            {
+                if ( gt[ihap]==bcf_gt_missing || gt[ihap]==bcf_int32_vector_end ) continue;
+
+                i = 2*ismpl + ihap;
+
+                int ial = bcf_gt_allele(gt[ihap]);
+                if ( !ial ) continue;
+                assert( ial < rec->n_allele );
+                if ( rec->d.allele[ial][0]=='<' || rec->d.allele[ial][0]=='*' ) { continue; }
+
+                hap_node_t *parent = tr->hap[i] ? tr->hap[i] : tr->root;
+                if ( parent->cur_rec==rec && parent->cur_child[ial]>=0 )
+                {
+                    // this haplotype has been seen in another sample
+                    tr->hap[i] = parent->child[ parent->cur_child[ial] ];
+                    tr->hap[i]->nend++;
+                    parent->nend--;
+                    continue;
+                }
+
+                hap_node_t *child = (hap_node_t*)calloc(1,sizeof(hap_node_t));
+                if ( (hap_ret=hap_init(args, parent, child, cds, rec, ial))!=0 )
+                {
+                    // overlapping or intron variant, cannot apply
+                    if ( hap_ret==1 )
+                    {
+                        if ( !args->quiet )
+                            fprintf(stderr,"Warning: Skipping overlapping variants at %s:%d, sample %s\t%s>%s\n",
+                                    chr,rec->pos+1,args->hdr->samples[args->smpl->idx[ismpl]],rec->d.allele[0],rec->d.allele[ial]);
+                        if ( args->out  )
+                            fprintf(args->out,"LOG\tWarning: Skipping overlapping variants at %s:%d, sample %s\t%s>%s\n",
+                                    chr,rec->pos+1,args->hdr->samples[args->smpl->idx[ismpl]],rec->d.allele[0],rec->d.allele[ial]);
+                    }
+                    free(child);
+                    continue;
+                }
+
+                if ( parent->cur_rec!=rec )
+                {
+                    hts_expand(int,rec->n_allele,parent->mcur_child,parent->cur_child);
+                    for (j=0; j<rec->n_allele; j++) parent->cur_child[j] = -1;
+                    parent->cur_rec = rec;
+                }
+
+                j = parent->nchild++;
+                hts_expand0(hap_node_t*,parent->nchild,parent->mchild,parent->child);
+                parent->cur_child[ial] = j;
+                parent->child[j] = child;
+                tr->hap[i] = child;
+                tr->hap[i]->nend++;
+                parent->nend--;
+            }
+        }
+    }
+    return ret;
+}
+
+void csq_stage(args_t *args, csq_t *csq, bcf1_t *rec)
+{
+    // known issues: tab output leads to unsorted output. This is because
+    // coding haplotypes are printed in one go and buffering is not used
+    // with tab output. VCF output is OK though.
+    if ( csq_push(args, csq, rec)!=0 ) return;    // the consequence already exists
+
+    int i,j,ngt = 0;
+    if ( args->phase!=PHASE_DROP_GT )
+    {
+        ngt = bcf_get_genotypes(args->hdr, rec, &args->gt_arr, &args->mgt_arr);
+        if ( ngt>0 ) ngt /= bcf_hdr_nsamples(args->hdr);
+    }
+    if ( ngt<=0 )
+    {
+        if ( args->output_type==FT_TAB_TEXT )
+            csq_print_text(args, csq, -1,0);
+        return;
+    }
+    assert( ngt<=2 );
+
+    if ( args->output_type==FT_TAB_TEXT )
+    {
+        for (i=0; i<args->smpl->n; i++)
+        {
+            int32_t *gt = args->gt_arr + args->smpl->idx[i]*ngt;
+            for (j=0; j<ngt; j++)
+            {
+                if ( gt[j]==bcf_gt_missing || gt[j]==bcf_int32_vector_end || !bcf_gt_allele(gt[j]) ) continue;
+                csq_print_text(args, csq, args->smpl->idx[i],j+1);
+            }
+        }
+        return;
+    }
+
+    vrec_t *vrec = csq->vrec;
+    for (i=0; i<args->smpl->n; i++)
+    {
+        int32_t *gt = args->gt_arr + args->smpl->idx[i]*ngt;
+        for (j=0; j<ngt; j++)
+        {
+            if ( gt[j]==bcf_gt_missing || gt[j]==bcf_int32_vector_end || !bcf_gt_allele(gt[j]) ) continue;
+
+            int icsq = 2*csq->idx + j;
+            if ( icsq >= args->ncsq_max ) // more than ncsq_max consequences, so can't fit it in FMT
+            {
+                int ismpl = args->smpl->idx[i];
+                int print_warning = 1;
+                if ( args->quiet )
+                {
+                    if ( args->quiet > 1 || args->ncsq_small_warned ) print_warning = 0;
+                    args->ncsq_small_warned = 1;
+                }
+                if ( print_warning )
+                {
+                    fprintf(stderr,"Warning: --ncsq %d is too small to annotate %s at %s:%d with %d-th csq\n",
+                            args->ncsq_max/2,args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),vrec->line->pos+1,csq->idx+1);
+                    if ( args->quiet ) fprintf(stderr,"(This warning is printed only once)\n");
+                }
+                break;
+            }
+            if ( vrec->nfmt < 1 + icsq/32 ) vrec->nfmt = 1 + icsq/32;
+            vrec->smpl[i*args->nfmt_bcsq + icsq/32] |= 1 << (icsq % 32);
+        }
+    }
+}
+int test_utr(args_t *args, bcf1_t *rec)
+{
+    const char *chr = bcf_seqname(args->hdr,rec);
+    // note that the off-by-one extension of rlen is deliberate to account for insertions
+    if ( !regidx_overlap(args->idx_utr,chr,rec->pos,rec->pos+rec->rlen, args->itr) ) return 0;
+
+    splice_t splice;
+    splice_init(&splice, rec);
+
+    int i, ret = 0;
+    while ( regitr_overlap(args->itr) )
+    {
+        gf_utr_t *utr = regitr_payload(args->itr, gf_utr_t*);
+        tscript_t *tr = splice.tr = utr->tr;
+        for (i=1; i<rec->n_allele; i++)
+        {
+            if ( rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*' ) { continue; }
+            splice.vcf.alt = rec->d.allele[i];
+            int splice_ret = splice_csq(args, &splice, utr->beg, utr->end);
+            if ( splice_ret!=SPLICE_INSIDE && splice_ret!=SPLICE_OVERLAP ) continue;
+            csq_t csq; 
+            memset(&csq, 0, sizeof(csq_t));
+            csq.pos          = rec->pos;
+            csq.type.type    = utr->which==prime5 ? CSQ_UTR5 : CSQ_UTR3;
+            csq.type.biotype = tr->type;
+            csq.type.strand  = tr->strand;
+            csq.type.trid    = tr->id;
+            csq.type.gene    = tr->gene->name;
+            csq_stage(args, &csq, rec);
+            ret = 1;
+        }
+    }
+    assert(!splice.kref.s);
+    assert(!splice.kalt.s);
+    return ret;
+}
+int test_splice(args_t *args, bcf1_t *rec)
+{
+    const char *chr = bcf_seqname(args->hdr,rec);
+    if ( !regidx_overlap(args->idx_exon,chr,rec->pos,rec->pos + rec->rlen, args->itr) ) return 0;
+
+    splice_t splice;
+    splice_init(&splice, rec);
+    splice.check_acceptor = splice.check_donor = 1;
+
+    int i, ret = 0;
+    while ( regitr_overlap(args->itr) )
+    {
+        gf_exon_t *exon = regitr_payload(args->itr, gf_exon_t*);
+        splice.tr = exon->tr;
+        if ( !splice.tr->ncds ) continue;  // not a coding transcript, no interest in splice sites
+
+        splice.check_region_beg = splice.tr->beg==exon->beg ? 0 : 1;
+        splice.check_region_end = splice.tr->end==exon->end ? 0 : 1;
+
+        for (i=1; i<rec->n_allele; i++)
+        {
+            if ( rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*' ) { continue; }
+            splice.vcf.alt = rec->d.allele[i];
+            splice_csq(args, &splice, exon->beg, exon->end);
+            if ( splice.csq ) ret = 1;
+        }
+    }
+    free(splice.kref.s);
+    free(splice.kalt.s);
+    return ret;
+}
+int test_tscript(args_t *args, bcf1_t *rec)
+{
+    const char *chr = bcf_seqname(args->hdr,rec);
+    if ( !regidx_overlap(args->idx_tscript,chr,rec->pos,rec->pos+rec->rlen, args->itr) ) return 0;
+
+    splice_t splice;
+    splice_init(&splice, rec);
+
+    int i, ret = 0;
+    while ( regitr_overlap(args->itr) )
+    {
+        tscript_t *tr = splice.tr = regitr_payload(args->itr, tscript_t*);
+        for (i=1; i<rec->n_allele; i++)
+        {
+            if ( rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*' ) { continue; }
+            splice.vcf.alt = rec->d.allele[i];
+            int splice_ret = splice_csq(args, &splice, tr->beg, tr->end);
+            if ( splice_ret!=SPLICE_INSIDE && splice_ret!=SPLICE_OVERLAP ) continue;    // SPLICE_OUTSIDE or SPLICE_REF
+            csq_t csq; 
+            memset(&csq, 0, sizeof(csq_t));
+            csq.pos          = rec->pos;
+            csq.type.type    = GF_is_coding(tr->type) ? CSQ_INTRON : CSQ_NON_CODING;
+            csq.type.biotype = tr->type;
+            csq.type.strand  = tr->strand;
+            csq.type.trid    = tr->id;
+            csq.type.gene    = tr->gene->name;
+            csq_stage(args, &csq, rec);
+            ret = 1;
+        }
+    }
+    assert(!splice.kref.s);
+    assert(!splice.kalt.s);
+    return ret;
+}
+
+void process(args_t *args, bcf1_t **rec_ptr)
+{
+    if ( !rec_ptr )
+    {
+        hap_flush(args, REGIDX_MAX);
+        vbuf_flush(args);
+        return;
+    }
+
+    bcf1_t *rec = *rec_ptr;
+
+    int call_csq = 1;
+    if ( !rec->n_allele ) call_csq = 0;   // no alternate allele
+    else if ( rec->n_allele==2 && (rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*') ) call_csq = 0;     // gVCF, no alt allele
+    else if ( args->filter )
+    {
+        call_csq = filter_test(args->filter, rec, NULL);
+        if ( args->filter_logic==FLT_EXCLUDE ) call_csq = call_csq ? 0 : 1;
+    }
+    if ( !call_csq )
+    {
+        if ( !args->out_fh ) return;    // not a VCF output
+        vbuf_push(args, rec_ptr);
+        vbuf_flush(args);
+        return;
+    }
+
+    if ( args->rid != rec->rid ) 
+    {
+        hap_flush(args, REGIDX_MAX);
+        vbuf_flush(args);
+    }
+    args->rid = rec->rid;
+    vbuf_push(args, rec_ptr);
+
+    int hit = args->local_csq ? test_cds_local(args, rec) : test_cds(args, rec);
+    hit += test_utr(args, rec);
+    hit += test_splice(args, rec);
+    if ( !hit ) test_tscript(args, rec);
+
+    hap_flush(args, rec->pos-1);
+    vbuf_flush(args);
+
+    return;
+}
+
+const char *usage(void)
+{
+    return 
+        "\n"
+        "About: Haplotype-aware consequence caller.\n"
+        "Usage: bcftools csq [options] in.vcf\n"
+        "\n"
+        "Required options:\n"
+        "   -f, --fasta-ref <file>          reference file in fasta format\n"
+        "   -g, --gff-annot <file>          gff3 annotation file\n"
+        "\n"
+        "CSQ options:\n"
+        "   -c, --custom-tag <string>       use this tag instead of the default BCSQ\n"
+        "   -l, --local-csq                 localized predictions, consider only one VCF record at a time\n"
+        "   -n, --ncsq <int>                maximum number of consequences to consider per site [16]\n"
+        "   -p, --phase <a|m|r|R|s>         how to construct haplotypes and how to deal with unphased data: [r]\n"
+        "                                     a: take GTs as is, create haplotypes regardless of phase (0/1 -> 0|1)\n"
+        "                                     m: merge *all* GTs into a single haplotype (0/1 -> 1, 1/2 -> 1)\n"
+        "                                     r: require phased GTs, throw an error on unphased het GTs\n"
+        "                                     R: create non-reference haplotypes if possible (0/1 -> 1|1, 1/2 -> 1|2)\n"
+        "                                     s: skip unphased GTs\n"
+        "Options:\n"
+        "   -e, --exclude <expr>            exclude sites for which the expression is true\n"
+        "   -i, --include <expr>            select sites for which the expression is true\n"
+        "   -o, --output <file>             write output to a file [standard output]\n"
+        "   -O, --output-type <b|u|z|v|t>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF\n"
+        "                                   v: uncompressed VCF, t: plain tab-delimited text output [v]\n"
+        "   -q, --quiet                     suppress warning messages. Can be given two times for even less messages\n"
+        "   -r, --regions <region>          restrict to comma-separated list of regions\n"
+        "   -R, --regions-file <file>       restrict to regions listed in a file\n"
+        "   -s, --samples <-|list>          samples to include or \"-\" to apply all variants and ignore samples\n"
+        "   -S, --samples-file <file>       samples to include\n"
+        "   -t, --targets <region>          similar to -r but streams rather than index-jumps\n"
+        "   -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n"
+        "\n"
+        "Example:\n"
+        "   bcftools csq -f hs37d5.fa -g Homo_sapiens.GRCh37.82.gff3.gz in.vcf\n"
+        "\n"
+        "   # GFF3 annotation files can be downloaded from Ensembl. e.g. for human:\n"
+        "   ftp://ftp.ensembl.org/pub/current_gff3/homo_sapiens/\n"
+        "   ftp://ftp.ensembl.org/pub/grch37/release-84/gff3/homo_sapiens/\n"
+        "\n";
+}
+
+int main_csq(int argc, char *argv[])
+{
+    args_t *args = (args_t*) calloc(1,sizeof(args_t));
+    args->argc = argc; args->argv = argv;
+    args->output_type = FT_VCF;
+    args->bcsq_tag = "BCSQ";
+    args->ncsq_max = 2*16;
+
+    static struct option loptions[] =
+    {
+        {"help",0,0,'h'},
+        {"ncsq",1,0,'n'},
+        {"custom-tag",1,0,'c'},
+        {"local-csq",0,0,'l'},
+        {"gff-annot",1,0,'g'},
+        {"fasta-ref",1,0,'f'},
+        {"include",1,0,'i'},
+        {"exclude",1,0,'e'},
+        {"output",1,0,'o'},
+        {"output-type",1,NULL,'O'},
+        {"phase",1,0,'p'},
+        {"quiet",0,0,'q'},
+        {"regions",1,0,'r'},
+        {"regions-file",1,0,'R'},
+        {"samples",1,0,'s'},
+        {"samples-file",1,0,'S'},
+        {"targets",1,0,'t'},
+        {"targets-file",1,0,'T'},
+        {0,0,0,0}
+    };
+    int c, targets_is_file = 0, regions_is_file = 0; 
+    char *targets_list = NULL, *regions_list = NULL;
+    while ((c = getopt_long(argc, argv, "?hr:R:t:T:i:e:f:o:O:g:s:S:p:qc:ln:",loptions,NULL)) >= 0)
+    {
+        switch (c) 
+        {
+            case 'l': args->local_csq = 1; break;
+            case 'c': args->bcsq_tag = optarg; break;
+            case 'q': args->quiet++; break;
+            case 'p':
+                switch (optarg[0]) 
+                {
+                    case 'a': args->phase = PHASE_AS_IS; break;
+                    case 'm': args->phase = PHASE_MERGE; break;
+                    case 'r': args->phase = PHASE_REQUIRE; break;
+                    case 'R': args->phase = PHASE_NON_REF; break;
+                    case 's': args->phase = PHASE_SKIP; break;
+                    default: error("The -p code \"%s\" not recognised\n", optarg);
+                }
+                break;
+            case 'f': args->fa_fname = optarg; break;
+            case 'g': args->gff_fname = optarg; break;
+            case 'n': 
+                args->ncsq_max = 2 * atoi(optarg);
+                if ( args->ncsq_max <=0 ) error("Expected positive integer with -n, got %s\n", optarg);
+                break;
+            case 'o': args->output_fname = optarg; break;
+            case 'O':
+                      switch (optarg[0]) {
+                          case 't': args->output_type = FT_TAB_TEXT; break;
+                          case 'b': args->output_type = FT_BCF_GZ; break;
+                          case 'u': args->output_type = FT_BCF; break;
+                          case 'z': args->output_type = FT_VCF_GZ; break;
+                          case 'v': args->output_type = FT_VCF; break;
+                          default: error("The output type \"%s\" not recognised\n", optarg);
+                      }
+                      break;
+            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'r': regions_list = optarg; break;
+            case 'R': regions_list = optarg; regions_is_file = 1; break;
+            case 's': args->sample_list = optarg; break;
+            case 'S': args->sample_list = optarg; args->sample_is_file = 1; break;
+            case 't': targets_list = optarg; break;
+            case 'T': targets_list = optarg; targets_is_file = 1; break;
+            case 'h':
+            case '?': error("%s",usage());
+            default: error("The option not recognised: %s\n\n", optarg); break;
+        }
+    }
+    char *fname = NULL;
+    if ( optind==argc )
+    {
+        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        else error("%s", usage());
+    }
+    else fname = argv[optind];
+    if ( argc - optind>1 ) error("%s", usage());
+    if ( !args->fa_fname ) error("Missing the --fa-ref option\n");
+    if ( !args->gff_fname ) error("Missing the --gff option\n");
+    args->sr = bcf_sr_init();
+    if ( targets_list && bcf_sr_set_targets(args->sr, targets_list, targets_is_file, 0)<0 )
+        error("Failed to read the targets: %s\n", targets_list);
+    if ( regions_list && bcf_sr_set_regions(args->sr, regions_list, regions_is_file)<0 )
+        error("Failed to read the regions: %s\n", regions_list);
+    if ( !bcf_sr_add_reader(args->sr, fname) )
+        error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->sr->errnum));
+    args->hdr = bcf_sr_get_header(args->sr,0);
+
+    init_data(args);
+    while ( bcf_sr_next_line(args->sr) )
+    {
+        process(args, &args->sr->readers[0].buffer[0]);
+    }
+    process(args,NULL);
+
+    destroy_data(args);
+    bcf_sr_destroy(args->sr);
+    free(args);
+
+    return 0;
+}
+
diff --git a/bcftools/csq.c.pysam.c b/bcftools/csq.c.pysam.c
new file mode 100644
index 0000000..b79a030
--- /dev/null
+++ b/bcftools/csq.c.pysam.c
@@ -0,0 +1,3826 @@
+#include "pysam.h"
+
+/* The MIT License
+
+   Copyright (c) 2016 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3 at sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+/*
+    Things that would be nice to have
+        - for stop-lost events (also in frameshifts) report the number of truncated aa's
+        - memory could be greatly reduced by indexing gff (but it is quite compact already)
+        - deletions that go beyond transcript boundaries are not checked at sequence level
+            - alloc tscript->ref in hap_finalize, introduce fa_off_beg:16,fa_off_end:16
+            - see test/csq/ENST00000573314/insertion-overlap.vcf #1476288882
+
+    Read about transcript types here
+        http://vega.sanger.ac.uk/info/about/gene_and_transcript_types.html
+        http://www.ensembl.org/info/genome/variation/predicted_data.html
+        http://www.gencodegenes.org/gencode_biotypes.html
+
+    List of supported biotypes
+        antisense
+        IG_C_gene
+        IG_D_gene
+        IG_J_gene
+        IG_LV_gene
+        IG_V_gene
+        lincRNA
+        macro_lncRNA
+        miRNA
+        misc_RNA
+        Mt_rRNA
+        Mt_tRNA
+        polymorphic_pseudogene
+        processed_transcript
+        protein_coding
+        ribozyme
+        rRNA
+        sRNA
+        scRNA
+        scaRNA
+        sense_intronic
+        sense_overlapping
+        snRNA
+        snoRNA
+        TR_C_gene
+        TR_D_gene
+        TR_J_gene
+        TR_V_gene
+
+    The gff parsing logic
+        We collect features such by combining gff lines A,B,C as follows:
+            A .. gene line with a supported biotype
+                    A.ID=~/^gene:/
+
+            B .. transcript line referencing A
+                    B.ID=~/^transcript:/ && B.Parent=~/^gene:A.ID/
+
+            C .. corresponding CDS, exon, and UTR lines:
+                    C[3] in {"CDS","exon","three_prime_UTR","five_prime_UTR"} && C.Parent=~/^transcript:B.ID/ 
+
+        For coding biotypes ("protein_coding" or "polymorphic_pseudogene") the
+        complete chain link C -> B -> A is required. For the rest, link B -> A suffices.
+        
+                
+    The supported consequence types, sorted by impact:
+        splice_acceptor_variant .. end region of an intron changed (2bp at the 3' end of an intron)
+        splice_donor_variant    .. start region of an intron changed (2bp at the 5' end of an intron)
+        stop_gained             .. DNA sequence variant resulting in a stop codon
+        frameshift_variant      .. number of inserted/deleted bases not a multiple of three, disrupted translational frame
+        stop_lost               .. elongated transcript, stop codon changed
+        start_lost              .. the first codon changed
+        inframe_altering        .. combination of indels leading to unchanged reading frame and length
+        inframe_insertion       .. inserted coding sequence, unchanged reading frame
+        inframe_deletion        .. deleted coding sequence, unchanged reading frame
+        missense_variant        .. amino acid (aa) change, unchanged length
+        splice_region_variant   .. change within 1-3 bases of the exon or 3-8 bases of the intron
+        synonymous_variant      .. DNA sequence variant resulting in no amino acid change
+        stop_retained_variant   .. different stop codon
+        non_coding_variant      .. variant in non-coding sequence, such as RNA gene
+        5_prime_UTR_variant
+        3_prime_UTR_variant
+        intron_variant          .. reported only if none of the above
+        intergenic_variant      .. reported only if none of the above
+
+
+    The annotation algorithm.
+        The algorithm checks if the variant falls in a region of a supported type. The
+        search is performed in the following order, until a match is found:
+            1. idx_cds(gf_cds_t) - lookup CDS by position, create haplotypes, call consequences
+            2. idx_utr(gf_utr_t) - check UTR hits
+            3. idx_exon(gf_exon_t) - check for splice variants
+            4. idx_tscript(tscript_t) - check for intronic variants, RNAs, etc.
+
+        These regidx indexes are created by parsing a gff3 file as follows:
+            1.  create the array "ftr" of all UTR, CDS, exons. This will be
+            processed later and pruned based on transcript types we want to keep.
+            In the same go, create the hash "id2tr" of transcripts to keep
+            (based on biotype) which maps from transcript_id to a transcript. At
+            the same time also build the hash "gid2gene" which maps from gene_id to
+            gf_gene_t pointer.
+            
+            2.  build "idx_cds", "idx_tscript", "idx_utr" and "idx_exon" indexes.
+            Use only features from "ftr" which are present in "id2tr".
+
+            3.  clean data that won't be needed anymore: ftr, id2tr, gid2gene.
+        
+    Data structures.
+        idx_cds, idx_utr, idx_exon, idx_tscript:
+            as described above, regidx structures for fast lookup of exons/transcripts
+            overlapping a region, the payload is a pointer to tscript.cds
+*/
+ 
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <math.h>
+#include <htslib/hts.h>
+#include <htslib/vcf.h>
+#include <htslib/synced_bcf_reader.h>
+#include <htslib/khash.h>
+#include <htslib/khash_str2int.h>
+#include <htslib/kseq.h>
+#include <htslib/faidx.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <ctype.h>
+#include "bcftools.h"
+#include "filter.h"
+#include "regidx.h"
+#include "kheap.h"
+#include "smpl_ilist.h"
+#include "rbuf.h"
+
+#ifndef __FUNCTION__
+#  define __FUNCTION__ __func__
+#endif
+
+// Logic of the filters: include or exclude sites which match the filters?
+#define FLT_INCLUDE 1
+#define FLT_EXCLUDE 2
+
+// Definition of splice_region, splice_acceptor and splice_donor
+#define N_SPLICE_DONOR         2      
+#define N_SPLICE_REGION_EXON   3 
+#define N_SPLICE_REGION_INTRON 8 
+
+// Ensembl ID format, e.g. 
+//     ENST00000423372 for human .. ENST%011d
+//  ENSMUST00000120394 for mouse .. ENSMUST%011d
+char  ENSID_BUF[32], *ENSID_FMT = NULL;
+static inline char *ENSID(uint32_t id)
+{
+    sprintf(ENSID_BUF,ENSID_FMT,id);
+    return ENSID_BUF;
+}
+
+
+#define N_REF_PAD 10    // number of bases to avoid boundary effects
+
+#define STRAND_REV 0
+#define STRAND_FWD 1
+
+#define TRIM_NONE   0
+#define TRIM_5PRIME 1
+#define TRIM_3PRIME 2
+
+// How to treat phased/unphased genotypes
+#define PHASE_REQUIRE 0     // --phase r
+#define PHASE_MERGE   1     // --phase m
+#define PHASE_AS_IS   2     // --phase a
+#define PHASE_SKIP    3     // --phase s
+#define PHASE_NON_REF 4     // --phase R
+#define PHASE_DROP_GT 5     // --samples -
+
+// Node types in the haplotype tree
+#define HAP_CDS   0
+#define HAP_ROOT  1 
+#define HAP_SSS   2     // start/stop/splice
+
+#define CSQ_PRINTED_UPSTREAM    (1<<0)
+#define CSQ_SYNONYMOUS_VARIANT  (1<<1)
+#define CSQ_MISSENSE_VARIANT    (1<<2)
+#define CSQ_STOP_LOST           (1<<3)
+#define CSQ_STOP_GAINED         (1<<4)
+#define CSQ_INFRAME_DELETION    (1<<5)
+#define CSQ_INFRAME_INSERTION   (1<<6)
+#define CSQ_FRAMESHIFT_VARIANT  (1<<7)
+#define CSQ_SPLICE_ACCEPTOR     (1<<8)
+#define CSQ_SPLICE_DONOR        (1<<9)
+#define CSQ_START_LOST          (1<<10)
+#define CSQ_SPLICE_REGION       (1<<11)
+#define CSQ_STOP_RETAINED       (1<<12)
+#define CSQ_UTR5                (1<<13)
+#define CSQ_UTR3                (1<<14)
+#define CSQ_NON_CODING          (1<<15)
+#define CSQ_INTRON              (1<<16)
+//#define CSQ_INTERGENIC          (1<<17)
+#define CSQ_INFRAME_ALTERING    (1<<18)
+#define CSQ_UPSTREAM_STOP       (1<<19)     // adds * in front of the csq string
+#define CSQ_INCOMPLETE_CDS      (1<<20)     // to remove START/STOP in incomplete CDS, see ENSG00000173376/synon.vcf
+#define CSQ_CODING_SEQUENCE     (1<<21)     // cannot tell exactly what it is, but it does affect the coding sequence
+
+// Haplotype-aware consequences, printed in one vcf record only, the rest has a reference @12345
+#define CSQ_COMPOUND (CSQ_SYNONYMOUS_VARIANT|CSQ_MISSENSE_VARIANT|CSQ_STOP_LOST|CSQ_STOP_GAINED| \
+                      CSQ_INFRAME_DELETION|CSQ_INFRAME_INSERTION|CSQ_FRAMESHIFT_VARIANT| \
+                      CSQ_START_LOST|CSQ_STOP_RETAINED|CSQ_INFRAME_ALTERING|CSQ_INCOMPLETE_CDS| \
+                      CSQ_UPSTREAM_STOP)
+#define CSQ_START_STOP          (CSQ_STOP_LOST|CSQ_STOP_GAINED|CSQ_STOP_RETAINED|CSQ_START_LOST)
+
+#define CSQ_PRN_STRAND(csq)     ((csq)&CSQ_COMPOUND && !((csq)&(CSQ_SPLICE_ACCEPTOR|CSQ_SPLICE_DONOR|CSQ_SPLICE_REGION)))
+#define CSQ_PRN_TSCRIPT         (~(CSQ_INTRON|CSQ_NON_CODING))
+#define CSQ_PRN_BIOTYPE         CSQ_NON_CODING
+
+// see kput_vcsq()
+const char *csq_strings[] = 
+{
+    NULL, 
+    "synonymous", 
+    "missense", 
+    "stop_lost", 
+    "stop_gained", 
+    "inframe_deletion", 
+    "inframe_insertion", 
+    "frameshift", 
+    "splice_acceptor", 
+    "splice_donor", 
+    "start_lost", 
+    "splice_region", 
+    "stop_retained", 
+    "5_prime_utr", 
+    "3_prime_utr", 
+    "non_coding", 
+    "intron", 
+    "intergenic",
+    "inframe_altering",
+    NULL,
+    NULL,
+    "coding_sequence"
+};
+
+
+// GFF line types
+#define GFF_TSCRIPT_LINE 1
+#define GFF_GENE_LINE    2
+
+
+/* 
+    Genomic features, for fast lookup by position to overlapping features
+*/
+#define GF_coding_bit 6
+#define GF_is_coding(x) ((x) & (1<<GF_coding_bit))
+#define GF_MT_rRNA                       1                      // non-coding: 1, 2, ...
+#define GF_MT_tRNA                       2
+#define GF_lincRNA                       3
+#define GF_miRNA                         4
+#define GF_MISC_RNA                      5
+#define GF_rRNA                          6
+#define GF_snRNA                         7
+#define GF_snoRNA                        8
+#define GF_PROCESSED_TRANSCRIPT          9
+#define GF_ANTISENSE                    10
+#define GF_macro_lncRNA                 11
+#define GF_ribozyme                     12
+#define GF_sRNA                         13
+#define GF_scRNA                        14
+#define GF_scaRNA                       15
+#define GF_SENSE_INTRONIC               16
+#define GF_SENSE_OVERLAPPING            17
+#define GF_PSEUDOGENE                   18
+#define GF_PROCESSED_PSEUDOGENE         19
+#define GF_ARTIFACT                     20
+#define GF_IG_PSEUDOGENE                21
+#define GF_IG_C_PSEUDOGENE              22
+#define GF_IG_J_PSEUDOGENE              23
+#define GF_IG_V_PSEUDOGENE              24
+#define GF_TR_V_PSEUDOGENE              25
+#define GF_TR_J_PSEUDOGENE              26
+#define GF_MT_tRNA_PSEUDOGENE           27
+#define GF_misc_RNA_PSEUDOGENE          28
+#define GF_miRNA_PSEUDOGENE             29
+#define GF_RIBOZYME                     30
+#define GF_RETAINED_INTRON              31
+#define GF_RETROTRANSPOSED              32
+#define GF_tRNA_PSEUDOGENE              33
+#define GF_TRANSCRIBED_PROCESSED_PSEUDOGENE     34
+#define GF_TRANSCRIBED_UNPROCESSED_PSEUDOGENE   35
+#define GF_TRANSCRIBED_UNITARY_PSEUDOGENE       36
+#define GF_TRANSLATED_UNPROCESSED_PSEUDOGENE    37
+#define GF_TRANSLATED_PROCESSED_PSEUDOGENE      38
+#define GF_KNOWN_NCRNA                          39
+#define GF_UNITARY_PSEUDOGENE                   40
+#define GF_UNPROCESSED_PSEUDOGENE               41
+#define GF_LRG_GENE                             42
+#define GF_3PRIME_OVERLAPPING_ncRNA             43
+#define GF_DISRUPTED_DOMAIN                     44
+#define GF_vaultRNA                             45
+#define GF_BIDIRECTIONAL_PROMOTER_lncRNA        46
+#define GF_AMBIGUOUS_ORF                        47
+#define GF_PROTEIN_CODING               (1|(1<<GF_coding_bit))  // coding: 65, 66, ...
+#define GF_POLYMORPHIC_PSEUDOGENE       (2|(1<<GF_coding_bit))
+#define GF_IG_C                         (3|(1<<GF_coding_bit))
+#define GF_IG_D                         (4|(1<<GF_coding_bit))
+#define GF_IG_J                         (5|(1<<GF_coding_bit))
+#define GF_IG_LV                        (6|(1<<GF_coding_bit))
+#define GF_IG_V                         (7|(1<<GF_coding_bit))
+#define GF_TR_C                         (8|(1<<GF_coding_bit))
+#define GF_TR_D                         (9|(1<<GF_coding_bit))
+#define GF_TR_J                        (10|(1<<GF_coding_bit))
+#define GF_TR_V                        (11|(1<<GF_coding_bit))
+#define GF_NMD                         (12|(1<<GF_coding_bit))
+#define GF_NON_STOP_DECAY              (13|(1<<GF_coding_bit))
+#define GF_CDS      ((1<<(GF_coding_bit+1))+1)                  // special types: 129, 130, ...
+#define GF_EXON     ((1<<(GF_coding_bit+1))+2)
+#define GF_UTR3     ((1<<(GF_coding_bit+1))+3)
+#define GF_UTR5     ((1<<(GF_coding_bit+1))+4)
+// GF_MAX = (1<<30)-1, see hap_node_t
+
+typedef struct _tscript_t tscript_t;
+typedef struct
+{
+    tscript_t *tr;      // transcript
+    uint32_t beg;       // the start coordinate of the CDS (on the reference strand, 0-based)
+    uint32_t pos;       // 0-based index of the first exon base within the transcript (only to
+                        //  update hap_node_t.sbeg in hap_init, could be calculated on the fly)
+    uint32_t len;       // exon length
+    uint32_t icds:30,   // exon index within the transcript
+             phase:2;   // offset of the CDS
+}
+gf_cds_t;
+typedef struct
+{
+    char *name;           // human readable name, e.g. ORF45
+    uint8_t iseq;
+}
+gf_gene_t;
+typedef struct
+{
+    uint32_t beg,end;
+    tscript_t *tr;
+}
+gf_exon_t;
+typedef enum { prime3, prime5 } utr_t;
+typedef struct
+{
+    utr_t which;
+    uint32_t beg,end;
+    tscript_t *tr;
+}
+gf_utr_t;
+
+
+/*
+    Structures related to VCF output:
+
+    vcsq_t
+        information required to assemble consequence lines such as "inframe_deletion|XYZ|ENST01|+|5TY>5I|121ACG>A+124TA>T"
+
+    vcrec_t 
+        single VCF record and csq tied to this record. (Haplotype can have multiple
+        consequences in several VCF records. Each record can have multiple consequences
+        from multiple haplotypes.)
+
+    csq_t
+        a top-level consequence tied to a haplotype
+
+    vbuf_t
+    pos2vbuf
+        VCF records with the same position clustered together for a fast lookup via pos2vbuf
+*/
+typedef struct _vbuf_t vbuf_t;
+typedef struct _vcsq_t vcsq_t;
+struct _vcsq_t
+{
+    uint32_t strand:1,
+             type:31;   // one of CSQ_* types
+    uint32_t trid;
+    uint32_t biotype;   // one of GF_* types
+    char *gene;         // gene name
+    bcf1_t *ref;        // if type&CSQ_PRINTED_UPSTREAM, ref consequence "@1234"
+    kstring_t vstr;     // variant string, eg 5TY>5I|121ACG>A+124TA>T
+};
+typedef struct
+{
+    bcf1_t *line;
+    uint32_t *smpl;     // bitmask of sample consequences with first/second haplotype interleaved
+    uint32_t nfmt:4, nvcsq:28, mvcsq;
+    vcsq_t *vcsq;       // there can be multiple consequences for a single VCF record
+}
+vrec_t;
+typedef struct
+{
+    uint32_t pos;
+    vrec_t *vrec;   // vcf line that this csq is tied to; needed when printing haplotypes (hap_stage_vcf)
+    int idx;        // 0-based index of the csq at the VCF line, for FMT/BCSQ
+    vcsq_t type;
+}
+csq_t;
+struct _vbuf_t
+{
+    vrec_t **vrec;   // buffer of VCF lines with the same position
+    int n, m;
+};
+KHASH_MAP_INIT_INT(pos2vbuf, vbuf_t*)
+
+
+/*
+    Structures related to haplotype-aware consequences in coding regions
+
+    hap_node_t
+        node of a haplotype tree. Each transcript has one tree
+
+    tscript_t
+        despite its general name, it is intended for coding transcripts only
+
+    hap_t
+    hstack_t
+        for traversal of the haplotype tree and braking combined
+        consequences into independent parts
+*/
+typedef struct _hap_node_t hap_node_t;
+struct _hap_node_t
+{
+    char *seq;          // cds segment [parent_node,this_node)
+    char *var;          // variant "ref>alt"
+    uint32_t type:2,    // HAP_ROOT or HAP_CDS
+             csq:30;    // this node's consequence
+    int dlen;           // alt minus ref length: <0 del, >0 ins, 0 substitution
+    uint32_t rbeg;      // variant's VCF position (0-based, inclusive)
+    int32_t rlen;       // variant's rlen; alen=rlen+dlen; fake for non CDS types
+    uint32_t sbeg;      // variant's position on the spliced reference transcript (0-based, inclusive, N_REF_PAD not included)
+    uint32_t icds;      // which exon does this node's variant overlaps
+    hap_node_t **child, *prev;  // children haplotypes and previous coding node
+    int nchild, mchild;
+    bcf1_t *cur_rec, *rec;      // current VCF record and node's VCF record
+    uint32_t nend;              // number of haplotypes ending in this node
+    int *cur_child, mcur_child; // mapping from the allele to the currently active child
+    csq_t *csq_list;            // list of haplotype's consequences, broken by position
+    int ncsq_list, mcsq_list;
+};
+struct _tscript_t
+{
+    uint32_t id;        // transcript id
+    uint32_t beg,end;   // transcript's beg and end coordinate (ref strand, 0-based, inclusive)
+    uint32_t strand:1,  // STRAND_REV or STRAND_FWD
+             ncds:31,   // number of exons
+             mcds;
+    gf_cds_t **cds;     // ordered list of exons
+    char *ref;          // reference sequence, padded with N_REF_PAD bases on both ends
+    char *sref;         // spliced reference sequence, padded with N_REF_PAD bases on both ends
+    hap_node_t *root;   // root of the haplotype tree
+    hap_node_t **hap;   // pointer to haplotype leaves, two for each sample
+    int nhap, nsref;    // number of haplotypes and length of sref, including 2*N_REF_PAD
+    uint32_t trim:2,    // complete, 5' or 3' trimmed, see TRIM_* types
+             type:30;   // one of GF_* types
+    gf_gene_t *gene;
+};
+static inline int cmp_tscript(tscript_t **a, tscript_t **b)
+{
+    return ( (*a)->end  < (*b)->end ) ? 1 : 0;
+}
+KHEAP_INIT(trhp, tscript_t*, cmp_tscript)
+typedef khp_trhp_t tr_heap_t;
+typedef struct
+{
+    hap_node_t *node;   // current node
+    int ichild;         // current child in the active node
+    int dlen;           // total dlen, from the root to the active node
+    size_t slen;        // total sequence length, from the root to the active node
+}
+hstack_t;
+typedef struct
+{
+    int mstack;
+    hstack_t *stack;
+    tscript_t *tr;      // tr->ref: spliced transcript on ref strand
+    kstring_t sseq;     // spliced haplotype sequence on ref strand
+    kstring_t tseq;     // the variable part of translated haplotype transcript, coding strand
+    kstring_t tref;     // the variable part of translated reference transcript, coding strand
+    uint32_t sbeg;      // stack's sbeg, for cases first node's type is HAP_SSS
+    int upstream_stop;
+}
+hap_t;
+
+
+/*
+    Helper structures, only for initialization
+    
+    ftr_t
+        temporary list of all exons, CDS, UTRs 
+*/
+KHASH_MAP_INIT_INT(int2tscript, tscript_t*)
+KHASH_MAP_INIT_INT(int2int, int)
+KHASH_MAP_INIT_INT(int2gene, gf_gene_t*)
+typedef struct
+{
+    int type;       // GF_CDS, GF_EXON, GF_5UTR, GF_3UTR
+    uint32_t beg;
+    uint32_t end;
+    uint32_t trid;
+    uint32_t strand:1;   // STRAND_REV,STRAND_FWD
+    uint32_t phase:2;    // 0, 1 or 2
+    uint32_t iseq:29;
+}
+ftr_t;
+typedef struct
+{
+    // all exons, CDS, UTRs
+    ftr_t *ftr;
+    int nftr, mftr;
+
+    // mapping from transcript ensembl id to gene id
+    kh_int2gene_t *gid2gene;
+
+    // mapping from transcript id to tscript, for quick CDS anchoring
+    kh_int2tscript_t *id2tr;
+
+    // sequences
+    void *seq2int;
+    char **seq;
+    int nseq, mseq;
+
+    // ignored biotypes
+    void *ignored_biotypes;
+}
+aux_t;
+
+typedef struct _args_t
+{
+    // the main regidx lookups, from chr:beg-end to overlapping features and
+    // index iterator
+    regidx_t *idx_cds, *idx_utr, *idx_exon, *idx_tscript;
+    regitr_t *itr;
+
+    // temporary structures, deleted after initializtion
+    aux_t init;
+
+    // text tab-delimited output (out) or vcf/bcf output (out_fh)
+    FILE *out;
+    htsFile *out_fh;
+
+    // vcf
+    bcf_srs_t *sr;
+    bcf_hdr_t *hdr;
+    int hdr_nsmpl;          // actual number of samples in the vcf, for bcf_update_format_values()
+
+    // include or exclude sites which match the filters
+    filter_t *filter;
+    char *filter_str;
+    int filter_logic;       // FLT_INCLUDE or FLT_EXCLUDE
+
+    // samples to process
+    int sample_is_file;
+    char *sample_list;
+    smpl_ilist_t *smpl;
+
+    char *outdir, **argv, *fa_fname, *gff_fname, *output_fname;
+    char *bcsq_tag;
+    int argc, output_type;
+    int phase, quiet, local_csq;
+    int ncsq_max, nfmt_bcsq;    // maximum number of csq per site that can be accessed from FORMAT/BCSQ
+    int ncsq_small_warned;
+    
+    int rid;                    // current chromosome
+    tr_heap_t *active_tr;       // heap of active transcripts for quick flushing
+    hap_t *hap;                 // transcript haplotype recursion
+    vbuf_t **vcf_buf;           // buffered VCF lines to annotate with CSQ and flush
+    rbuf_t vcf_rbuf;            // round buffer indexes to vcf_buf
+    kh_pos2vbuf_t *pos2vbuf;    // fast lookup of buffered lines by position
+    tscript_t **rm_tr;          // buffer of transcripts to clean
+    int nrm_tr, mrm_tr;
+    csq_t *csq_buf;             // pool of csq not managed by hap_node_t, i.e. non-CDS csqs
+    int ncsq_buf, mcsq_buf;
+
+    faidx_t *fai;
+    kstring_t str, str2;
+    int32_t *gt_arr, mgt_arr;
+}
+args_t;
+
+// AAA, AAC, ...
+const char *gencode = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF";
+const uint8_t nt4[] =
+{
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,0,4,1, 4,4,4,2, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 3,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,0,4,1, 4,4,4,2, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 3
+};
+const uint8_t cnt4[] =
+{
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,3,4,2, 4,4,4,1, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 0,4,4,4, 4,4,4,4, 4,4,4,4,
+    4,3,4,2, 4,4,4,1, 4,4,4,4, 4,4,4,4,
+    4,4,4,4, 0
+};
+#define dna2aa(x)  gencode[  nt4[(uint8_t)(x)[0]]<<4 |  nt4[(uint8_t)(x)[1]]<<2 |  nt4[(uint8_t)(x)[2]] ]
+#define cdna2aa(x) gencode[ cnt4[(uint8_t)(x)[2]]<<4 | cnt4[(uint8_t)(x)[1]]<<2 | cnt4[(uint8_t)(x)[0]] ]
+
+static const char *gf_strings_noncoding[] = 
+{ 
+    "MT_rRNA", "MT_tRNA", "lincRNA", "miRNA", "misc_RNA", "rRNA", "snRNA", "snoRNA", "processed_transcript",
+    "antisense", "macro_lncRNA", "ribozyme", "sRNA", "scRNA", "scaRNA", "sense_intronic", "sense_overlapping",
+    "pseudogene", "processed_pseudogene", "artifact", "IG_pseudogene", "IG_C_pseudogene", "IG_J_pseudogene", 
+    "IG_V_pseudogene", "TR_V_pseudogene", "TR_J_pseudogene", "MT_tRNA_pseudogene", "misc_RNA_pseudogene", 
+    "miRNA_pseudogene", "ribozyme", "retained_intron", "retrotransposed", "Trna_pseudogene", "transcribed_processed_pseudogene", 
+    "transcribed_unprocessed_pseudogene", "transcribed_unitary_pseudogene",    "translated_unprocessed_pseudogene",
+    "translated_processed_pseudogene", "known_ncRNA", "unitary_pseudogene", "unprocessed_pseudogene",
+    "LRG_gene", "3_prime_overlapping_ncRNA", "disrupted_domain", "vaultRNA", "bidirectional_promoter_lncRNA", "ambiguous_orf"
+};
+static const char *gf_strings_coding[] = { "protein_coding", "polymorphic_pseudogene", "IG_C", "IG_D", "IG_J", "IG_LV", "IG_V", "TR_C", "TR_D", "TR_J", "TR_V", "NMD", "non_stop_decay"};
+static const char *gf_strings_special[] = { "CDS", "exon", "3_prime_UTR", "5_prime_UTR" };
+
+const char *gf_type2gff_string(int type)
+{
+    if ( !GF_is_coding(type) )
+    {
+        if ( type < (1<<GF_coding_bit) ) return gf_strings_noncoding[type-1];
+        type &= (1<<(GF_coding_bit+1)) - 1;
+        return gf_strings_special[type - 1];
+    }
+    type &= (1<<GF_coding_bit) - 1;
+    return gf_strings_coding[type - 1];
+}
+
+/*
+    gff parsing functions
+*/
+static inline int feature_set_seq(args_t *args, char *chr_beg, char *chr_end)
+{
+    aux_t *aux = &args->init;
+    char c = chr_end[1];
+    chr_end[1] = 0;
+    int iseq;
+    if ( khash_str2int_get(aux->seq2int, chr_beg, &iseq)!=0 )
+    {
+        hts_expand(char*, aux->nseq+1, aux->mseq, aux->seq);
+        aux->seq[aux->nseq] = strdup(chr_beg);
+        iseq = khash_str2int_inc(aux->seq2int, aux->seq[aux->nseq]);
+        aux->nseq++;
+        assert( aux->nseq < 256 );  // see gf_gene_t.iseq
+    }
+    chr_end[1] = c;
+    return iseq;
+}
+static inline char *gff_skip(const char *line, char *ss)
+{
+    while ( *ss && *ss!='\t' ) ss++;
+    if ( !*ss ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__,line);
+    return ss+1;
+}
+static inline void gff_parse_chr(const char *line, char **chr_beg, char **chr_end)
+{
+    char *se = (char*) line;
+    while ( *se && *se!='\t' ) se++;
+    if ( !*se ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__,line);
+    *chr_beg = (char*) line;
+    *chr_end = se-1;
+}
+static inline char *gff_parse_beg_end(const char *line, char *ss, uint32_t *beg, uint32_t *end)
+{
+    char *se = ss;
+    *beg = strtol(ss, &se, 10) - 1;
+    if ( ss==se ) error("[%s:%d %s] Could not parse the line:\n\t%s\n\t%s\n",__FILE__,__LINE__,__FUNCTION__,line,ss);
+    ss = se+1;
+    *end = strtol(ss, &se, 10) - 1;
+    if ( ss==se ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__,line);
+    return se+1;
+}
+static inline uint32_t gff_parse_id(const char *line, const char *needle, char *ss)
+{
+    ss = strstr(ss,needle);
+    if ( !ss ) error("[%s:%d %s] Could not parse the line, \"%s\" not present: %s\n",__FILE__,__LINE__,__FUNCTION__,needle,line);
+    ss += strlen(needle);
+    while ( *ss && !isdigit(*ss) ) ss++;
+    if ( !ss ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__, line);
+    char *se;
+    uint32_t id = strtol(ss, &se, 10);
+    if ( ss==se ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__, line);
+    if ( *se && *se!=';' && *se!='\t' ) error("[%s:%d %s] Could not parse the line: %s\n",__FILE__,__LINE__,__FUNCTION__,line);
+    assert( id <= 0xffffff );   // see gf_gene_t.id. Ensembl IDs are never that big in practice
+    return id;
+}
+static void gff_parse_ensid_fmt(const char *line, const char *needle, char *ss)
+{
+    ss = strstr(ss,needle);
+    if ( !ss ) error("[%s:%d %s] Could not parse the line, \"%s\" not present: %s\n",__FILE__,__LINE__,__FUNCTION__,needle,line);
+    ss += strlen(needle);
+    char *se = ss;
+    while ( *se && !isdigit(*se) ) se++;
+    kstring_t str = {0,0,0};
+    kputsn(ss,se-ss,&str);
+    ss = se;
+    while ( *se && isdigit(*se) ) se++;
+    ksprintf(&str,"%%0%dd",(int)(se-ss));
+    ENSID_FMT = str.s;
+}
+static inline int gff_parse_type(char *line)
+{
+    line = strstr(line,"ID=");
+    if ( !line ) return -1;
+    line += 3;
+    if ( !strncmp(line,"transcript:",11) ) return GFF_TSCRIPT_LINE;
+    else if ( !strncmp(line,"gene:",5) ) return GFF_GENE_LINE;
+    return -1;
+}
+static inline int gff_parse_biotype(char *_line)
+{
+    char *line = strstr(_line,"biotype=");
+    if ( !line ) return -1;
+
+    line += 8;
+    switch (*line)
+    {
+        case 'p': 
+            if ( !strncmp(line,"protein_coding",14) ) return GF_PROTEIN_CODING;
+            else if ( !strncmp(line,"pseudogene",10) ) return GF_PSEUDOGENE;
+            else if ( !strncmp(line,"processed_transcript",20) ) return GF_PROCESSED_TRANSCRIPT;
+            else if ( !strncmp(line,"processed_pseudogene",20) ) return GF_PROCESSED_PSEUDOGENE;
+            else if ( !strncmp(line,"polymorphic_pseudogene",22) ) return GF_POLYMORPHIC_PSEUDOGENE;
+            break;
+        case 'a':
+            if ( !strncmp(line,"artifact",8) ) return GF_ARTIFACT;
+            else if ( !strncmp(line,"antisense",9) ) return GF_ANTISENSE;
+            else if ( !strncmp(line,"ambiguous_orf",13) ) return GF_AMBIGUOUS_ORF;
+            break;
+        case 'I':
+            if ( !strncmp(line,"IG_C_gene",9) ) return GF_IG_C;
+            else if ( !strncmp(line,"IG_D_gene",9) ) return GF_IG_D;
+            else if ( !strncmp(line,"IG_J_gene",9) ) return GF_IG_J;
+            else if ( !strncmp(line,"IG_LV_gene",10) ) return GF_IG_LV;
+            else if ( !strncmp(line,"IG_V_gene",9) ) return GF_IG_V;
+            else if ( !strncmp(line,"IG_pseudogene",13) ) return GF_IG_PSEUDOGENE;
+            else if ( !strncmp(line,"IG_C_pseudogene",15) ) return GF_IG_C_PSEUDOGENE;
+            else if ( !strncmp(line,"IG_J_pseudogene",15) ) return GF_IG_J_PSEUDOGENE;
+            else if ( !strncmp(line,"IG_V_pseudogene",15) ) return GF_IG_V_PSEUDOGENE;
+            break;
+        case 'T':
+            if ( !strncmp(line,"TR_C_gene",9) ) return GF_TR_C;
+            else if ( !strncmp(line,"TR_D_gene",9) ) return GF_TR_D;
+            else if ( !strncmp(line,"TR_J_gene",9) ) return GF_TR_J;
+            else if ( !strncmp(line,"TR_V_gene",9) ) return GF_TR_V;
+            else if ( !strncmp(line,"TR_V_pseudogene",15) ) return GF_TR_V_PSEUDOGENE;
+            else if ( !strncmp(line,"TR_J_pseudogene",15) ) return GF_TR_J_PSEUDOGENE;
+            break;
+        case 'M':
+            if ( !strncmp(line,"Mt_tRNA_pseudogene",18) ) return GF_MT_tRNA_PSEUDOGENE;
+            else if ( !strncmp(line,"Mt_tRNA",7) ) return GF_MT_tRNA;
+            else if ( !strncmp(line,"Mt_rRNA",7) ) return GF_MT_tRNA;
+            break;
+        case 'l':
+            if ( !strncmp(line,"lincRNA",7) ) return GF_lincRNA;
+            break;
+        case 'm':
+            if ( !strncmp(line,"macro_lncRNA",12) ) return GF_macro_lncRNA;
+            else if ( !strncmp(line,"misc_RNA_pseudogene",19) ) return GF_misc_RNA_PSEUDOGENE;
+            else if ( !strncmp(line,"miRNA_pseudogene",16) ) return GF_miRNA_PSEUDOGENE;
+            else if ( !strncmp(line,"miRNA",5) ) return GF_miRNA;
+            else if ( !strncmp(line,"misc_RNA",8) ) return GF_MISC_RNA;
+            break;
+        case 'r':
+            if ( !strncmp(line,"rRNA",4) ) return GF_rRNA;
+            else if ( !strncmp(line,"ribozyme",8) ) return GF_RIBOZYME;
+            else if ( !strncmp(line,"retained_intron",15) ) return GF_RETAINED_INTRON;
+            else if ( !strncmp(line,"retrotransposed",15) ) return GF_RETROTRANSPOSED;
+            break;
+        case 's':
+            if ( !strncmp(line,"snRNA",5) ) return GF_snRNA;
+            else if ( !strncmp(line,"sRNA",4) ) return GF_sRNA;
+            else if ( !strncmp(line,"scRNA",5) ) return GF_scRNA;
+            else if ( !strncmp(line,"scaRNA",6) ) return GF_scaRNA;
+            else if ( !strncmp(line,"snoRNA",6) ) return GF_snoRNA;
+            else if ( !strncmp(line,"sense_intronic",14) ) return GF_SENSE_INTRONIC;
+            else if ( !strncmp(line,"sense_overlapping",17) ) return GF_SENSE_OVERLAPPING;
+            break;
+        case 't':
+            if ( !strncmp(line,"tRNA_pseudogene",15) ) return GF_tRNA_PSEUDOGENE;
+            else if ( !strncmp(line,"transcribed_processed_pseudogene",32) ) return GF_TRANSCRIBED_PROCESSED_PSEUDOGENE;
+            else if ( !strncmp(line,"transcribed_unprocessed_pseudogene",34) ) return GF_TRANSCRIBED_UNPROCESSED_PSEUDOGENE; 
+            else if ( !strncmp(line,"transcribed_unitary_pseudogene",30) ) return GF_TRANSCRIBED_UNITARY_PSEUDOGENE;
+            else if ( !strncmp(line,"translated_unprocessed_pseudogene",33) ) return GF_TRANSLATED_UNPROCESSED_PSEUDOGENE;
+            else if ( !strncmp(line,"translated_processed_pseudogene",31) ) return GF_TRANSLATED_PROCESSED_PSEUDOGENE;
+            break;
+        case 'n':
+            if ( !strncmp(line,"nonsense_mediated_decay",23) ) return GF_NMD;
+            else if ( !strncmp(line,"non_stop_decay",14) ) return GF_NON_STOP_DECAY;
+            break;
+        case 'k':
+            if ( !strncmp(line,"known_ncrna",11) ) return GF_KNOWN_NCRNA;
+            break;
+        case 'u':
+            if ( !strncmp(line,"unitary_pseudogene",18) ) return GF_UNITARY_PSEUDOGENE;
+            else if ( !strncmp(line,"unprocessed_pseudogene",22) ) return GF_UNPROCESSED_PSEUDOGENE;
+            break;
+        case 'L':
+            if ( !strncmp(line,"LRG_gene",8) ) return GF_LRG_GENE;
+            break;
+        case '3':
+            if ( !strncmp(line,"3prime_overlapping_ncRNA",24) ) return GF_3PRIME_OVERLAPPING_ncRNA;
+            break;
+        case 'd':
+            if ( !strncmp(line,"disrupted_domain",16) ) return GF_DISRUPTED_DOMAIN;
+            break;
+        case 'v':
+            if ( !strncmp(line,"vaultRNA",8) ) return GF_vaultRNA;
+            break;
+        case 'b':
+            if ( !strncmp(line,"bidirectional_promoter_lncRNA",29) ) return GF_BIDIRECTIONAL_PROMOTER_lncRNA;
+            break;
+    }
+    return 0;
+}
+static inline int gff_ignored_biotype(args_t *args, char *ss)
+{
+    ss = strstr(ss,"biotype=");
+    if ( !ss ) return 0;
+
+    ss += 8;
+    char *se = ss, tmp;
+    while ( *se && *se!=';' ) se++;
+    tmp = *se;
+    *se = 0;
+
+    char *key = ss;
+    int n = 0;
+    if ( khash_str2int_get(args->init.ignored_biotypes, ss, &n)!=0 ) key = strdup(ss);
+    khash_str2int_set(args->init.ignored_biotypes, key, n+1);
+
+    *se = tmp;
+    return 1;
+}
+gf_gene_t *gene_init(aux_t *aux, uint32_t gene_id)
+{
+    khint_t k = kh_get(int2gene, aux->gid2gene, (int)gene_id);
+    gf_gene_t *gene = (k == kh_end(aux->gid2gene)) ? NULL : kh_val(aux->gid2gene, k);
+    if ( !gene )
+    {
+        gene = (gf_gene_t*) calloc(1,sizeof(gf_gene_t));
+        int ret;
+        k = kh_put(int2gene, aux->gid2gene, (int)gene_id, &ret);
+        kh_val(aux->gid2gene,k) = gene;
+    }
+    return gene;
+}
+void gff_parse_transcript(args_t *args, const char *line, char *ss, ftr_t *ftr)
+{
+    aux_t *aux = &args->init;
+    int biotype = gff_parse_biotype(ss);
+    if ( biotype <= 0 )
+    {
+        if ( !gff_ignored_biotype(args, ss) && args->quiet<2 ) fprintf(pysam_stderr,"ignored transcript: %s\n",line);
+        return;
+    }
+
+    // create a mapping from transcript_id to gene_id
+    uint32_t trid = gff_parse_id(line, "ID=transcript:", ss);
+    uint32_t gene_id = gff_parse_id(line, "Parent=gene:", ss);
+
+    if ( !ENSID_FMT ) gff_parse_ensid_fmt(line, "ID=transcript:", ss);      // id prefix different across species
+
+    tscript_t *tr = (tscript_t*) calloc(1,sizeof(tscript_t));
+    tr->id     = trid;
+    tr->strand = ftr->strand;
+    tr->gene   = gene_init(aux, gene_id);
+    tr->type   = biotype;
+    tr->beg    = ftr->beg;
+    tr->end    = ftr->end;
+
+    khint_t k;
+    int ret;
+    k = kh_put(int2tscript, aux->id2tr, (int)trid, &ret);
+    kh_val(aux->id2tr,k) = tr;
+}
+void gff_parse_gene(args_t *args, const char *line, char *ss, char *chr_beg, char *chr_end, ftr_t *ftr)
+{
+    int biotype = gff_parse_biotype(ss);
+    if ( biotype <= 0 )
+    {
+        if ( !gff_ignored_biotype(args, ss) && args->quiet<2 ) fprintf(pysam_stderr,"ignored gene: %s\n",line);
+        return;
+    }
+
+    aux_t *aux = &args->init;
+
+    // substring search for "ID=gene:ENSG00000437963"
+    uint32_t gene_id = gff_parse_id(line, "ID=gene:", ss);
+    gf_gene_t *gene = gene_init(aux, gene_id);
+    assert( !gene->name );      // the gene_id should be unique
+
+    gene->iseq = feature_set_seq(args, chr_beg,chr_end);
+
+    // substring search for "Name=OR4F5"
+    ss = strstr(chr_end+2,"Name=");
+    if ( !ss ) error("Could not parse the line, \"Name=\" not present: %s\n", line);
+    ss += 5;
+    char *se = ss;
+    while ( *se && *se!=';' && !isspace(*se) ) se++;
+    gene->name = (char*) malloc(se-ss+1);
+    memcpy(gene->name,ss,se-ss);
+    gene->name[se-ss] = 0;
+}
+int gff_parse(args_t *args, char *line, ftr_t *ftr)
+{
+    // - skip empty lines and commented lines
+    // - columns 
+    //      1.      chr
+    //      2.      <skip>
+    //      3.      CDS, transcript, gene, ...
+    //      4-5.    beg,end
+    //      6.      <skip>
+    //      7.      strand
+    //      8.      phase
+    //      9.      Parent=transcript:ENST(\d+);ID=... etc
+
+    char *ss = line;
+    if ( !*ss ) return -1;      // skip blank lines
+    if ( *ss=='#' ) return -1;  // skip comments
+
+    char *chr_beg, *chr_end;
+    gff_parse_chr(line, &chr_beg, &chr_end);
+    ss = gff_skip(line, chr_end + 2);
+
+    // 3. column: is this a CDS, transcript, gene, etc.
+    if ( !strncmp("exon\t",ss,5) ) { ftr->type = GF_EXON; ss += 5; }
+    else if ( !strncmp("CDS\t",ss,4) ) { ftr->type = GF_CDS; ss += 4; }
+    else if ( !strncmp("three_prime_UTR\t",ss,16) ) { ftr->type = GF_UTR3; ss += 16; }
+    else if ( !strncmp("five_prime_UTR\t",ss,15) ) { ftr->type = GF_UTR5; ss += 15; }
+    else
+    {
+        ss = gff_skip(line, ss);
+        ss = gff_parse_beg_end(line, ss, &ftr->beg,&ftr->end);
+        ss = gff_skip(line, ss);
+        int type = gff_parse_type(ss);
+        if ( type!=GFF_TSCRIPT_LINE && type!=GFF_GENE_LINE ) 
+        {
+            // we ignore these, debug print to see new types:
+            ss = strstr(ss,"ID=");
+            if ( !ss ) return -1;   // no ID, ignore the line
+            if ( !strncmp("chromosome",ss+3,10) ) return -1;
+            if ( !strncmp("supercontig",ss+3,11) ) return -1;
+            if ( args->quiet<2 ) fprintf(pysam_stderr,"ignored: %s\n", line);
+            return -1;
+        }
+
+        // 7. column: strand
+        if ( *ss == '+' ) ftr->strand = STRAND_FWD;
+        else if ( *ss == '-' ) ftr->strand = STRAND_REV;
+        else error("Unknown strand: %c .. %s\n", *ss,ss);
+
+        if ( type==GFF_TSCRIPT_LINE )
+            gff_parse_transcript(args, line, ss, ftr);
+        else
+            gff_parse_gene(args, line, ss, chr_beg, chr_end, ftr);
+
+        return -1;
+    }
+    ss = gff_parse_beg_end(line, ss, &ftr->beg,&ftr->end);
+    ss = gff_skip(line, ss);
+
+    // 7. column: strand
+    if ( *ss == '+' ) ftr->strand = STRAND_FWD;
+    else if ( *ss == '-' ) ftr->strand = STRAND_REV;
+    else { if ( args->quiet<2 ) fprintf(pysam_stderr,"Skipping unknown strand: %c\n", *ss); return -1; }
+    ss += 2;
+
+    // 8. column: phase (codon offset)
+    if ( *ss == '0' ) ftr->phase = 0;
+    else if ( *ss == '1' ) ftr->phase = 1;
+    else if ( *ss == '2' ) ftr->phase = 2;
+    else if ( *ss == '.' ) ftr->phase = 0;      // exons do not have phase
+    else { if ( args->quiet<2 ) fprintf(pysam_stderr,"Skipping unknown phase: %c, %s\n", *ss, line); return -1; }
+    ss += 2;
+
+    // substring search for "Parent=transcript:ENST00000437963"
+    ftr->trid = gff_parse_id(line, "Parent=transcript:", ss);
+    ftr->iseq = feature_set_seq(args, chr_beg,chr_end);
+    return 0;
+}
+
+static int cmp_cds_ptr(const void *a, const void *b)
+{
+    // comparison function for qsort of transcripts's CDS
+    if ( (*((gf_cds_t**)a))->beg < (*((gf_cds_t**)b))->beg ) return -1;
+    if ( (*((gf_cds_t**)a))->beg > (*((gf_cds_t**)b))->beg ) return 1;
+    return 0;
+}
+
+static inline void chr_beg_end(aux_t *aux, int iseq, char **chr_beg, char **chr_end)
+{
+    *chr_beg = *chr_end = aux->seq[iseq];
+    while ( (*chr_end)[1] ) (*chr_end)++;
+}
+tscript_t *tscript_init(aux_t *aux, uint32_t trid)
+{
+    khint_t k = kh_get(int2tscript, aux->id2tr, (int)trid);
+    tscript_t *tr = (k == kh_end(aux->id2tr)) ? NULL : kh_val(aux->id2tr, k);
+    assert( tr );
+    return tr;
+}
+void register_cds(args_t *args, ftr_t *ftr)
+{
+    // Make the CDS searchable via idx_cds. Note we do not malloc tr->cds just yet.
+    //  ftr is the result of parsing a gff CDS line
+    aux_t *aux = &args->init;
+
+    tscript_t *tr = tscript_init(aux, ftr->trid);
+    if ( tr->strand != ftr->strand ) error("Conflicting strand in transcript %"PRIu32" .. %d vs %d\n",ftr->trid,tr->strand,ftr->strand);
+    
+    gf_cds_t *cds = (gf_cds_t*) malloc(sizeof(gf_cds_t));
+    cds->tr    = tr;
+    cds->beg   = ftr->beg;
+    cds->len   = ftr->end - ftr->beg + 1;
+    cds->icds  = 0;     // to keep valgrind on mac happy
+    cds->phase = ftr->phase;
+    
+    hts_expand(gf_cds_t*,tr->ncds+1,tr->mcds,tr->cds);
+    tr->cds[tr->ncds++] = cds;
+}
+void register_utr(args_t *args, ftr_t *ftr)
+{
+    aux_t *aux = &args->init;
+    gf_utr_t *utr = (gf_utr_t*) malloc(sizeof(gf_utr_t));
+    utr->which = ftr->type==GF_UTR3 ? prime3 : prime5;
+    utr->beg   = ftr->beg;
+    utr->end   = ftr->end;
+    utr->tr    = tscript_init(aux, ftr->trid);
+
+    char *chr_beg, *chr_end;
+    chr_beg_end(&args->init, utr->tr->gene->iseq, &chr_beg, &chr_end);
+    regidx_push(args->idx_utr, chr_beg,chr_end, utr->beg,utr->end, &utr);
+}
+void register_exon(args_t *args, ftr_t *ftr)
+{
+    aux_t *aux = &args->init;
+    gf_exon_t *exon = (gf_exon_t*) malloc(sizeof(gf_exon_t));
+    exon->beg = ftr->beg;
+    exon->end = ftr->end;
+    exon->tr  = tscript_init(aux, ftr->trid);
+
+    char *chr_beg, *chr_end;
+    chr_beg_end(&args->init, exon->tr->gene->iseq, &chr_beg, &chr_end);
+    regidx_push(args->idx_exon, chr_beg,chr_end, exon->beg - N_SPLICE_REGION_INTRON, exon->end + N_SPLICE_REGION_INTRON, &exon);
+}
+
+void tscript_init_cds(args_t *args)
+{
+    aux_t *aux = &args->init;
+
+    // Sort CDS in all transcripts, set offsets, check their phase, length, create index (idx_cds)
+    khint_t k;
+    for (k=0; k<kh_end(aux->id2tr); k++)
+    {
+        if ( !kh_exist(aux->id2tr, k) ) continue;
+        tscript_t *tr = (tscript_t*) kh_val(aux->id2tr, k);
+
+        // position-to-tscript lookup
+        char *chr_beg, *chr_end;
+        chr_beg_end(aux, tr->gene->iseq, &chr_beg, &chr_end);
+        regidx_push(args->idx_tscript, chr_beg, chr_end, tr->beg, tr->end, &tr);
+
+        if ( !tr->ncds ) continue;      // transcript with no CDS
+
+        // sort CDs
+        qsort(tr->cds, tr->ncds, sizeof(gf_cds_t*), cmp_cds_ptr);
+
+        // trim non-coding start
+        int i, len = 0;
+        if ( tr->strand==STRAND_FWD )
+        {
+            if ( tr->cds[0]->phase ) tr->trim |= TRIM_5PRIME;
+            tr->cds[0]->beg += tr->cds[0]->phase;
+            tr->cds[0]->len -= tr->cds[0]->phase;
+            tr->cds[0]->phase = 0;
+
+            // sanity check phase
+            for (i=0; i<tr->ncds; i++)
+            {
+                int phase = tr->cds[i]->phase ? 3 - tr->cds[i]->phase : 0;
+                if ( phase!=len%3)
+                    error("GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d)\n",ENSID(tr->id),tr->cds[i]->beg+1,phase,len);
+                assert( phase == len%3 );
+                len += tr->cds[i]->len; 
+            }
+        }
+        else
+        {
+            // Check that the phase is not bigger than CDS length. Curiously, this can really happen,
+            // see Mus_musculus.GRCm38.85.gff3.gz, transcript:ENSMUST00000163141
+            // todo: the same for the fwd strand
+            i = tr->ncds - 1;
+            int phase = tr->cds[i]->phase;
+            if ( phase ) tr->trim |= TRIM_5PRIME;
+            while ( i>=0 && phase > tr->cds[i]->len )
+            {
+                phase -= tr->cds[i]->len;
+                tr->cds[i]->phase = 0;
+                tr->cds[i]->len   = 0;
+                i--;
+            }
+            tr->cds[i]->len  -= tr->cds[i]->phase;
+            tr->cds[i]->phase = 0;
+
+            // sanity check phase
+            for (i=tr->ncds-1; i>=0; i--)
+            {
+                int phase = tr->cds[i]->phase ? 3 - tr->cds[i]->phase : 0;
+                if ( phase!=len%3)
+                    error("GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d)\n",ENSID(tr->id),tr->cds[i]->beg+1,phase,len);
+                len += tr->cds[i]->len;
+            }
+        }
+
+        // set len. At the same check that CDS within a transcript do not overlap
+        len = 0;
+        for (i=0; i<tr->ncds; i++)
+        {
+            tr->cds[i]->icds = i;
+            len += tr->cds[i]->len; 
+            if ( !i ) continue;
+
+            gf_cds_t *a = tr->cds[i-1];
+            gf_cds_t *b = tr->cds[i];
+            if ( a->beg + a->len - 1 >= b->beg ) 
+                error("Error: CDS overlap in the transcript %"PRIu32": %"PRIu32"-%"PRIu32" and %"PRIu32"-%"PRIu32"\n", 
+                    kh_key(aux->id2tr, k), a->beg+1,a->beg+a->len, b->beg+1,b->beg+b->len);
+        }
+        if ( len%3 != 0 )
+        {
+            // There are 13k transcripts with incomplete 3' CDS. See for example ENST00000524289
+            //  http://sep2015.archive.ensembl.org/Homo_sapiens/Transcript/Sequence_cDNA?db=core;g=ENSG00000155868;r=5:157138846-157159019;t=ENST00000524289
+            // Also, the incomplete CDS can be too short (1 or 2bp), so it is not enough to trim the last one.
+
+            tr->trim |= TRIM_3PRIME;
+            if ( tr->strand==STRAND_FWD )
+            {
+                i = tr->ncds - 1;
+                while ( i>=0 && len%3 )
+                {
+                    int dlen = tr->cds[i]->len >= len%3 ? len%3 : tr->cds[i]->len;
+                    tr->cds[i]->len -= dlen;
+                    len -= dlen;
+                    i--;
+                }
+            }
+            else
+            {
+                i = 0;
+                while ( i<tr->ncds && len%3 )
+                {
+                    int dlen = tr->cds[i]->len >= len%3 ? len%3 : tr->cds[i]->len;
+                    tr->cds[i]->len -= dlen;
+                    tr->cds[i]->beg += dlen;
+                    len -= dlen;
+                    i++;
+                }
+            }
+        }
+
+        // set CDS offsets and insert into regidx
+        len=0;
+        for (i=0; i<tr->ncds; i++)
+        {
+            tr->cds[i]->pos = len;
+            len += tr->cds[i]->len;
+            regidx_push(args->idx_cds, chr_beg,chr_end, tr->cds[i]->beg,tr->cds[i]->beg+tr->cds[i]->len-1, &tr->cds[i]);
+        }
+    }
+}
+
+void regidx_free_gf(void *payload) { free(*((gf_cds_t**)payload)); }
+void regidx_free_tscript(void *payload) { tscript_t *tr = *((tscript_t**)payload); free(tr->cds); free(tr); }
+
+void init_gff(args_t *args)
+{
+    aux_t *aux = &args->init;
+    aux->seq2int   = khash_str2int_init();   // chrom's numeric id
+    aux->gid2gene  = kh_init(int2gene);      // gene id to gf_gene_t, for idx_gene
+    aux->id2tr     = kh_init(int2tscript);   // transcript id to tscript_t
+    args->idx_tscript = regidx_init(NULL, NULL, regidx_free_tscript, sizeof(tscript_t*), NULL);
+    aux->ignored_biotypes = khash_str2int_init();
+
+    // parse gff
+    kstring_t str = {0,0,0};
+    htsFile *fp = hts_open(args->gff_fname,"r");
+    if ( !fp ) error("Failed to read %s\n", args->gff_fname);
+    while ( hts_getline(fp, KS_SEP_LINE, &str) > 0 )
+    {
+        hts_expand(ftr_t, aux->nftr+1, aux->mftr, aux->ftr);
+        int ret = gff_parse(args, str.s, aux->ftr + aux->nftr);
+        if ( !ret ) aux->nftr++;
+    }
+    free(str.s);
+    if ( hts_close(fp)!=0 ) error("Close failed: %s\n", args->gff_fname);
+
+
+    // process gff information: connect CDS and exons to transcripts
+    args->idx_cds  = regidx_init(NULL, NULL, regidx_free_gf, sizeof(gf_cds_t*), NULL);
+    args->idx_utr  = regidx_init(NULL, NULL, regidx_free_gf, sizeof(gf_utr_t*), NULL);
+    args->idx_exon = regidx_init(NULL, NULL, regidx_free_gf, sizeof(gf_exon_t*), NULL);
+    args->itr      = regitr_init(NULL);
+
+    int i;
+    for (i=0; i<aux->nftr; i++)
+    {
+        ftr_t *ftr = &aux->ftr[i];
+
+        // check whether to keep this feature: is there a mapping trid -> gene_id -> gene?
+        khint_t k = kh_get(int2tscript, aux->id2tr, (int)ftr->trid);
+        if ( k==kh_end(aux->id2tr) ) continue;       // no such transcript
+
+        tscript_t *tr = kh_val(aux->id2tr,k);
+        if ( !tr->gene->name )
+        {
+            // not a supported biotype (e.g. gene:pseudogene, transcript:processed_transcript)
+            regidx_free_tscript(&tr);
+            kh_del(int2tscript, aux->id2tr,k);
+            continue;
+        }
+
+        // populate regidx by category: 
+        //      ftr->type   .. GF_CDS, GF_EXON, GF_UTR3, GF_UTR5
+        //      gene->type  .. GF_PROTEIN_CODING, GF_MT_rRNA, GF_IG_C, ...
+        if ( ftr->type==GF_CDS ) register_cds(args, ftr);
+        else if ( ftr->type==GF_EXON ) register_exon(args, ftr);
+        else if ( ftr->type==GF_UTR5 ) register_utr(args, ftr);
+        else if ( ftr->type==GF_UTR3 ) register_utr(args, ftr);
+        else
+            error("something: %s\t%d\t%d\t%s\t%s\n", aux->seq[ftr->iseq],ftr->beg+1,ftr->end+1,ENSID(ftr->trid),gf_type2gff_string(ftr->type));
+    }
+    tscript_init_cds(args);
+
+    if ( !args->quiet )
+    {
+        fprintf(pysam_stderr,"Indexed %d transcripts, %d exons, %d CDSs, %d UTRs\n", 
+                regidx_nregs(args->idx_tscript),
+                regidx_nregs(args->idx_exon),
+                regidx_nregs(args->idx_cds),
+                regidx_nregs(args->idx_utr));
+    }
+
+    free(aux->ftr);
+    khash_str2int_destroy_free(aux->seq2int);
+    // keeping only to destroy the genes at the end: kh_destroy(int2gene,aux->gid2gene);
+    kh_destroy(int2tscript,aux->id2tr);
+    free(aux->seq);
+
+    if ( args->quiet<2 && khash_str2int_size(aux->ignored_biotypes) )
+    {
+        khash_t(str2int) *ign = (khash_t(str2int)*)aux->ignored_biotypes;
+        fprintf(pysam_stderr,"Ignored the following biotypes:\n");
+        for (i = kh_begin(ign); i < kh_end(ign); i++)
+        {
+            if ( !kh_exist(ign,i)) continue;
+            fprintf(pysam_stderr,"\t%dx\t.. %s\n", kh_value(ign,i), kh_key(ign,i));
+        }
+    }
+    khash_str2int_destroy_free(aux->ignored_biotypes);
+}
+
+void init_data(args_t *args)
+{
+    args->nfmt_bcsq = 1 + (args->ncsq_max - 1) / 32; 
+
+    if ( !args->quiet ) fprintf(pysam_stderr,"Parsing %s ...\n", args->gff_fname);
+    init_gff(args);
+
+    args->rid = -1;
+
+    if ( args->filter_str )
+        args->filter = filter_init(args->hdr, args->filter_str);
+
+    args->fai = fai_load(args->fa_fname);
+    if ( !args->fai ) error("Failed to load the fai index: %s\n", args->fa_fname);
+
+    args->pos2vbuf  = kh_init(pos2vbuf);
+    args->active_tr = khp_init(trhp);
+    args->hap = (hap_t*) calloc(1,sizeof(hap_t));
+
+    // init samples
+    if ( !bcf_hdr_nsamples(args->hdr) ) args->phase = PHASE_DROP_GT;
+    if ( args->sample_list && !strcmp("-",args->sample_list) )
+    {
+        // ignore all samples
+        if ( args->output_type==FT_TAB_TEXT ) 
+        {
+            // significant speedup for plain VCFs
+            bcf_hdr_set_samples(args->hdr,NULL,0);
+        }
+        args->phase = PHASE_DROP_GT;
+    }
+    else
+        args->smpl = smpl_ilist_init(args->hdr, args->sample_list, args->sample_is_file, SMPL_STRICT);
+    args->hdr_nsmpl = args->phase==PHASE_DROP_GT ? 0 : bcf_hdr_nsamples(args->hdr);
+
+    if ( args->output_type==FT_TAB_TEXT )
+    {
+        args->out = args->output_fname ? fopen(args->output_fname,"w") : pysam_stdout;
+        if ( !args->out ) error("Failed to open %s: %s\n", args->output_fname,strerror(errno));
+
+        fprintf(args->out,"# This file was produced by: bcftools +csq(%s+htslib-%s)\n", bcftools_version(),hts_version());
+        fprintf(args->out,"# The command line was:\tbcftools +%s", args->argv[0]);
+        int i;
+        for (i=1; i<args->argc; i++)
+            fprintf(args->out," %s",args->argv[i]);
+        fprintf(args->out,"\n");
+        fprintf(args->out,"# LOG\t[2]Message\n");
+        fprintf(args->out,"# CSQ"); i = 1;
+        fprintf(args->out,"\t[%d]Sample", ++i);
+        fprintf(args->out,"\t[%d]Haplotype", ++i);
+        fprintf(args->out,"\t[%d]Chromosome", ++i);
+        fprintf(args->out,"\t[%d]Position", ++i);
+        fprintf(args->out,"\t[%d]Consequence", ++i);
+        fprintf(args->out,"\n");
+    }
+    else
+    {
+        args->out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
+        if ( args->out_fh == NULL ) error("Can't write to %s: %s\n", args->output_fname? args->output_fname : "standard output", strerror(errno));
+        bcf_hdr_append_version(args->hdr,args->argc,args->argv,"bcftools/csq");
+        bcf_hdr_printf(args->hdr,"##INFO=<ID=%s,Number=.,Type=String,Description=\"%s consequence annotation from BCFtools/csq. Format: '[*]consequence|gene|transcript|biotype[|strand|amino_acid_change|dna_change]' or, for consequences of variants split across multiple sites, a pointer to the record storing the consequences '@position'. '*' prefix indicates a consequence downstream from a stop \">",args->bcsq_tag, args->local_csq ? "Local" : "Haplotype-aware");
+        if ( args->hdr_nsmpl ) 
+            bcf_hdr_printf(args->hdr,"##FORMAT=<ID=%s,Number=.,Type=Integer,Description=\"Bitmask of indexes to INFO/BCSQ, with interleaved first/second haplotype. Use \\\"bcftools query -f'[%%CHROM\\t%%POS\\t%%SAMPLE\\t%%TBCSQ\\n]'\\\" to translate.\">",args->bcsq_tag);
+        bcf_hdr_write(args->out_fh, args->hdr);
+    }
+    if ( !args->quiet ) fprintf(pysam_stderr,"Calling...\n");
+}
+
+void destroy_data(args_t *args)
+{
+    regidx_destroy(args->idx_cds);
+    regidx_destroy(args->idx_utr);
+    regidx_destroy(args->idx_exon);
+    regidx_destroy(args->idx_tscript);
+    regitr_destroy(args->itr);
+
+    khint_t k,i,j;
+    for (k=0; k<kh_end(args->init.gid2gene); k++)
+    {
+        if ( !kh_exist(args->init.gid2gene, k) ) continue;
+        gf_gene_t *gene = (gf_gene_t*) kh_val(args->init.gid2gene, k);
+        free(gene->name);
+        free(gene);
+    }
+    kh_destroy(int2gene,args->init.gid2gene);
+
+    if ( args->filter )
+        filter_destroy(args->filter);
+
+    khp_destroy(trhp,args->active_tr);
+    kh_destroy(pos2vbuf,args->pos2vbuf);
+    if ( args->smpl ) smpl_ilist_destroy(args->smpl);
+    int ret;
+    if ( args->out_fh )
+        ret = hts_close(args->out_fh);
+    else
+        ret = fclose(args->out);
+    if ( ret ) error("Error: close failed .. %s\n", args->output_fname?args->output_fname:"pysam_stdout");
+    for (i=0; i<args->vcf_rbuf.m; i++)
+    {
+        vbuf_t *vbuf = args->vcf_buf[i];
+        if ( !vbuf ) continue;
+        for (j=0; j<vbuf->m; j++)
+        {
+            if ( !vbuf->vrec[j] ) continue;
+            if ( vbuf->vrec[j]->line ) bcf_destroy(vbuf->vrec[j]->line);
+            free(vbuf->vrec[j]->smpl);
+            free(vbuf->vrec[j]->vcsq);
+            free(vbuf->vrec[j]);
+        }
+        free(vbuf->vrec);
+        free(vbuf);
+    }
+    free(args->vcf_buf);
+    free(args->rm_tr);
+    free(args->csq_buf);
+    free(args->hap->stack);
+    free(args->hap->sseq.s);
+    free(args->hap->tseq.s);
+    free(args->hap->tref.s);
+    free(args->hap);
+    fai_destroy(args->fai);
+    free(args->gt_arr);
+    free(args->str.s);
+    free(args->str2.s);
+    free(ENSID_FMT);
+}
+
+/*
+    The splice_* functions are for consquences around splice sites: start,stop,splice_*
+ */
+#define SPLICE_VAR_REF 0   // ref: ACGT>ACGT, csq not applicable, skip completely
+#define SPLICE_OUTSIDE 1   // splice acceptor or similar; csq set and is done, does not overlap the region
+#define SPLICE_INSIDE  2   // overlaps coding region; csq can be set but coding prediction is needed 
+#define SPLICE_OVERLAP 3   // indel overlaps region boundary, csq set but could not determine csq
+typedef struct
+{
+    tscript_t *tr;
+    struct {
+        int32_t pos, rlen, alen;
+        char *ref, *alt;
+        bcf1_t *rec;
+    } vcf;
+    uint16_t check_acceptor:1,  // check distance from exon start (fwd) or end (rev)
+             check_start:1,     // this is the first coding exon (relative to transcript orientation), check first (fwd) or last (rev) codon 
+             check_stop:1,      // this is the last coding exon (relative to transcript orientation), check last (fwd) or first (rev) codon
+             check_donor:1,     // as with check_acceptor
+             check_region_beg:1,    // do/don't check for splices at this end, eg. in the first or last exon
+             check_region_end:1,    // 
+             check_utr:1,           // check splice sites (acceptor/donor/region_*) only if not in utr
+             set_refalt:1;          // set kref,kalt, if set, check also for synonymous events
+    uint32_t csq;
+    int tbeg, tend;             // number of trimmed bases from beg and end of ref,alt allele
+    uint32_t ref_beg,           // ref coordinates with spurious bases removed, ACC>AC can become AC>A or CC>C, whichever gives 
+             ref_end;           // a more conservative csq (the first and last base in kref.s)
+    kstring_t kref, kalt;       // trimmed alleles, set only with SPLICE_OLAP
+}
+splice_t;
+void splice_init(splice_t *splice, bcf1_t *rec)
+{
+    memset(splice,0,sizeof(*splice));
+    splice->vcf.rec  = rec;
+    splice->vcf.pos  = rec->pos;
+    splice->vcf.rlen = rec->rlen;
+    splice->vcf.ref  = rec->d.allele[0];
+}
+static inline void splice_build_hap(splice_t *splice, uint32_t beg, int len)
+{
+    // len>0 .. beg is the first base, del filled from right
+    // len<0 .. beg is the last base, del filled from left
+
+    int rlen, alen, rbeg, abeg;     // first base to include (ref coordinates)
+    if ( len<0 )
+    {
+        rlen = alen = -len;
+        rbeg = beg - rlen + 1;
+        int dlen = splice->vcf.alen - splice->vcf.rlen;
+        if ( dlen<0 && beg < splice->ref_end ) // incomplete del, beg is in the middle
+            dlen += splice->ref_end - beg;
+        abeg = rbeg + dlen;
+    }
+    else
+    {
+        rbeg = abeg = beg;
+        rlen = alen = len;
+        // check for incomplete del as above??
+    }
+
+#define XDBG 0
+#if XDBG
+fprintf(pysam_stderr,"build_hap:  rbeg=%d + %d    abeg=%d \n",rbeg,rlen,abeg);
+#endif 
+    splice->kref.l = 0;
+    splice->kalt.l = 0;
+
+    // add the part before vcf.ref, in the vcf.ref and after vcf.ref
+    int roff;   // how many vcf.ref bases already used
+    if ( rbeg < splice->vcf.pos )
+    {
+        assert( splice->tr->beg <= rbeg );  // this can be extended thanks to N_REF_PAD
+        kputsn(splice->tr->ref + N_REF_PAD + rbeg - splice->tr->beg, splice->vcf.pos - rbeg, &splice->kref);
+        roff = 0;
+    }
+    else
+        roff = rbeg - splice->vcf.pos;
+#if XDBG
+fprintf(pysam_stderr,"r1: %s  roff=%d\n",splice->kref.s,roff);
+#endif
+
+    if ( roff < splice->vcf.rlen && splice->kref.l < rlen )
+    {
+        int len = splice->vcf.rlen - roff;  // len still available in vcf.ref
+        if ( len > rlen - splice->kref.l ) len = rlen - splice->kref.l; // how much of ref allele is still needed
+        kputsn(splice->vcf.ref + roff, len, &splice->kref);
+    }
+#if XDBG
+fprintf(pysam_stderr,"r2: %s\n",splice->kref.s);
+#endif
+
+    uint32_t end = splice->vcf.pos + splice->vcf.rlen;    // position just after the ref allele
+    if ( splice->kref.l < rlen )
+    {
+        if ( end + rlen - splice->kref.l - 1 > splice->tr->end ) // trim, the requested sequence is too long (could be extended, see N_REF_PAD)
+            rlen -= end + rlen - splice->kref.l - 1 - splice->tr->end;
+        if ( splice->kref.l < rlen )
+            kputsn(splice->tr->ref + N_REF_PAD + end - splice->tr->beg, rlen - splice->kref.l, &splice->kref);
+    }
+#if XDBG
+fprintf(pysam_stderr,"r3: %s\n",splice->kref.s);
+#endif
+
+
+    int aoff;
+    if ( abeg < splice->vcf.pos )
+    {
+        assert( splice->tr->beg <= abeg );
+        kputsn(splice->tr->ref + N_REF_PAD + abeg - splice->tr->beg, splice->vcf.pos - abeg, &splice->kalt);
+        aoff = 0;
+    }
+    else
+        aoff = abeg - splice->vcf.pos;
+#if XDBG
+fprintf(pysam_stderr,"a1: %s  aoff=%d\n",splice->kalt.s,aoff);
+#endif
+
+    if ( aoff < splice->vcf.alen && splice->kalt.l < alen )
+    {
+        int len = splice->vcf.alen - aoff;  // len still available in vcf.alt
+        if ( len > alen - splice->kalt.l ) len = alen - splice->kalt.l; // how much of alt allele is still needed
+        kputsn(splice->vcf.alt + aoff, len, &splice->kalt);
+        aoff -= len;
+    }
+    if ( aoff < 0 ) aoff = 0;
+    else aoff--;
+#if XDBG
+fprintf(pysam_stderr,"a2: %s  aoff=%d\n",splice->kalt.s,aoff);
+#endif
+
+    end = splice->vcf.pos + splice->vcf.rlen;    // position just after the ref allele
+    if ( splice->kalt.l < alen )
+    {
+        if ( end + alen + aoff - splice->kalt.l - 1 > splice->tr->end ) // trim, the requested sequence is too long
+            alen -= end + alen + aoff - splice->kalt.l - 1 - splice->tr->end;
+        if ( alen > 0 && alen > splice->kalt.l )
+            kputsn(splice->tr->ref + aoff + N_REF_PAD + end - splice->tr->beg, alen - splice->kalt.l, &splice->kalt);
+    }
+#if XDBG
+fprintf(pysam_stderr,"a3: %s\n",splice->kalt.s);
+fprintf(pysam_stderr," [%s]\n [%s]\n\n",splice->kref.s,splice->kalt.s);
+#endif
+}
+void csq_stage(args_t *args, csq_t *csq, bcf1_t *rec);
+static inline int csq_stage_utr(args_t *args, regitr_t *itr, bcf1_t *rec, uint32_t trid)
+{
+    while ( regitr_overlap(itr) )
+    {
+        gf_utr_t *utr = regitr_payload(itr, gf_utr_t*);
+        tscript_t *tr = utr->tr;
+        if ( tr->id != trid ) continue;
+        csq_t csq; 
+        memset(&csq, 0, sizeof(csq_t));
+        csq.pos          = rec->pos;
+        csq.type.type    = utr->which==prime5 ? CSQ_UTR5 : CSQ_UTR3;
+        csq.type.biotype = tr->type;
+        csq.type.strand  = tr->strand;
+        csq.type.trid    = tr->id;
+        csq.type.gene    = tr->gene->name;
+        csq_stage(args, &csq, rec);
+        return csq.type.type;
+    }
+    return 0;
+}
+static inline void csq_stage_splice(args_t *args, bcf1_t *rec, tscript_t *tr, uint32_t type)
+{
+#if XDBG
+fprintf(pysam_stderr,"csq_stage_splice %d: type=%d\n",rec->pos+1,type);
+#endif
+    if ( !type ) return;
+    csq_t csq; 
+    memset(&csq, 0, sizeof(csq_t));
+    csq.pos          = rec->pos;
+    csq.type.type    = type;
+    csq.type.biotype = tr->type;
+    csq.type.strand  = tr->strand;
+    csq.type.trid    = tr->id;
+    csq.type.gene    = tr->gene->name;
+    csq_stage(args, &csq, rec);
+}
+static inline int splice_csq_ins(args_t *args, splice_t *splice, uint32_t ex_beg, uint32_t ex_end)
+{
+    // coordinates that matter for consequences, eg AC>ACG trimmed to C>CG, 1bp
+    // before and after the inserted bases
+    if ( splice->tbeg || splice->vcf.ref[0]!=splice->vcf.alt[0] )
+    {
+        splice->ref_beg = splice->vcf.pos + splice->tbeg - 1;
+        splice->ref_end = splice->vcf.pos + splice->vcf.rlen - splice->tend;
+    }
+    else
+    {
+        if ( splice->tend ) splice->tend--;
+        splice->ref_beg = splice->vcf.pos;
+        splice->ref_end = splice->vcf.pos + splice->vcf.rlen - splice->tend;
+    }
+#if XDBG
+fprintf(pysam_stderr,"ins: %s>%s .. ex=%d,%d  beg,end=%d,%d  tbeg,tend=%d,%d  check_utr=%d start,stop,beg,end=%d,%d,%d,%d\n", splice->vcf.ref,splice->vcf.alt,ex_beg,ex_end,splice->ref_beg,splice->ref_end,splice->tbeg,splice->tend,splice->check_utr,splice->check_start,splice->check_stop,splice->check_region_beg,splice->check_region_end);
+#endif
+
+    int ret;
+    if ( splice->ref_beg >= ex_end )   // fully outside, beyond the exon
+    {
+        if ( splice->check_utr )
+        {
+            regitr_t *itr = regitr_init(NULL);
+            const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+            if ( regidx_overlap(args->idx_utr,chr,splice->ref_beg+1,splice->ref_beg+1, itr) )     // adjacent utr
+            {
+                ret = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                if ( ret!=0 ) 
+                {
+                    regitr_destroy(itr);
+                    return SPLICE_OUTSIDE; // overlaps utr
+                }
+            }
+            regitr_destroy(itr);
+        }
+        if ( !splice->check_region_end ) return SPLICE_OUTSIDE;
+        char *ref = NULL, *alt = NULL;
+        if ( splice->set_refalt )   // seq identity is checked only when tr->ref is available
+        {
+            splice_build_hap(splice, ex_end+1, N_SPLICE_REGION_INTRON);
+            ref = splice->kref.s, alt = splice->kalt.s;
+        }
+        if ( splice->ref_beg < ex_end + N_SPLICE_REGION_INTRON && splice->ref_end > ex_end + N_SPLICE_DONOR )
+        {
+            splice->csq |= CSQ_SPLICE_REGION;
+            if ( ref && !strncmp(ref,alt,N_SPLICE_REGION_INTRON) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+        }
+        if ( splice->ref_beg < ex_end + N_SPLICE_DONOR )
+        {
+            if ( splice->check_donor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_DONOR;
+            if ( splice->check_acceptor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+            if ( ref && !strncmp(ref,alt,N_SPLICE_DONOR) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+        }
+        csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+        return SPLICE_OUTSIDE;
+    }
+    if ( splice->ref_end < ex_beg || (splice->ref_end == ex_beg && !splice->check_region_beg) )    // fully outside, before the exon
+    {
+        if ( splice->check_utr )
+        {
+            regitr_t *itr = regitr_init(NULL);
+            const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+            if ( regidx_overlap(args->idx_utr,chr,splice->ref_end-1,splice->ref_end-1, itr) )     // adjacent utr
+            {
+                ret = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                if ( ret!=0 )
+                {
+                    regitr_destroy(itr);
+                    return SPLICE_OUTSIDE; // overlaps utr
+                }
+            }
+            regitr_destroy(itr);
+        }
+        if ( !splice->check_region_beg ) return SPLICE_OUTSIDE;
+        char *ref = NULL, *alt = NULL;
+        if ( splice->set_refalt )   // seq identity is checked only when tr->ref is available
+        {
+            splice_build_hap(splice, ex_beg - N_SPLICE_REGION_INTRON, N_SPLICE_REGION_INTRON);
+            ref = splice->kref.s, alt = splice->kalt.s;
+        }
+        if ( splice->ref_end > ex_beg - N_SPLICE_REGION_INTRON && splice->ref_beg < ex_beg - N_SPLICE_DONOR )
+        {
+            splice->csq |= CSQ_SPLICE_REGION;
+            if ( ref && !strncmp(ref,alt,N_SPLICE_REGION_INTRON) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+        }
+        if ( splice->ref_end > ex_beg - N_SPLICE_DONOR )
+        {
+            if ( splice->check_donor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_DONOR;
+            if ( splice->check_acceptor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+            if ( ref && !strncmp(ref+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,alt+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,N_SPLICE_DONOR) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+        }
+        csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+        return SPLICE_OUTSIDE;
+    }
+    // overlaps the exon or inside the exon
+    // possible todo: find better alignment for frameshifting variants?
+    if ( splice->ref_beg <= ex_beg + 2 )    // in the first 3bp
+    {
+        if ( splice->check_region_beg ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_FWD ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->ref_end > ex_end - 2 )
+    {
+        if ( splice->check_region_end ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_REV ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->set_refalt )
+    {
+        // Make sure the variant will not end up left aligned to avoid overlapping vcf records
+        //      splice_build_hap(splice, splice->ref_beg, splice->vcf.alen - splice->tend - splice->tbeg + 1);
+        //      splice->vcf.rlen -= splice->tbeg + splice->tend - 1;
+        //      if ( splice->kref.l > splice->vcf.rlen ) { splice->kref.l = splice->vcf.rlen;  splice->kref.s[splice->kref.l] = 0; }
+        if ( splice->ref_beg < splice->vcf.pos )    // this must have been caused by too much trimming from right
+        {
+            int dlen = splice->vcf.pos - splice->ref_beg;
+            assert( dlen==1 );
+            splice->tbeg += dlen;
+            if ( splice->tbeg + splice->tend == splice->vcf.rlen ) splice->tend -= dlen;
+            splice->ref_beg = splice->vcf.pos;
+        }
+        if ( splice->ref_end==ex_beg ) splice->tend--;  // prevent zero-length ref allele
+        splice_build_hap(splice, splice->ref_beg, splice->vcf.alen - splice->tend - splice->tbeg + 1);
+        splice->vcf.rlen -= splice->tbeg + splice->tend - 1;
+        if ( splice->kref.l > splice->vcf.rlen ) { splice->kref.l = splice->vcf.rlen;  splice->kref.s[splice->kref.l] = 0; }
+    }
+    csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+    return SPLICE_INSIDE;
+}
+
+static inline int splice_csq_del(args_t *args, splice_t *splice, uint32_t ex_beg, uint32_t ex_end)
+{
+    // coordinates that matter for consequences, eg AC>ACG trimmed to C>CG
+    splice->ref_beg = splice->vcf.pos + splice->tbeg - 1;                       // 1b before the deleted base
+    splice->ref_end = splice->vcf.pos + splice->vcf.rlen - splice->tend - 1;    // the last deleted base
+
+#if XDBG
+fprintf(pysam_stderr,"del: %s>%s .. ex=%d,%d  beg,end=%d,%d  tbeg,tend=%d,%d  check_utr=%d start,stop,beg,end=%d,%d,%d,%d\n", splice->vcf.ref,splice->vcf.alt,ex_beg,ex_end,splice->ref_beg,splice->ref_end,splice->tbeg,splice->tend,splice->check_utr,splice->check_start,splice->check_stop,splice->check_region_beg,splice->check_region_end);
+#endif
+
+    if ( splice->ref_beg + 1 < ex_beg )     // the part before the exon; ref_beg is off by -1
+    {
+        if ( splice->check_region_beg )
+        {
+            int csq = 0;
+            if ( splice->check_utr )
+            {
+                regitr_t *itr = regitr_init(NULL);
+                const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+                if ( regidx_overlap(args->idx_utr,chr,splice->ref_beg,ex_beg-1, itr) )     // adjacent utr
+                    csq = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                regitr_destroy(itr);
+            }
+            if ( !csq )
+            {
+                char *ref = NULL, *alt = NULL;
+                if ( splice->set_refalt )   // seq identity is checked only when tr->ref is available
+                {
+                    // filling from the left does not work for ENST00000341065/frame3.vcf
+                    //    CAG.GTGGCCAG      CAG.GTGGCCAG
+                    //    CA-.--GGCCAG  vs  CAG.---GCCAG
+                    //  splice_build_hap(splice, ex_beg-1, -N_SPLICE_REGION_INTRON);
+                    //
+                    // filling from the right:
+                    splice_build_hap(splice, ex_beg - N_SPLICE_REGION_INTRON, N_SPLICE_REGION_INTRON);
+                    ref = splice->kref.s, alt = splice->kalt.s;
+                }
+                if ( splice->ref_end >= ex_beg - N_SPLICE_REGION_INTRON && splice->ref_beg < ex_beg - N_SPLICE_DONOR )
+                {
+                    splice->csq |= CSQ_SPLICE_REGION;
+                    if ( ref && alt && !strncmp(ref,alt,N_SPLICE_REGION_INTRON) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+                }
+                if ( splice->ref_end >= ex_beg - N_SPLICE_DONOR )
+                {
+                    if ( splice->check_donor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_DONOR;
+                    if ( splice->check_acceptor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+                    if ( ref && alt && !strncmp(ref+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,alt+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,N_SPLICE_DONOR) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+                }
+            }
+        }
+        if ( splice->ref_end >= ex_beg ) 
+        {
+            splice->tbeg = splice->ref_beg - splice->vcf.pos + 1;
+            splice->ref_beg = ex_beg - 1;
+            if ( splice->tbeg + splice->tend == splice->vcf.alen )
+            {
+                // the deletion overlaps ex_beg and cannot be easily realigned to the right
+                if ( !splice->tend )
+                {
+                    splice->csq |= CSQ_CODING_SEQUENCE;
+                    return SPLICE_OVERLAP;
+                }
+                splice->tend--;
+            }
+        }
+    }
+    if ( ex_end < splice->ref_end )     // the part after the exon
+    {
+        if ( splice->check_region_end )
+        {
+            int csq = 0;
+            if ( splice->check_utr )
+            {
+                regitr_t *itr = regitr_init(NULL);
+                const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+                if ( regidx_overlap(args->idx_utr,chr,ex_end+1,splice->ref_end, itr) )     // adjacent utr
+                    csq = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                regitr_destroy(itr);
+            }
+            if ( !csq )
+            {
+                char *ref = NULL, *alt = NULL;
+                if ( splice->set_refalt )   // seq identity is checked only when tr->ref is available
+                {
+                    splice_build_hap(splice, ex_end+1, N_SPLICE_REGION_INTRON);  // ref,alt positioned at the first intron base
+                    ref = splice->kref.s, alt = splice->kalt.s;
+                }
+                if ( splice->ref_beg < ex_end + N_SPLICE_REGION_INTRON && splice->ref_end > ex_end + N_SPLICE_DONOR )
+                {
+                    splice->csq |= CSQ_SPLICE_REGION;
+                    if ( ref && alt && !strncmp(ref,alt,N_SPLICE_REGION_INTRON) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+                }
+                if ( splice->ref_beg < ex_end + N_SPLICE_DONOR )
+                {
+                    if ( splice->check_donor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_DONOR;
+                    if ( splice->check_acceptor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+                    if ( ref && alt && !strncmp(ref+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,alt+N_SPLICE_REGION_INTRON-N_SPLICE_DONOR,N_SPLICE_DONOR) ) splice->csq |= CSQ_SYNONYMOUS_VARIANT;
+                }
+            }
+        }
+        if ( splice->ref_beg < ex_end ) 
+        {
+            splice->tend = splice->vcf.rlen - (splice->ref_end - splice->vcf.pos + 1);
+            splice->ref_end = ex_end;
+        }
+    }
+    if ( splice->ref_end < ex_beg || splice->ref_beg >= ex_end )
+    {
+        csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+        return SPLICE_OUTSIDE;
+    }
+
+    if ( splice->ref_beg < ex_beg + 2 ) // ref_beg is off by -1
+    {
+        if ( splice->check_region_beg ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_FWD ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->ref_end > ex_end - 3 )
+    {
+        if ( splice->check_region_end ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_REV ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->set_refalt )
+    {
+        if ( splice->tbeg>0 ) splice->tbeg--;  //why is this?
+        if ( splice->vcf.rlen > splice->tbeg + splice->tend && splice->vcf.alen > splice->tbeg + splice->tend )
+        {
+            splice->vcf.rlen -= splice->tbeg + splice->tend;
+            splice->vcf.alen -= splice->tbeg + splice->tend;
+        }
+        splice->kref.l = 0; kputsn(splice->vcf.ref + splice->tbeg, splice->vcf.rlen, &splice->kref); 
+        splice->kalt.l = 0; kputsn(splice->vcf.alt + splice->tbeg, splice->vcf.alen, &splice->kalt); 
+        if ( (splice->ref_beg+1 < ex_beg && splice->ref_end >= ex_beg) || (splice->ref_beg+1 < ex_end && splice->ref_end >= ex_end) ) // ouch, ugly ENST00000409523/long-overlapping-del.vcf
+        {
+            splice->csq |= (splice->ref_end - splice->ref_beg + 1)%3 ? CSQ_FRAMESHIFT_VARIANT : CSQ_INFRAME_DELETION;
+            return SPLICE_OVERLAP;
+        }
+    }
+    csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+    return SPLICE_INSIDE;
+}
+
+static inline int splice_csq_mnp(args_t *args, splice_t *splice, uint32_t ex_beg, uint32_t ex_end)
+{
+    // not a real variant, can be ignored: eg ACGT>ACGT
+    if ( splice->tbeg + splice->tend == splice->vcf.rlen ) return SPLICE_VAR_REF;
+
+    splice->ref_beg = splice->vcf.pos + splice->tbeg;
+    splice->ref_end = splice->vcf.pos + splice->vcf.rlen - splice->tend - 1;
+
+#if XDBG
+fprintf(pysam_stderr,"mnp: %s>%s .. ex=%d,%d  beg,end=%d,%d  tbeg,tend=%d,%d  check_utr=%d start,stop,beg,end=%d,%d,%d,%d\n", splice->vcf.ref,splice->vcf.alt,ex_beg,ex_end,splice->ref_beg,splice->ref_end,splice->tbeg,splice->tend,splice->check_utr,splice->check_start,splice->check_stop,splice->check_region_beg,splice->check_region_end);
+#endif
+
+    if ( splice->ref_beg < ex_beg )     // the part before the exon
+    {
+        if ( splice->check_region_beg )
+        {
+            int csq = 0;
+            if ( splice->check_utr )
+            {
+                regitr_t *itr = regitr_init(NULL);
+                const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+                if ( regidx_overlap(args->idx_utr,chr,splice->ref_beg,ex_beg-1, itr) )     // adjacent utr
+                    csq = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                regitr_destroy(itr);
+            }
+            if ( !csq )
+            {
+                if ( splice->ref_end >= ex_beg - N_SPLICE_REGION_INTRON && splice->ref_beg < ex_beg - N_SPLICE_DONOR )
+                    splice->csq |= CSQ_SPLICE_REGION;
+                if ( splice->ref_end >= ex_beg - N_SPLICE_DONOR )
+                {
+                    if ( splice->check_donor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_DONOR;
+                    if ( splice->check_acceptor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+                }
+            }
+        }
+        if ( splice->ref_end >= ex_beg ) 
+        {
+            splice->tbeg = splice->ref_beg - splice->vcf.pos;
+            splice->ref_beg = ex_beg;
+        }
+    }
+    if ( ex_end < splice->ref_end )     // the part after the exon
+    {
+        if ( splice->check_region_end )
+        {
+            int csq = 0;
+            if ( splice->check_utr )
+            {
+                regitr_t *itr = regitr_init(NULL);
+                const char *chr = bcf_seqname(args->hdr,splice->vcf.rec);
+                if ( regidx_overlap(args->idx_utr,chr,ex_end+1,splice->ref_end, itr) )     // adjacent utr
+                    csq = csq_stage_utr(args, itr, splice->vcf.rec, splice->tr->id);
+                regitr_destroy(itr);
+            }
+            if ( !csq )
+            {
+                if ( splice->ref_beg <= ex_end + N_SPLICE_REGION_INTRON && splice->ref_end > ex_end + N_SPLICE_DONOR )
+                    splice->csq |= CSQ_SPLICE_REGION;
+                if ( splice->ref_beg <= ex_end + N_SPLICE_DONOR )
+                {
+                    if ( splice->check_donor && splice->tr->strand==STRAND_FWD ) splice->csq |= CSQ_SPLICE_DONOR;
+                    if ( splice->check_acceptor && splice->tr->strand==STRAND_REV ) splice->csq |= CSQ_SPLICE_ACCEPTOR;
+                }
+            }
+        }
+        if ( splice->ref_beg <= ex_end ) 
+        {
+            splice->tend = splice->vcf.rlen - (splice->ref_end - splice->vcf.pos + 1);
+            splice->ref_end = ex_end;
+        }
+    }
+    if ( splice->ref_end < ex_beg || splice->ref_beg > ex_end )
+    {
+        csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+        return SPLICE_OUTSIDE;
+    }
+
+    if ( splice->ref_beg < ex_beg + 3 )
+    {
+        if ( splice->check_region_beg ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_FWD ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->ref_end > ex_end - 3 )
+    {
+        if ( splice->check_region_end ) splice->csq |= CSQ_SPLICE_REGION;
+        if ( splice->tr->strand==STRAND_REV ) { if ( splice->check_start ) splice->csq |= CSQ_START_LOST; }
+        else { if ( splice->check_stop ) splice->csq |= CSQ_STOP_LOST; }
+    }
+    if ( splice->set_refalt )
+    {
+        splice->vcf.rlen -= splice->tbeg + splice->tend;
+        splice->kref.l = 0; kputsn(splice->vcf.ref + splice->tbeg, splice->vcf.rlen, &splice->kref); 
+        splice->kalt.l = 0; kputsn(splice->vcf.alt + splice->tbeg, splice->vcf.rlen, &splice->kalt); 
+    }
+    csq_stage_splice(args, splice->vcf.rec, splice->tr, splice->csq);
+    return SPLICE_INSIDE;
+}
+static inline int splice_csq(args_t *args, splice_t *splice, uint32_t ex_beg, uint32_t ex_end)
+{
+    splice->csq = 0;
+    splice->vcf.alen = strlen(splice->vcf.alt);
+
+    int rlen1 = splice->vcf.rlen - 1, alen1 = splice->vcf.alen - 1, i = 0;
+    splice->tbeg = 0, splice->tend = 0;
+
+    // trim from the right, then from the left
+    while ( i<=rlen1 && i<=alen1 )
+    {
+        if ( splice->vcf.ref[rlen1-i] != splice->vcf.alt[alen1-i] ) break;
+        i++;
+    }
+    splice->tend = i;
+    rlen1 -= i, alen1 -= i, i = 0;
+    while ( i<=rlen1 && i<=alen1 )
+    {
+        if ( splice->vcf.ref[i] != splice->vcf.alt[i] ) break;
+        i++;
+    }
+    splice->tbeg = i;
+
+    // The mnp, ins and del code was split into near-identical functions for clarity and debugging;
+    // possible todo: generalize once stable
+    if ( splice->vcf.rlen==splice->vcf.alen ) return splice_csq_mnp(args, splice, ex_beg, ex_end);
+    if ( splice->vcf.rlen < splice->vcf.alen ) return splice_csq_ins(args, splice, ex_beg, ex_end);
+    if ( splice->vcf.rlen > splice->vcf.alen ) return splice_csq_del(args, splice, ex_beg, ex_end);
+
+    return 0;
+}
+
+// return value: 0 added, 1 overlapping variant, 2 silent discard (intronic,alt=ref)
+int hap_init(args_t *args, hap_node_t *parent, hap_node_t *child, gf_cds_t *cds, bcf1_t *rec, int ial)
+{
+    int i;
+    kstring_t str = {0,0,0};
+    tscript_t *tr = cds->tr;
+    child->icds = cds->icds;     // index of cds in the tscript's list of exons
+
+    splice_t splice;
+    splice_init(&splice, rec);
+    splice.tr = tr;
+    splice.vcf.alt  = rec->d.allele[ial];
+    splice.check_acceptor = splice.check_donor = splice.set_refalt = splice.check_utr = 1;
+    if ( !(tr->trim & TRIM_5PRIME) )
+    {
+        if ( tr->strand==STRAND_FWD ) { if ( child->icds==0 ) splice.check_start = 1; }
+        else { if ( child->icds==tr->ncds-1 ) splice.check_start = 1; }
+    }
+    if ( !(tr->trim & TRIM_3PRIME) )
+    {
+        if ( tr->strand==STRAND_FWD ) { if ( child->icds==tr->ncds-1 ) splice.check_stop = 1; }
+        else { if ( child->icds==0 ) splice.check_stop = 1; }
+    }
+    if ( splice.check_start )   // do not check starts in incomplete CDS, defined as not starting with M
+    {
+        if ( tr->strand==STRAND_FWD ) { if ( dna2aa(tr->ref+N_REF_PAD+cds->beg-tr->beg) != 'M' ) splice.check_start = 0; }
+        else { if ( cdna2aa(tr->ref+N_REF_PAD+cds->beg-tr->beg+cds->len-3) != 'M' ) splice.check_start = 0; }
+    }
+    if ( child->icds!=0 ) splice.check_region_beg = 1;
+    if ( child->icds!=tr->ncds-1 ) splice.check_region_end = 1;
+
+#if XDBG
+fprintf(pysam_stderr,"\n%d [%s][%s]   check start:%d,stop:%d\n",splice.vcf.pos+1,splice.vcf.ref,splice.vcf.alt,splice.check_start,splice.check_stop);
+#endif
+    int ret = splice_csq(args, &splice, cds->beg, cds->beg + cds->len - 1);
+#if XDBG
+fprintf(pysam_stderr,"cds splice_csq: %d [%s][%s] .. beg,end=%d %d, ret=%d, csq=%d\n\n",splice.vcf.pos+1,splice.kref.s,splice.kalt.s,splice.ref_beg+1,splice.ref_end+1,ret,splice.csq);
+#endif
+
+    if ( ret==SPLICE_VAR_REF ) return 2;  // not a variant, eg REF=CA ALT=CA
+    if ( ret==SPLICE_OUTSIDE || ret==SPLICE_OVERLAP )  // not a coding csq
+    {
+        free(splice.kref.s);
+        free(splice.kalt.s);
+
+        if ( !splice.csq ) return 2;        // fully intronic, no csq
+
+        // splice_region/acceptor/donor
+        child->seq  = NULL;
+        child->sbeg = 0;
+        child->rbeg = rec->pos;
+        child->rlen = 0;
+        child->dlen = 0;
+        kputs(rec->d.allele[0],&str);
+        kputc('>',&str);
+        kputs(rec->d.allele[ial],&str);
+        child->var  = str.s;
+        child->type = HAP_SSS;
+        child->csq  = splice.csq;
+        child->prev = parent->type==HAP_SSS ? parent->prev : parent;
+        child->rec  = rec;
+        return 0;
+    }
+    if ( splice.csq & CSQ_SYNONYMOUS_VARIANT ) splice.csq &= ~CSQ_SYNONYMOUS_VARIANT;   // synonymous&splice,frame could become synonymous&frame,splice
+
+    int dbeg = 0;
+    if ( splice.ref_beg < cds->beg )
+    {
+        // The vcf record overlaps the exon boundary, but the variant itself
+        // should fit inside since we are here. This will need more work.
+        // #1475227917
+        dbeg = cds->beg - splice.ref_beg;
+        splice.kref.l -= dbeg;
+        splice.ref_beg = cds->beg;
+        assert( dbeg <= splice.kalt.l );
+    }
+
+    if ( parent->type==HAP_SSS ) parent = parent->prev;
+    if ( parent->type==HAP_CDS )    
+    {
+        i = parent->icds;
+        if ( i!=cds->icds )
+        {
+            // the variant is on a new exon, finish up the previous
+            int len = tr->cds[i]->len - parent->rbeg - parent->rlen + tr->cds[i]->beg;
+            if ( len > 0 )
+                kputsn_(tr->ref + N_REF_PAD + parent->rbeg + parent->rlen - tr->beg, len, &str);
+        }
+
+        // append any skipped non-variant exons
+        while ( ++i < cds->icds )
+            kputsn_(tr->ref + N_REF_PAD + tr->cds[i]->beg - tr->beg, tr->cds[i]->len, &str);
+
+        if ( parent->icds==child->icds )
+        {
+            int len = splice.ref_beg - parent->rbeg - parent->rlen;
+            if ( len < 0 )   // overlapping variants
+            {
+                free(str.s);
+                return 1;
+            }
+            kputsn_(tr->ref + N_REF_PAD + parent->rbeg + parent->rlen - tr->beg, len, &str);
+        }
+        else
+            kputsn_(tr->ref + N_REF_PAD + cds->beg - tr->beg, splice.ref_beg - cds->beg, &str);
+    }
+    kputs(splice.kalt.s + dbeg, &str);
+
+    child->seq  = str.s;
+    child->sbeg = cds->pos + (splice.ref_beg - cds->beg);
+    child->rbeg = splice.ref_beg;
+    child->rlen = splice.kref.l;
+    child->type = HAP_CDS;
+    child->prev = parent;
+    child->rec  = rec;
+    child->csq  = splice.csq;
+
+    // set vlen and the "ref>alt" string
+    {
+        int rlen = strlen(rec->d.allele[0]);
+        int alen = strlen(rec->d.allele[ial]);
+        child->dlen = alen - rlen;
+        child->var  = (char*) malloc(rlen+alen+2);
+        memcpy(child->var,rec->d.allele[0],rlen);
+        child->var[rlen] = '>';
+        memcpy(child->var+rlen+1,rec->d.allele[ial],alen);
+        child->var[rlen+alen+1] = 0;
+    }
+
+    // yuck, the whole CDS is modified/deleted, not ready for this, todo.
+    if ( child->rbeg + child->rlen > cds->beg + cds->len )
+    {
+        child->type = HAP_SSS;
+        if ( !child->csq ) child->csq |= CSQ_CODING_SEQUENCE;  // hack, specifically for ENST00000390520/deletion-overlap.vcf
+    }
+
+    free(splice.kref.s);
+    free(splice.kalt.s);
+    return 0;
+}
+void hap_destroy(hap_node_t *hap)
+{
+    int i;
+    for (i=0; i<hap->nchild; i++)
+        if ( hap->child[i] ) hap_destroy(hap->child[i]);
+    for (i=0; i<hap->mcsq_list; i++) free(hap->csq_list[i].type.vstr.s);
+    free(hap->csq_list);
+    free(hap->child);
+    free(hap->cur_child);
+    free(hap->seq);
+    free(hap->var);
+    free(hap);
+}
+
+
+/*
+    ref:    spliced reference and its length (ref.l)
+    seq:    part of the spliced query transcript on the reference strand to translate, its 
+                length (seq.l) and the total length of the complete transcript (seq.m)
+    sbeg:   seq offset within the spliced query transcript
+    rbeg:   seq offset within ref, 0-based
+    rend:   last base of seq within ref, plus one. If seq does not contain indels, it is rend=rbeg+seq->l
+    strand: coding strand - 0:rev, 1:fwd
+    tseq:   translated sequence (aa)
+    fill:   frameshift, fill until the end (strand=fwd) or from the start (strand=rev)
+ */
+void cds_translate(kstring_t *_ref, kstring_t *_seq, uint32_t sbeg, uint32_t rbeg, uint32_t rend, int strand, kstring_t *tseq, int fill)
+{
+#if XDBG
+fprintf(pysam_stderr,"translate: %d %d %d  fill=%d  seq.l=%d\n",sbeg,rbeg,rend,fill,(int)_seq->l);
+#endif
+    char tmp[3], *codon, *end;
+    int i, len, npad;
+
+    kstring_t ref = *_ref;
+    kstring_t seq = *_seq;
+
+    tseq->l = 0;
+    if ( !seq.l )
+    {
+        kputc('?', tseq);
+        return;
+    }
+
+#define DBG 0
+#if DBG
+ fprintf(pysam_stderr,"translate: sbeg,rbeg,rend=%d %d %d  fill=%d  seq.l=%d\n",sbeg,rbeg,rend,fill,(int)_seq->l);
+ fprintf(pysam_stderr,"    ref: l=%d %s\n", (int)ref.l,ref.s);
+ fprintf(pysam_stderr,"    seq: l=%d m=%d ", (int)seq.l,(int)seq.m);
+ for (i=0; i<seq.l; i++) fprintf(pysam_stderr,"%c",seq.s[i]); fprintf(pysam_stderr,"\n");
+ fprintf(pysam_stderr,"    sbeg,rbeg,rend: %d,%d,%d\n", sbeg,rbeg,rend);
+ fprintf(pysam_stderr,"    strand,fill: %d,%d\n", strand,fill);
+#endif
+
+    if ( strand==STRAND_FWD )
+    {
+        // left padding
+        npad = sbeg % 3;
+#if DBG>1
+        fprintf(pysam_stderr,"    npad: %d\n",npad);
+#endif
+        assert( npad<=rbeg );
+
+        for (i=0; i<npad; i++)
+            tmp[i] = ref.s[rbeg+i-npad+N_REF_PAD];
+        for (; i<3 && i-npad<seq.l; i++)
+            tmp[i] = seq.s[i-npad];
+        len = seq.l - i + npad;    // the remaining length of padded sseq
+#if DBG>1
+        fprintf(pysam_stderr,"\t i=%d\n", i);
+#endif
+        if ( i==3 )
+        {
+            kputc_(dna2aa(tmp), tseq);
+#if DBG>1
+            fprintf(pysam_stderr,"[1]%c%c%c\n",tmp[0],tmp[1],tmp[2]);
+#endif
+            codon = seq.s + 3 - npad;        // next codon
+            end   = codon + len - 1 - (len % 3);    // last position of a valid codon
+            while ( codon < end )
+            {
+                kputc_(dna2aa(codon), tseq);
+#if DBG>1
+                fprintf(pysam_stderr,"[2]%c%c%c\n",codon[0],codon[1],codon[2]);
+#endif
+                codon += 3;
+            }
+            end = seq.s + seq.l - 1;
+            for (i=0; codon+i<=end; i++) tmp[i] = codon[i];
+        }
+
+        // right padding
+        codon = ref.s + rend + N_REF_PAD;
+        if ( i>0 )
+        {
+#if DBG>1
+            if(i==1)fprintf(pysam_stderr,"[3]%c\n",tmp[0]);
+            if(i==2)fprintf(pysam_stderr,"[3]%c%c\n",tmp[0],tmp[1]);
+#endif
+            for (; i<3; i++)
+            {
+                tmp[i] = *codon;
+                codon++;
+            }
+            kputc_(dna2aa(tmp), tseq);
+#if DBG>1
+            fprintf(pysam_stderr,"[4]%c%c%c\n",tmp[0],tmp[1],tmp[2]);
+#endif
+        }
+        if ( fill!=0 )
+        {
+            end = ref.s + ref.l - N_REF_PAD;
+            while ( codon+3 <= end )
+            {
+                kputc_(dna2aa(codon), tseq);
+#if DBG>1
+                fprintf(pysam_stderr,"[5]%c%c%c\t%c\n",codon[0],codon[1],codon[2],dna2aa(codon));
+#endif
+                codon += 3;
+            }
+        }
+    }
+    else    // STRAND_REV
+    {
+        // right padding - number of bases to take from ref
+        npad = (seq.m - (sbeg + seq.l)) % 3; 
+#if DBG>1
+        fprintf(pysam_stderr,"    npad: %d\n",npad);
+#endif
+if ( !(npad>=0 && sbeg+seq.l+npad<=seq.m) ) fprintf(pysam_stderr,"sbeg=%d  seq.l=%d seq.m=%d\n",sbeg,(int)seq.l,(int)seq.m);
+        assert( npad>=0 && sbeg+seq.l+npad<=seq.m );  // todo: first codon on the rev strand
+
+        if ( npad==2 )
+        {
+            tmp[1] = ref.s[rend+N_REF_PAD];
+            tmp[2] = ref.s[rend+N_REF_PAD+1];
+            i = 0;
+        }
+        else if ( npad==1 )
+        {
+            tmp[2] = ref.s[rend+N_REF_PAD];
+            i = 1;
+        }
+        else
+            i = 2;
+
+        end = seq.s + seq.l;
+        for (; i>=0 && end>seq.s; i--) tmp[i] = *(--end);
+#if DBG>1
+        fprintf(pysam_stderr,"\t i=%d\n", i);
+        if(i==1)fprintf(pysam_stderr,"[0]    %c\n",tmp[2]);
+        if(i==0)fprintf(pysam_stderr,"[0]  %c%c\n",tmp[1],tmp[2]);
+#endif
+        if ( i==-1 )
+        {
+#if DBG>1
+            fprintf(pysam_stderr,"[1]%c%c%c\t%c\n",tmp[0],tmp[1],tmp[2], cdna2aa(tmp));
+#endif
+            kputc_(cdna2aa(tmp), tseq);
+            codon = end - 3;
+            while ( codon >= seq.s )
+            {
+                kputc_(cdna2aa(codon), tseq);
+#if DBG>1
+                fprintf(pysam_stderr,"[2]%c%c%c\t%c\n",codon[0],codon[1],codon[2], cdna2aa(codon));
+#endif
+                codon -= 3;
+            }
+            if ( seq.s-codon==2 )
+            {
+                tmp[2] = seq.s[0]; 
+                i = 1;
+            }
+            else if ( seq.s-codon==1 )
+            {
+                tmp[1] = seq.s[0]; 
+                tmp[2] = seq.s[1];
+                i = 0;
+            }
+            else
+                i = -1;
+#if DBG>1
+            if(i==1)fprintf(pysam_stderr,"[3]   %c\n",tmp[2]);
+            if(i==0)fprintf(pysam_stderr,"[3] %c%c\n",tmp[1],tmp[2]);
+#endif
+        }
+        // left padding
+        end = ref.s + N_REF_PAD + rbeg;
+        if ( i>=0 )
+        {
+            for (; i>=0 && end>=ref.s; i--) tmp[i] = *(--end);
+            kputc_(cdna2aa(tmp), tseq);
+#if DBG>1
+            fprintf(pysam_stderr,"[4]%c%c%c\t%c\n",tmp[0],tmp[1],tmp[2],cdna2aa(tmp));
+#endif
+        }
+        if ( fill!=0 )
+        {
+            codon = end - 3;
+            while ( codon >= ref.s + N_REF_PAD )
+            {
+                kputc_(cdna2aa(codon), tseq);
+#if DBG>1
+                fprintf(pysam_stderr,"[5]%c%c%c\t%c\n",codon[0],codon[1],codon[2],cdna2aa(codon));
+#endif
+                codon -= 3;
+            }
+        }
+    }
+    kputc_(0,tseq); tseq->l--;
+#if DBG
+ fprintf(pysam_stderr,"    tseq: %s\n", tseq->s);
+#endif
+}
+
+void tscript_splice_ref(tscript_t *tr)
+{
+    int i, len = 0;
+    for (i=0; i<tr->ncds; i++) 
+        len += tr->cds[i]->len;
+
+    tr->nsref = len + 2*N_REF_PAD;
+    tr->sref  = (char*) malloc(len + 1 + 2*N_REF_PAD);
+    len = 0;
+
+    memcpy(tr->sref, tr->ref + tr->cds[0]->beg - tr->beg, N_REF_PAD);
+    len += N_REF_PAD;
+
+    for (i=0; i<tr->ncds; i++)
+    {
+        memcpy(tr->sref + len, tr->ref + N_REF_PAD + tr->cds[i]->beg - tr->beg, tr->cds[i]->len);
+        len += tr->cds[i]->len;
+    }
+    memcpy(tr->sref + len, tr->ref + N_REF_PAD + tr->cds[tr->ncds-1]->beg - tr->beg, N_REF_PAD);
+    len += N_REF_PAD;
+
+    tr->sref[len] = 0;
+}
+
+// returns: 0 if consequence was added, 1 if it already exists or could not be added
+int csq_push(args_t *args, csq_t *csq, bcf1_t *rec)
+{
+#if XDBG
+fprintf(pysam_stderr,"csq_push: %d .. %d\n",rec->pos+1,csq->type.type);
+#endif
+    khint_t k = kh_get(pos2vbuf, args->pos2vbuf, (int)csq->pos);
+    vbuf_t *vbuf = (k == kh_end(args->pos2vbuf)) ? NULL : kh_val(args->pos2vbuf, k);
+    if ( !vbuf ) error("This should not happen. %s:%d  %s\n",bcf_seqname(args->hdr,rec),csq->pos+1,csq->type.vstr);
+
+    int i;
+    for (i=0; i<vbuf->n; i++)
+        if ( vbuf->vrec[i]->line==rec ) break;
+    if ( i==vbuf->n ) error("This should not happen.. %s:%d  %s\n", bcf_seqname(args->hdr,rec),csq->pos+1,csq->type.vstr);
+    vrec_t *vrec = vbuf->vrec[i];
+
+    // if the variant overlaps donor/acceptor and also splice region, report only donor/acceptor
+    if ( csq->type.type & CSQ_SPLICE_REGION && csq->type.type & (CSQ_SPLICE_DONOR|CSQ_SPLICE_ACCEPTOR) ) 
+        csq->type.type &= ~CSQ_SPLICE_REGION;
+
+    if ( csq->type.type & CSQ_PRINTED_UPSTREAM )
+    {
+        for (i=0; i<vrec->nvcsq; i++)
+        {
+            // Same as below, to avoid records like
+            //      3630 .. @3632,stop_lost|AL627309.1|ENST00000423372|protein_coding|-
+            //      3632 .. stop_lost|AL627309.1|ENST00000423372|protein_coding|-|260*>260G|3630T>A+3632A>C
+            if ( csq->type.type&CSQ_START_STOP && vrec->vcsq[i].type&CSQ_START_STOP )
+            {
+                vrec->vcsq[i] = csq->type;
+                goto exit_duplicate;
+            }
+            if ( !(vrec->vcsq[i].type & CSQ_PRINTED_UPSTREAM) ) continue;
+            if ( csq->type.ref != vrec->vcsq[i].ref ) continue;
+            goto exit_duplicate;
+        }
+    }
+    else if ( csq->type.type & CSQ_COMPOUND )
+    {
+        for (i=0; i<vrec->nvcsq; i++)
+        {
+            if ( csq->type.trid != vrec->vcsq[i].trid && (csq->type.type|vrec->vcsq[i].type)&CSQ_PRN_TSCRIPT ) continue;
+            if ( csq->type.biotype != vrec->vcsq[i].biotype ) continue;
+            if ( csq->type.gene != vrec->vcsq[i].gene ) continue;
+            if ( csq->type.vstr.s || vrec->vcsq[i].vstr.s ) 
+            {
+                // This is a bit hacky, but we want a simpler and more predictable output. The splice_csq() function
+                // can trigger stop/start events based on indel overlap, then another stop/start event can be triggered
+                // from add_csq() or test_cds_local() based on sequence comparison, and on output we could find two
+                // consequences:
+                //      stop_lost|AL627309.1|ENST00000423372|protein_coding|-
+                //      stop_lost&inframe_insertion|AL627309.1|ENST00000423372|protein_coding|-|260*>260CL|3630T>TAAA
+                if ( !csq->type.vstr.s || !vrec->vcsq[i].vstr.s ) 
+                {
+                    if ( csq->type.type&CSQ_START_STOP && vrec->vcsq[i].type&CSQ_START_STOP )
+                    {
+                        vrec->vcsq[i].type |= csq->type.type;
+
+                        // remove stop_lost&synonymous if stop_retained set
+                        if ( vrec->vcsq[i].type&CSQ_STOP_RETAINED ) 
+                            vrec->vcsq[i].type &= ~(CSQ_STOP_LOST|CSQ_SYNONYMOUS_VARIANT);
+
+                        if ( !vrec->vcsq[i].vstr.s ) vrec->vcsq[i].vstr = csq->type.vstr;
+                        goto exit_duplicate;
+                    }
+                    continue;
+                }
+                if ( strcmp(csq->type.vstr.s,vrec->vcsq[i].vstr.s) ) continue;
+            }
+            vrec->vcsq[i].type |= csq->type.type; 
+            goto exit_duplicate;
+        }
+    }
+    else
+    {
+        for (i=0; i<vrec->nvcsq; i++)
+        {
+            if ( csq->type.trid != vrec->vcsq[i].trid && (csq->type.type|vrec->vcsq[i].type)&CSQ_PRN_TSCRIPT) continue;
+            if ( csq->type.biotype != vrec->vcsq[i].biotype ) continue;
+            if ( !(vrec->vcsq[i].type & CSQ_COMPOUND) ) 
+            {
+                vrec->vcsq[i].type |= csq->type.type;
+                goto exit_duplicate;
+            }
+            if ( vrec->vcsq[i].type==(vrec->vcsq[i].type|csq->type.type) ) goto exit_duplicate;
+        }
+    }
+    // no such csq yet in this vcf record
+    csq->vrec = vrec;
+    csq->idx  = i;
+    vrec->nvcsq++;
+    hts_expand0(vcsq_t, vrec->nvcsq, vrec->mvcsq, vrec->vcsq);
+    vrec->vcsq[i] = csq->type;
+    return 0;
+
+exit_duplicate:
+    csq->vrec = vrec;
+    csq->idx  = i;
+    return 1;
+}
+
+//  soff .. position of the variant within the trimmed query transcript
+//  sbeg .. position of the variant within the query transcript
+//  rbeg .. position on the reference transcript (if there are no indels, then rbeg=send)
+//  rpos .. VCF position
+#define node2soff(i) (hap->stack[i].slen - (hap->stack[i].node->rlen + hap->stack[i].node->dlen))
+#define node2sbeg(i) (hap->sbeg + node2soff(i))
+#define node2send(i) (hap->sbeg + hap->stack[i].slen)
+#define node2rbeg(i) (hap->stack[i].node->sbeg)
+#define node2rend(i) (hap->stack[i].node->sbeg + hap->stack[i].node->rlen)
+#define node2rpos(i) (hap->stack[i].node->rec->pos)
+
+void kput_vcsq(vcsq_t *csq, kstring_t *str)
+{
+    // Remove start/stop from incomplete CDS, but only if there is another
+    // consequence as something must be reported
+    if ( csq->type & CSQ_INCOMPLETE_CDS && (csq->type & ~(CSQ_START_STOP|CSQ_INCOMPLETE_CDS|CSQ_UPSTREAM_STOP)) ) csq->type &= ~(CSQ_START_STOP|CSQ_INCOMPLETE_CDS);
+
+    // Remove missense from start/stops
+    if ( csq->type & CSQ_START_STOP && csq->type & CSQ_MISSENSE_VARIANT ) csq->type &= ~CSQ_MISSENSE_VARIANT;
+
+    if ( csq->type & CSQ_PRINTED_UPSTREAM && csq->ref )
+    {
+        kputc_('@',str);
+        kputw(csq->ref->pos+1, str);
+        return;
+    }
+    if ( csq->type & CSQ_UPSTREAM_STOP )
+        kputc_('*',str);
+
+    int i, n = sizeof(csq_strings)/sizeof(char*);
+    for (i=1; i<n; i++)
+        if ( csq_strings[i] && csq->type&(1<<i) ) { kputs(csq_strings[i],str); break; }
+    i++;
+    for (; i<n; i++)
+        if ( csq_strings[i] && csq->type&(1<<i) ) { kputc_('&',str); kputs(csq_strings[i],str); }
+
+    kputc_('|', str);
+    if ( csq->gene ) kputs(csq->gene , str);
+
+    kputc_('|', str);
+    if ( csq->type & CSQ_PRN_TSCRIPT ) ksprintf(str, "%s",ENSID(csq->trid));
+
+    kputc_('|', str);
+    kputs(gf_type2gff_string(csq->biotype), str);
+
+    if ( CSQ_PRN_STRAND(csq->type) || csq->vstr.l )
+        kputs(csq->strand==STRAND_FWD ? "|+" : "|-", str);
+
+    if ( csq->vstr.l )
+        kputs(csq->vstr.s, str);
+}
+
+void hap_add_csq(args_t *args, hap_t *hap, hap_node_t *node, int tlen, int ibeg, int iend, int dlen, int indel)
+{
+    int i;
+    tscript_t *tr = hap->tr;
+    int ref_node = tr->strand==STRAND_FWD ? ibeg : iend;
+
+    int icsq = node->ncsq_list++;
+    hts_expand0(csq_t,node->ncsq_list,node->mcsq_list,node->csq_list);
+    csq_t *csq = &node->csq_list[icsq];
+    csq->pos  = hap->stack[ref_node].node->rec->pos;
+    csq->type.trid    = tr->id;
+    csq->type.gene    = tr->gene->name;
+    csq->type.strand  = tr->strand;
+    csq->type.biotype = tr->type;
+
+    // only now we see the translated sequence and can determine if the stop/start changes are real
+    int rm_csq = 0; 
+    csq->type.type = 0;
+    for (i=ibeg; i<=iend; i++)
+        csq->type.type |= hap->stack[i].node->csq & CSQ_COMPOUND;
+    if ( dlen==0 && indel ) csq->type.type |= CSQ_INFRAME_ALTERING;
+
+    int has_upstream_stop = hap->upstream_stop;
+    if ( hap->stack[ibeg].node->type != HAP_SSS )
+    {
+        // check for truncating stops
+        for (i=0; i<hap->tref.l; i++)
+            if ( hap->tref.s[i]=='*' ) break;
+        if ( i!=hap->tref.l )
+        {
+            hap->tref.l = i+1;
+            hap->tref.s[i+1] = 0;
+        }
+        for (i=0; i<hap->tseq.l; i++)
+            if ( hap->tseq.s[i]=='*' ) break;
+        if ( i!=hap->tseq.l )
+        {
+            hap->tseq.l = i+1;
+            hap->tseq.s[i+1] = 0;
+            hap->upstream_stop = 1;
+        }
+        if ( csq->type.type & CSQ_STOP_LOST )
+        {
+            if ( hap->tref.s[hap->tref.l-1]=='*' && hap->tref.s[hap->tref.l-1] == hap->tseq.s[hap->tseq.l-1] ) 
+            {
+                rm_csq |= CSQ_STOP_LOST;
+                csq->type.type |= CSQ_STOP_RETAINED;
+            }
+            else if ( hap->tref.s[hap->tref.l-1]!='*' )
+            {
+                // This is CDS 3' incomplete ENSG00000173376/synon.vcf, can also be missense
+                // We observe in real data a change to a stop, ENST00000528237/retained-stop-incomplete-cds.vcf
+                if ( hap->tseq.s[hap->tseq.l-1] == '*' )
+                {
+                    rm_csq |= CSQ_STOP_GAINED;
+                    csq->type.type |= CSQ_STOP_RETAINED;
+                }
+                else
+                    csq->type.type |= CSQ_INCOMPLETE_CDS;
+            }
+        }
+        if ( csq->type.type & CSQ_START_LOST && hap->tref.s[0]!='M' )
+        {
+            rm_csq |= CSQ_START_LOST;
+            csq->type.type &= ~CSQ_START_LOST;
+        }
+        if ( dlen!=0 )
+        {
+            if ( dlen%3 )
+                csq->type.type |= CSQ_FRAMESHIFT_VARIANT;
+            else if ( dlen<0 )
+                csq->type.type |= CSQ_INFRAME_DELETION;
+            else
+                csq->type.type |= CSQ_INFRAME_INSERTION;
+        }
+        else
+        {
+            for (i=0; i<hap->tref.l; i++) 
+                if ( hap->tref.s[i] != hap->tseq.s[i] ) break;
+            if ( i==hap->tref.l )
+                csq->type.type |= CSQ_SYNONYMOUS_VARIANT;
+            else if ( hap->tref.s[i] ==  '*' )
+                csq->type.type |= CSQ_STOP_LOST;
+            else if ( hap->tseq.s[i] ==  '*' )
+                csq->type.type |= CSQ_STOP_GAINED;
+            else
+                csq->type.type |= CSQ_MISSENSE_VARIANT;
+        }
+    }
+    if ( has_upstream_stop ) csq->type.type |= CSQ_UPSTREAM_STOP;
+    csq->type.type &= ~rm_csq;
+
+    if ( hap->stack[ibeg].node->type == HAP_SSS  )
+    {
+        node->csq_list[icsq].type.type   |= hap->stack[ibeg].node->csq & ~rm_csq;
+        node->csq_list[icsq].type.ref     = hap->stack[ibeg].node->rec;
+        node->csq_list[icsq].type.biotype = tr->type;
+        csq_push(args, node->csq_list+icsq, hap->stack[ibeg].node->rec);
+        return;
+    }
+
+    kstring_t str = node->csq_list[icsq].type.vstr;
+    str.l = 0;
+
+    // create the aa variant string
+    int aa_rbeg = tr->strand==STRAND_FWD ? node2rbeg(ibeg)/3+1 : (hap->tr->nsref - 2*N_REF_PAD - node2rend(iend))/3+1;
+    int aa_sbeg = tr->strand==STRAND_FWD ? node2sbeg(ibeg)/3+1 : (tlen - node2send(iend))/3+1;
+    kputc_('|', &str);
+    kputw(aa_rbeg, &str);
+    kputs(hap->tref.s, &str);
+    if ( !(csq->type.type & CSQ_SYNONYMOUS_VARIANT) )
+    {
+        kputc_('>', &str);
+        kputw(aa_sbeg, &str);
+        kputs(hap->tseq.s, &str);
+    }
+    kputc_('|', &str);
+
+    // create the dna variant string and, in case of combined variants,
+    // insert silent CSQ_PRINTED_UPSTREAM variants
+    for (i=ibeg; i<=iend; i++)
+    {
+        if ( i>ibeg ) kputc_('+', &str);
+        kputw(node2rpos(i)+1, &str);
+        kputs(hap->stack[i].node->var, &str);
+    }
+    node->csq_list[icsq].type.vstr = str;
+    csq_push(args, node->csq_list+icsq, hap->stack[ref_node].node->rec);
+
+    for (i=ibeg; i<=iend; i++)
+    {
+        // csq are printed at one position only for combined variants, the rest is
+        // silent and references the first
+        if ( hap->stack[i].node->csq & ~CSQ_COMPOUND )
+        {
+            node->ncsq_list++;
+            hts_expand0(csq_t,node->ncsq_list,node->mcsq_list,node->csq_list);
+            csq_t *tmp_csq = &node->csq_list[node->ncsq_list - 1];
+            tmp_csq->pos  = hap->stack[i].node->rec->pos;
+            tmp_csq->type.trid    = tr->id;
+            tmp_csq->type.gene    = tr->gene->name;
+            tmp_csq->type.strand  = tr->strand;
+            tmp_csq->type.type    = hap->stack[i].node->csq & ~CSQ_COMPOUND & ~rm_csq;
+            tmp_csq->type.biotype = tr->type;
+            tmp_csq->type.vstr.l  = 0;
+            kputs(str.s,&tmp_csq->type.vstr);
+            csq_push(args, tmp_csq, hap->stack[i].node->rec);
+        }
+        if ( i!=ref_node && (node->csq_list[icsq].type.type & CSQ_COMPOUND || !(hap->stack[i].node->csq & ~CSQ_COMPOUND)) )
+        {
+            node->ncsq_list++;
+            hts_expand0(csq_t,node->ncsq_list,node->mcsq_list,node->csq_list);
+            csq_t *tmp_csq = &node->csq_list[node->ncsq_list - 1];
+            tmp_csq->pos  = hap->stack[i].node->rec->pos;
+            tmp_csq->type.trid    = tr->id;
+            tmp_csq->type.gene    = tr->gene->name;
+            tmp_csq->type.strand  = tr->strand;
+            tmp_csq->type.type    = CSQ_PRINTED_UPSTREAM | hap->stack[i].node->csq;
+            tmp_csq->type.biotype = tr->type;
+            tmp_csq->type.ref     = hap->stack[ref_node].node->rec;
+            tmp_csq->type.vstr.l  = 0;
+            csq_push(args, tmp_csq, hap->stack[i].node->rec);
+        }
+    }
+}
+
+void hap_finalize(args_t *args, hap_t *hap)
+{
+    tscript_t *tr = hap->tr;
+    if ( !tr->sref )
+        tscript_splice_ref(tr);
+
+    kstring_t sref;
+    sref.s = tr->sref;
+    sref.l = tr->nsref;
+    sref.m = sref.l;
+
+    int istack = 0;
+    hts_expand(hstack_t,1,hap->mstack,hap->stack);
+
+    hap->sseq.l = 0;
+    hap->tseq.l = 0;
+    hap->stack[0].node = tr->root;
+    hap->stack[0].ichild = -1;
+    hap->stack[0].slen = 0;
+    hap->stack[0].dlen = 0;
+
+    while ( istack>=0 )
+    {
+        hstack_t *stack  = &hap->stack[istack];
+        hap_node_t *node = hap->stack[istack].node;
+        while ( ++hap->stack[istack].ichild < node->nchild )
+        {
+            if ( node->child[stack->ichild] ) break;
+        }
+        if ( stack->ichild == node->nchild ) { istack--; continue; }
+
+        node = node->child[stack->ichild];
+
+        istack++;
+        hts_expand(hstack_t,istack+1,hap->mstack,hap->stack);
+        stack = &hap->stack[istack-1];
+
+        hap->stack[istack].node = node;
+        hap->stack[istack].ichild = -1;
+
+        hap->sseq.l = stack->slen;
+        if ( node->type==HAP_CDS ) kputs(node->seq, &hap->sseq);
+        hap->stack[istack].slen = hap->sseq.l;
+        hap->stack[istack].dlen = hap->stack[istack-1].dlen + node->dlen;
+
+        if ( !node->nend ) continue;    // not a leaf node
+
+        // The spliced sequence has been built for the current haplotype and stored
+        // in hap->sseq. Now we break it and output as independent parts
+        
+        kstring_t sseq;
+        sseq.m = sref.m - 2*N_REF_PAD + hap->stack[istack].dlen;  // total length of the spliced query transcript
+        hap->upstream_stop = 0;
+
+        int i = 1, dlen = 0, ibeg, indel = 0;
+        while ( i<istack && hap->stack[i].node->type == HAP_SSS ) i++;
+        hap->sbeg = hap->stack[i].node->sbeg;
+
+        if ( tr->strand==STRAND_FWD )
+        {
+            i = 0, ibeg = -1;
+            while ( ++i <= istack )
+            {
+                if ( hap->stack[i].node->type == HAP_SSS )
+                {
+                    // start/stop/splice site overlap: don't know how to build the haplotypes correctly, skipping
+                    hap_add_csq(args,hap,node,0,i,i,0,0);
+                    continue;
+                }
+                dlen += hap->stack[i].node->dlen;
+                if ( hap->stack[i].node->dlen ) indel = 1;
+                if ( i<istack )
+                {
+                    if ( dlen%3 )   // frameshift
+                    {
+                        if ( ibeg==-1 ) ibeg = i;
+                        continue;
+                    }
+                    int icur  = node2sbeg(i);
+                    int inext = node2sbeg(i+1);
+                    if ( icur/3 == inext/3 )    // in the same codon, can't be flushed yet
+                    {
+                        if ( ibeg==-1 ) ibeg = i;
+                        continue;
+                    }
+                }
+                if ( ibeg<0 ) ibeg = i;
+
+                int ioff = node2soff(ibeg);
+                int icur = node2sbeg(ibeg);
+                int rbeg = node2rbeg(ibeg);
+                int rend = node2rend(i);
+                int fill = dlen%3;
+
+                // alt
+                if ( hap->sseq.l )
+                {
+                    sseq.l = hap->stack[i].slen - ioff;
+                    sseq.s = hap->sseq.s + ioff;
+                }
+                else    // splice site overlap, see #1475227917
+                    sseq.l = fill = 0;
+                cds_translate(&sref, &sseq, icur,rbeg,rend, tr->strand, &hap->tseq, fill);
+
+                // ref
+                sseq.l = node2rend(i) - rbeg;
+                sseq.s = sref.s + N_REF_PAD + rbeg;
+                sseq.m = sref.m - 2*N_REF_PAD;
+                cds_translate(&sref, &sseq, rbeg,rbeg,rend, tr->strand, &hap->tref, fill);
+                sseq.m = sref.m - 2*N_REF_PAD + hap->stack[istack].dlen;
+
+                hap_add_csq(args,hap,node,0, ibeg,i,dlen,indel);
+                ibeg = -1;
+                dlen = 0;
+                indel = 0;
+            }
+        }
+        else
+        {
+            i = istack + 1, ibeg = -1;
+            while ( --i > 0 )
+            {
+                if ( hap->stack[i].node->type == HAP_SSS )
+                {
+                    hap_add_csq(args,hap,node,0,i,i,0,0);
+                    continue;
+                }
+                dlen += hap->stack[i].node->dlen;
+                if ( hap->stack[i].node->dlen ) indel = 1;
+                if ( i>1 && hap->stack[i-1].node->type != HAP_SSS )
+                {
+                    if ( dlen%3 )
+                    {
+                        if ( ibeg==-1 ) ibeg = i;
+                        continue;
+                    }
+                    int icur  = sseq.m - 1 - node2sbeg(i);
+                    int inext = sseq.m - 1 - node2sbeg(i-1);
+                    if ( icur/3 == inext/3 )
+                    {
+                        if ( ibeg==-1 ) ibeg = i;
+                        continue;
+                    }
+                }
+                if ( ibeg<0 ) ibeg = i;
+                int ioff = node2soff(i);
+                int icur = node2sbeg(i);
+                int rbeg = node2rbeg(i);
+                int rend = node2rend(ibeg);
+                int fill = dlen%3;
+
+                // alt
+                if ( hap->sseq.l )
+                {
+                    sseq.l = hap->stack[ibeg].slen - ioff;
+                    sseq.s = hap->sseq.s + ioff;
+                }
+                else    // splice site overlap, see #1475227917
+                    sseq.l = fill = 0;
+                cds_translate(&sref, &sseq, icur,rbeg,rend, tr->strand, &hap->tseq, fill);
+
+                // ref
+                sseq.l = node2rend(ibeg) - rbeg;
+                sseq.s = sref.s + N_REF_PAD + rbeg;
+                sseq.m = sref.m - 2*N_REF_PAD;
+                cds_translate(&sref, &sseq, rbeg,rbeg,rend, tr->strand, &hap->tref, fill);
+                sseq.m = sref.m - 2*N_REF_PAD + hap->stack[istack].dlen;
+
+                hap_add_csq(args,hap,node,sseq.m, i,ibeg,dlen,indel);
+                ibeg = -1;
+                dlen = 0;
+                indel = 0;
+            }
+        }
+    }
+}
+
+static inline void csq_print_text(args_t *args, csq_t *csq, int ismpl, int ihap)
+{
+    if ( csq->type.type & CSQ_PRINTED_UPSTREAM ) return;
+
+    char *smpl = ismpl >= 0 ? args->hdr->samples[ismpl] : "-";
+    const char *chr = bcf_hdr_id2name(args->hdr,args->rid);
+
+    fprintf(args->out,"CSQ\t%s\t", smpl);
+    if ( ihap>0 )
+        fprintf(args->out,"%d", ihap);
+    else
+        fprintf(args->out,"-");
+
+    args->str.l = 0;
+    kput_vcsq(&csq->type, &args->str);
+    fprintf(args->out,"\t%s\t%d\t%s\n",chr,csq->pos+1,args->str.s);
+}
+static inline void hap_print_text(args_t *args, tscript_t *tr, int ismpl, int ihap, hap_node_t *node)
+{
+    if ( !node || !node->ncsq_list ) return;
+
+    char *smpl = ismpl >= 0 ? args->hdr->samples[ismpl] : "-";
+    const char *chr = bcf_hdr_id2name(args->hdr,args->rid);
+
+    int i;
+    for (i=0; i<node->ncsq_list; i++)
+    {
+        csq_t *csq = node->csq_list + i;
+        if ( csq->type.type & CSQ_PRINTED_UPSTREAM ) continue;
+        assert( csq->type.vstr.l );
+
+        fprintf(args->out,"CSQ\t%s\t", smpl);
+        if ( ihap>0 )
+            fprintf(args->out,"%d", ihap);
+        else
+            fprintf(args->out,"-");
+
+        args->str.l = 0;
+        kput_vcsq(&csq->type, &args->str);
+        fprintf(args->out,"\t%s\t%d\t%s\n",chr,csq->pos+1,args->str.s);
+    }
+}
+
+static inline void hap_stage_vcf(args_t *args, tscript_t *tr, int ismpl, int ihap, hap_node_t *node)
+{
+    if ( !node || !node->ncsq_list || ismpl<0 ) return;
+
+    int i;
+    for (i=0; i<node->ncsq_list; i++)
+    {
+        csq_t *csq = node->csq_list + i;
+        vrec_t *vrec = csq->vrec;
+        int icsq = 2*csq->idx + ihap;
+        if ( icsq >= args->ncsq_max ) // more than ncsq_max consequences, so can't fit it in FMT
+        {
+            int print_warning = 1;
+            if ( args->quiet )
+            {
+                if ( args->quiet > 1 || args->ncsq_small_warned ) print_warning = 0;
+                args->ncsq_small_warned = 1;
+            }
+            if ( print_warning )
+            {
+                fprintf(pysam_stderr,"Warning: --ncsq %d is too small to annotate %s at %s:%d with %d-th csq\n",
+                        args->ncsq_max/2,args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),vrec->line->pos+1,csq->idx+1);
+                if ( args->quiet ) fprintf(pysam_stderr,"(This warning is printed only once)\n");
+            }
+            break;
+        }
+        if ( vrec->nfmt < 1 + icsq/32 ) vrec->nfmt = 1 + icsq/32;
+        vrec->smpl[ismpl*args->nfmt_bcsq + icsq/32] |= 1 << (icsq % 32);
+    }
+}
+
+void hap_flush(args_t *args, uint32_t pos)
+{
+    int i,j;
+    tr_heap_t *heap = args->active_tr;
+
+    while ( heap->ndat && heap->dat[0]->end<=pos )
+    {
+        tscript_t *tr = heap->dat[0];
+        khp_delete(trhp, heap);
+
+        args->hap->tr = tr;
+        if ( tr->root && tr->root->nchild ) // normal, non-localized calling
+        {
+            hap_finalize(args, args->hap);
+
+            if ( args->output_type==FT_TAB_TEXT )   // plain text output, not a vcf
+            {
+                if ( args->phase==PHASE_DROP_GT )
+                    hap_print_text(args, tr, -1,0, tr->hap[0]);
+                else
+                {
+                    for (i=0; i<args->smpl->n; i++)
+                    {
+                        for (j=0; j<2; j++)
+                            hap_print_text(args, tr, args->smpl->idx[i],j+1, tr->hap[i*2+j]);
+                    }
+                }
+            }
+            else if ( args->phase!=PHASE_DROP_GT )
+            {
+                for (i=0; i<args->smpl->n; i++)
+                {
+                    for (j=0; j<2; j++)
+                        hap_stage_vcf(args, tr, args->smpl->idx[i],j, tr->hap[i*2+j]);
+                }
+            }
+        }
+
+        // mark the transcript for deletion. Cannot delete it immediately because
+        // by-position VCF output will need them when flushed by vcf_buf_push
+        args->nrm_tr++;
+        hts_expand(tscript_t*,args->nrm_tr,args->mrm_tr,args->rm_tr);
+        args->rm_tr[args->nrm_tr-1] = tr;
+    }
+}
+
+#define SWAP(type_t, a, b) { type_t t = a; a = b; b = t; }
+
+void vbuf_push(args_t *args, bcf1_t **rec_ptr)
+{
+    int i;
+
+    assert(rec_ptr);
+    bcf1_t *rec = *rec_ptr;
+
+    // check for duplicate records
+    i = args->vcf_rbuf.n ? rbuf_last(&args->vcf_rbuf) : -1;
+    if ( i<0 || args->vcf_buf[i]->vrec[0]->line->pos!=rec->pos ) 
+    {
+        // vcf record with a new pos
+        rbuf_expand0(&args->vcf_rbuf, vbuf_t*, args->vcf_rbuf.n+1, args->vcf_buf);
+        i = rbuf_append(&args->vcf_rbuf);
+        if ( !args->vcf_buf[i] ) args->vcf_buf[i] = (vbuf_t*) calloc(1,sizeof(vbuf_t));
+        args->vcf_buf[i]->n = 0;
+    }
+    vbuf_t *vbuf = args->vcf_buf[i];
+    vbuf->n++;
+    hts_expand0(vrec_t*, vbuf->n, vbuf->m, vbuf->vrec);
+    if ( !vbuf->vrec[vbuf->n - 1] )
+        vbuf->vrec[vbuf->n - 1] = (vrec_t*) calloc(1,sizeof(vrec_t));
+
+    vrec_t *vrec = vbuf->vrec[vbuf->n - 1];
+    if ( args->phase!=PHASE_DROP_GT && args->smpl->n )
+    {
+        if ( !vrec->smpl ) vrec->smpl = (uint32_t*) calloc(args->hdr_nsmpl,sizeof(*vrec->smpl) * args->nfmt_bcsq);
+        else memset(vrec->smpl,0,args->hdr_nsmpl*sizeof(*vrec->smpl) * args->nfmt_bcsq);
+    }
+    if ( !vrec->line ) vrec->line = bcf_init1();
+    SWAP(bcf1_t*, (*rec_ptr), vrec->line);
+
+    int ret;
+    khint_t k = kh_put(pos2vbuf, args->pos2vbuf, (int)rec->pos, &ret);
+    kh_val(args->pos2vbuf,k) = vbuf;
+}
+
+void vbuf_flush(args_t *args)
+{
+    if ( args->active_tr->ndat ) return; // cannot output buffered VCF lines (args.vbuf) until all active transcripts are gone
+
+    int i,j;
+    while ( (i=rbuf_shift(&args->vcf_rbuf))>=0 )
+    {
+        vbuf_t *vbuf = args->vcf_buf[i];
+        for (i=0; i<vbuf->n; i++)
+        {
+            vrec_t *vrec = vbuf->vrec[i];
+            if ( !args->out_fh ) // not a VCF output
+            {
+                vrec->nvcsq = 0;
+                continue;
+            }
+            if ( !vrec->nvcsq )
+            {
+                bcf_write(args->out_fh, args->hdr, vrec->line);
+                continue;
+            }
+            
+            args->str.l = 0;
+            kput_vcsq(&vrec->vcsq[0], &args->str);
+            for (j=1; j<vrec->nvcsq; j++)
+            {
+                kputc_(',', &args->str);
+                kput_vcsq(&vrec->vcsq[j], &args->str);
+            }
+            bcf_update_info_string(args->hdr, vrec->line, args->bcsq_tag, args->str.s);
+            if ( args->hdr_nsmpl )
+            {
+                if ( vrec->nfmt < args->nfmt_bcsq )
+                    for (j=1; j<args->hdr_nsmpl; j++) memcpy(vrec->smpl+j*vrec->nfmt, vrec->smpl+j*args->nfmt_bcsq, vrec->nfmt*sizeof(*vrec->smpl));
+                bcf_update_format_int32(args->hdr, vrec->line, args->bcsq_tag, vrec->smpl, args->hdr_nsmpl*vrec->nfmt);
+            }
+            vrec->nvcsq = 0;
+            bcf_write(args->out_fh, args->hdr, vrec->line);
+        }
+        if ( vbuf->n )
+        {
+            khint_t k = kh_get(pos2vbuf, args->pos2vbuf, vbuf->vrec[0]->line->pos);
+            if ( k != kh_end(args->pos2vbuf) ) kh_del(pos2vbuf, args->pos2vbuf, k);
+        }
+        vbuf->n = 0;
+    }
+
+    for (i=0; i<args->nrm_tr; i++)
+    {
+        tscript_t *tr = args->rm_tr[i];
+        if ( tr->root ) hap_destroy(tr->root);
+        tr->root = NULL;
+        free(tr->hap);
+        free(tr->ref);
+        free(tr->sref);
+    }
+    args->nrm_tr = 0;
+    args->ncsq_buf = 0;
+}
+
+void tscript_init_ref(args_t *args, tscript_t *tr, const char *chr)
+{
+    int i, len;
+    int pad_beg = tr->beg >= N_REF_PAD ? N_REF_PAD : tr->beg;
+
+    tr->ref = faidx_fetch_seq(args->fai, chr, tr->beg - pad_beg, tr->end + N_REF_PAD, &len);
+    if ( !tr->ref )
+        error("faidx_fetch_seq failed %s:%d-%d\n", chr,tr->beg+1,tr->end+1);
+
+    int pad_end = len - (tr->end - tr->beg + 1 + pad_beg);
+    if ( pad_beg + pad_end != 2*N_REF_PAD )
+    {
+        char *ref = (char*) malloc(tr->end - tr->beg + 1 + 2*N_REF_PAD);
+        for (i=0; i < N_REF_PAD - pad_beg; i++) ref[i] = 'N';
+        memcpy(ref+i, tr->ref, len);
+        for (i=0; i < N_REF_PAD - pad_end; i++) ref[i+len] = 'N';
+        free(tr->ref);
+        tr->ref = ref;
+    }
+}
+
+static void sanity_check_ref(args_t *args, tscript_t *tr, bcf1_t *rec)
+{
+    char *ref = tr->ref + (rec->pos + N_REF_PAD >= tr->beg ? rec->pos - tr->beg + N_REF_PAD : 0);
+    char *vcf = rec->d.allele[0] + (rec->pos + N_REF_PAD >= tr->beg ? 0 : tr->beg - N_REF_PAD - rec->pos);
+    assert( vcf - rec->d.allele[0] < strlen(rec->d.allele[0]) );
+    while ( *ref && *vcf )
+    {
+        if ( *ref!=*vcf && toupper(*ref)!=toupper(*vcf) ) 
+            error("Error: the fasta reference does not match the VCF REF allele at %s:%d .. %s\n", bcf_seqname(args->hdr,rec),rec->pos+1,rec->d.allele[0]);
+        ref++;
+        vcf++;
+    }
+}
+
+int test_cds_local(args_t *args, bcf1_t *rec)
+{
+    int i,j, ret = 0;
+    const char *chr = bcf_seqname(args->hdr,rec);
+    // note that the off-by-one extension of rlen is deliberate to account for insertions
+    if ( !regidx_overlap(args->idx_cds,chr,rec->pos,rec->pos+rec->rlen, args->itr) ) return 0;
+
+    // structures to fake the normal test_cds machinery
+    hap_node_t root, node;
+    root.type  = HAP_ROOT;
+    kstring_t *tref = &args->hap->tref, *tseq = &args->hap->tseq;
+
+    while ( regitr_overlap(args->itr) )
+    {
+        gf_cds_t *cds = regitr_payload(args->itr,gf_cds_t*);
+        tscript_t *tr = cds->tr;
+        if ( !GF_is_coding(tr->type) ) continue;
+        ret = 1;
+
+        if ( !tr->ref )
+        {
+            tscript_init_ref(args, tr, chr);
+            tscript_splice_ref(tr);
+            khp_insert(trhp, args->active_tr, &tr);     // only to clean the reference afterwards
+        }
+
+        sanity_check_ref(args, tr, rec);
+
+        kstring_t sref;
+        sref.s = tr->sref;
+        sref.l = tr->nsref;
+        sref.m = sref.l;
+
+        for (i=1; i<rec->n_allele; i++)
+        {
+            if ( hap_init(args, &root, &node, cds, rec, i)!=0 ) continue;
+
+            csq_t csq; 
+            memset(&csq, 0, sizeof(csq_t));
+            csq.pos          = rec->pos;
+            csq.type.biotype = tr->type;
+            csq.type.strand  = tr->strand;
+            csq.type.trid    = tr->id;
+            csq.type.gene    = tr->gene->name;
+
+            int csq_type = node.csq;
+
+            // code repetition: it would be nice to reuse the code from hap_add_csq, needs have refactoring though
+            if ( node.type == HAP_SSS )
+            {
+                csq.type.type = csq_type;
+                csq_stage(args, &csq, rec);
+            }
+            else
+            {
+                kstring_t sseq;
+                sseq.m = sref.m - 2*N_REF_PAD + node.dlen;
+                sseq.s = node.seq;
+                int alen = sseq.l = strlen(sseq.s);
+                int fill = node.dlen%3 && alen ? 1 : 0; // see #1475227917
+                cds_translate(&sref, &sseq, node.sbeg,node.sbeg,node.sbeg+node.rlen, tr->strand, tseq, fill);
+
+                sseq.m = sref.m - 2*N_REF_PAD;
+                sseq.s = sref.s + N_REF_PAD + node.sbeg;
+                sseq.l = node.rlen;
+                cds_translate(&sref, &sseq, node.sbeg,node.sbeg,node.sbeg+node.rlen, tr->strand, tref, fill);
+
+                // check for truncating stops
+                for (j=0; j<tref->l; j++)
+                    if ( tref->s[j]=='*' ) break;
+                if ( j!=tref->l )
+                {
+                    tref->l = j+1;
+                    tref->s[j+1] = 0;
+                }
+                for (j=0; j<tseq->l; j++)
+                    if ( tseq->s[j]=='*' ) break;
+                if ( j!=tseq->l )
+                {
+                    tseq->l = j+1;
+                    tseq->s[j+1] = 0;
+                }
+                if ( csq_type & CSQ_STOP_LOST )
+                {
+                    if ( tref->s[tref->l-1]=='*' && tref->s[tref->l-1] == tseq->s[tseq->l-1] ) 
+                    {
+                        csq_type &= ~CSQ_STOP_LOST;
+                        csq_type |= CSQ_STOP_RETAINED;
+                    }
+                    else if (tref->s[tref->l-1]!='*' )
+                    {
+                        // This is CDS 3' incomplete ENSG00000173376/synon.vcf, can also be missense
+                        // We observe in real data a change to a stop, ENST00000528237/retained-stop-incomplete-cds.vcf
+                        if ( tseq->s[tseq->l-1] == '*' )
+                        {
+                            csq_type &= ~CSQ_STOP_GAINED;
+                            csq_type |= CSQ_STOP_RETAINED;
+                        }
+                        else
+                            csq_type |= CSQ_INCOMPLETE_CDS;
+                    }
+                }
+                if ( csq_type & CSQ_START_LOST && tref->s[0]!='M' )
+                    csq_type &= ~CSQ_START_LOST;
+                if ( node.dlen!=0 )
+                {
+                    if ( node.dlen%3 )
+                        csq_type |= CSQ_FRAMESHIFT_VARIANT;
+                    else if ( node.dlen<0 )
+                        csq_type |= CSQ_INFRAME_DELETION;
+                    else
+                        csq_type |= CSQ_INFRAME_INSERTION;
+                }
+                else
+                {
+                    for (j=0; j<tref->l; j++) 
+                        if ( tref->s[j] != tseq->s[j] ) break;
+                    if ( j==tref->l )
+                        csq_type |= CSQ_SYNONYMOUS_VARIANT;
+                    else if ( tref->s[j] ==  '*' )
+                        csq_type |= CSQ_STOP_LOST;
+                    else if ( tseq->s[j] ==  '*' )
+                        csq_type |= CSQ_STOP_GAINED;
+                    else
+                        csq_type |= CSQ_MISSENSE_VARIANT;
+                }
+                if ( csq_type & CSQ_COMPOUND )
+                {
+                    // create the aa variant string
+                    kstring_t str = {0,0,0};
+                    int aa_rbeg = tr->strand==STRAND_FWD ? node.sbeg/3+1 : (tr->nsref - 2*N_REF_PAD - node.sbeg - node.rlen)/3+1;
+                    int aa_sbeg = tr->strand==STRAND_FWD ? node.sbeg/3+1 : (tr->nsref - 2*N_REF_PAD + node.dlen - node.sbeg - alen)/3+1;
+                    kputc_('|', &str);
+                    kputw(aa_rbeg, &str);
+                    kputs(tref->s, &str);
+                    if ( !(csq_type & CSQ_SYNONYMOUS_VARIANT) )
+                    {
+                        kputc_('>', &str);
+                        kputw(aa_sbeg, &str);
+                        kputs(tseq->s, &str);
+                    }
+                    kputc_('|', &str);
+                    kputw(rec->pos+1, &str);
+                    kputs(node.var, &str);
+                    csq.type.vstr = str;
+                    csq.type.type = csq_type & CSQ_COMPOUND;
+                    csq_stage(args, &csq, rec);
+
+                    // all this only to clean vstr when vrec is flushed
+                    if ( !tr->root )
+                        tr->root = (hap_node_t*) calloc(1,sizeof(hap_node_t));
+                    tr->root->ncsq_list++;
+                    hts_expand0(csq_t,tr->root->ncsq_list,tr->root->mcsq_list,tr->root->csq_list);
+                    csq_t *rm_csq = tr->root->csq_list + tr->root->ncsq_list - 1;
+                    rm_csq->type.vstr = str;            
+                }
+                if ( csq_type & ~CSQ_COMPOUND )
+                {
+                    csq.type.type = csq_type & ~CSQ_COMPOUND;
+                    csq.type.vstr.l = 0;
+                    csq_stage(args, &csq, rec);
+                }
+            }
+            free(node.seq);
+            free(node.var);
+        }
+    }
+    return ret;
+}
+
+int test_cds(args_t *args, bcf1_t *rec)
+{
+    int i, ret = 0, hap_ret;
+    const char *chr = bcf_seqname(args->hdr,rec);
+    // note that the off-by-one extension of rlen is deliberate to account for insertions
+    if ( !regidx_overlap(args->idx_cds,chr,rec->pos,rec->pos+rec->rlen, args->itr) ) return 0;
+    while ( regitr_overlap(args->itr) )
+    {
+        gf_cds_t *cds = regitr_payload(args->itr,gf_cds_t*);
+        tscript_t *tr = cds->tr;
+        if ( !GF_is_coding(tr->type) ) continue;
+        ret = 1;
+        if ( !tr->root )
+        {
+            // initialize the transcript and its haplotype tree, fetch the reference sequence
+            tscript_init_ref(args, tr, chr);
+
+            tr->root = (hap_node_t*) calloc(1,sizeof(hap_node_t));
+            tr->nhap = args->phase==PHASE_DROP_GT ? 1 : 2*args->smpl->n;     // maximum ploidy = diploid
+            tr->hap  = (hap_node_t**) malloc(tr->nhap*sizeof(hap_node_t*));
+            for (i=0; i<tr->nhap; i++) tr->hap[i] = NULL;
+            tr->root->nend = tr->nhap;
+            tr->root->type = HAP_ROOT;
+
+            khp_insert(trhp, args->active_tr, &tr);
+        }
+
+        sanity_check_ref(args, tr, rec);
+
+        if ( args->phase==PHASE_DROP_GT )
+        {
+            if ( rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*' ) { continue; }
+            hap_node_t *parent = tr->hap[0] ? tr->hap[0] : tr->root;
+            hap_node_t *child  = (hap_node_t*)calloc(1,sizeof(hap_node_t));
+            if ( (hap_ret=hap_init(args, parent, child, cds, rec, 1))!=0 )
+            {
+                // overlapping or intron variant, cannot apply
+                if ( hap_ret==1 )
+                {
+                    if ( !args->quiet )
+                        fprintf(pysam_stderr,"Warning: Skipping overlapping variants at %s:%d\t%s>%s\n", chr,rec->pos+1,rec->d.allele[0],rec->d.allele[1]);
+                    if ( args->out ) 
+                        fprintf(args->out,"LOG\tWarning: Skipping overlapping variants at %s:%d\t%s>%s\n", chr,rec->pos+1,rec->d.allele[0],rec->d.allele[1]);
+                }
+                else ret = 1;   // prevent reporting as intron in test_tscript
+                free(child);
+                continue;
+            }
+            parent->nend--;
+            parent->nchild = 1;
+            parent->mchild = 1;
+            parent->child  = (hap_node_t**) malloc(sizeof(hap_node_t*));
+            parent->child[0] = child;
+            tr->hap[0] = child;
+            tr->hap[0]->nend = 1;
+            continue;
+        }
+
+        // apply the VCF variants and extend the haplotype tree
+        int j, ismpl, ihap, ngts = bcf_get_genotypes(args->hdr, rec, &args->gt_arr, &args->mgt_arr);
+        ngts /= bcf_hdr_nsamples(args->hdr);
+        if ( ngts!=1 && ngts!=2 ) 
+        {
+            if ( !args->quiet )
+                fprintf(pysam_stderr,"Warning: Skipping site with non-diploid/non-haploid genotypes at %s:%d\t%s>%s\n", chr,rec->pos+1,rec->d.allele[0],rec->d.allele[1]);
+            if ( args->out ) 
+                fprintf(args->out,"LOG\tWarning: Skipping site with non-diploid/non-haploid genotypes at %s:%d\t%s>%s\n", chr,rec->pos+1,rec->d.allele[0],rec->d.allele[1]);
+            continue;
+        }
+        for (ismpl=0; ismpl<args->smpl->n; ismpl++)
+        {
+            int32_t *gt = args->gt_arr + args->smpl->idx[ismpl]*ngts;
+            if ( gt[0]==bcf_gt_missing ) continue;
+
+            if ( ngts>1 && gt[0]!=gt[1] && gt[1]!=bcf_gt_missing && gt[1]!=bcf_int32_vector_end )
+            {
+                if ( args->phase==PHASE_MERGE )
+                {
+                    if ( !bcf_gt_allele(gt[0]) ) gt[0] = gt[1];
+                }
+                if ( !bcf_gt_is_phased(gt[0]) && !bcf_gt_is_phased(gt[1]) )
+                {
+                    if ( args->phase==PHASE_REQUIRE )
+                        error("Unphased genotype at %s:%d, sample %s. See the --phase option.\n", chr,rec->pos+1,args->hdr->samples[args->smpl->idx[ismpl]]);
+                    if ( args->phase==PHASE_SKIP )
+                        continue;
+                    if ( args->phase==PHASE_NON_REF )
+                    {
+                        if ( !bcf_gt_allele(gt[0]) ) gt[0] = gt[1];
+                        else if ( !bcf_gt_allele(gt[1]) ) gt[1] = gt[0];
+                    }
+                }
+            }
+
+            for (ihap=0; ihap<ngts; ihap++)
+            {
+                if ( gt[ihap]==bcf_gt_missing || gt[ihap]==bcf_int32_vector_end ) continue;
+
+                i = 2*ismpl + ihap;
+
+                int ial = bcf_gt_allele(gt[ihap]);
+                if ( !ial ) continue;
+                assert( ial < rec->n_allele );
+                if ( rec->d.allele[ial][0]=='<' || rec->d.allele[ial][0]=='*' ) { continue; }
+
+                hap_node_t *parent = tr->hap[i] ? tr->hap[i] : tr->root;
+                if ( parent->cur_rec==rec && parent->cur_child[ial]>=0 )
+                {
+                    // this haplotype has been seen in another sample
+                    tr->hap[i] = parent->child[ parent->cur_child[ial] ];
+                    tr->hap[i]->nend++;
+                    parent->nend--;
+                    continue;
+                }
+
+                hap_node_t *child = (hap_node_t*)calloc(1,sizeof(hap_node_t));
+                if ( (hap_ret=hap_init(args, parent, child, cds, rec, ial))!=0 )
+                {
+                    // overlapping or intron variant, cannot apply
+                    if ( hap_ret==1 )
+                    {
+                        if ( !args->quiet )
+                            fprintf(pysam_stderr,"Warning: Skipping overlapping variants at %s:%d, sample %s\t%s>%s\n",
+                                    chr,rec->pos+1,args->hdr->samples[args->smpl->idx[ismpl]],rec->d.allele[0],rec->d.allele[ial]);
+                        if ( args->out  )
+                            fprintf(args->out,"LOG\tWarning: Skipping overlapping variants at %s:%d, sample %s\t%s>%s\n",
+                                    chr,rec->pos+1,args->hdr->samples[args->smpl->idx[ismpl]],rec->d.allele[0],rec->d.allele[ial]);
+                    }
+                    free(child);
+                    continue;
+                }
+
+                if ( parent->cur_rec!=rec )
+                {
+                    hts_expand(int,rec->n_allele,parent->mcur_child,parent->cur_child);
+                    for (j=0; j<rec->n_allele; j++) parent->cur_child[j] = -1;
+                    parent->cur_rec = rec;
+                }
+
+                j = parent->nchild++;
+                hts_expand0(hap_node_t*,parent->nchild,parent->mchild,parent->child);
+                parent->cur_child[ial] = j;
+                parent->child[j] = child;
+                tr->hap[i] = child;
+                tr->hap[i]->nend++;
+                parent->nend--;
+            }
+        }
+    }
+    return ret;
+}
+
+void csq_stage(args_t *args, csq_t *csq, bcf1_t *rec)
+{
+    // known issues: tab output leads to unsorted output. This is because
+    // coding haplotypes are printed in one go and buffering is not used
+    // with tab output. VCF output is OK though.
+    if ( csq_push(args, csq, rec)!=0 ) return;    // the consequence already exists
+
+    int i,j,ngt = 0;
+    if ( args->phase!=PHASE_DROP_GT )
+    {
+        ngt = bcf_get_genotypes(args->hdr, rec, &args->gt_arr, &args->mgt_arr);
+        if ( ngt>0 ) ngt /= bcf_hdr_nsamples(args->hdr);
+    }
+    if ( ngt<=0 )
+    {
+        if ( args->output_type==FT_TAB_TEXT )
+            csq_print_text(args, csq, -1,0);
+        return;
+    }
+    assert( ngt<=2 );
+
+    if ( args->output_type==FT_TAB_TEXT )
+    {
+        for (i=0; i<args->smpl->n; i++)
+        {
+            int32_t *gt = args->gt_arr + args->smpl->idx[i]*ngt;
+            for (j=0; j<ngt; j++)
+            {
+                if ( gt[j]==bcf_gt_missing || gt[j]==bcf_int32_vector_end || !bcf_gt_allele(gt[j]) ) continue;
+                csq_print_text(args, csq, args->smpl->idx[i],j+1);
+            }
+        }
+        return;
+    }
+
+    vrec_t *vrec = csq->vrec;
+    for (i=0; i<args->smpl->n; i++)
+    {
+        int32_t *gt = args->gt_arr + args->smpl->idx[i]*ngt;
+        for (j=0; j<ngt; j++)
+        {
+            if ( gt[j]==bcf_gt_missing || gt[j]==bcf_int32_vector_end || !bcf_gt_allele(gt[j]) ) continue;
+
+            int icsq = 2*csq->idx + j;
+            if ( icsq >= args->ncsq_max ) // more than ncsq_max consequences, so can't fit it in FMT
+            {
+                int ismpl = args->smpl->idx[i];
+                int print_warning = 1;
+                if ( args->quiet )
+                {
+                    if ( args->quiet > 1 || args->ncsq_small_warned ) print_warning = 0;
+                    args->ncsq_small_warned = 1;
+                }
+                if ( print_warning )
+                {
+                    fprintf(pysam_stderr,"Warning: --ncsq %d is too small to annotate %s at %s:%d with %d-th csq\n",
+                            args->ncsq_max/2,args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),vrec->line->pos+1,csq->idx+1);
+                    if ( args->quiet ) fprintf(pysam_stderr,"(This warning is printed only once)\n");
+                }
+                break;
+            }
+            if ( vrec->nfmt < 1 + icsq/32 ) vrec->nfmt = 1 + icsq/32;
+            vrec->smpl[i*args->nfmt_bcsq + icsq/32] |= 1 << (icsq % 32);
+        }
+    }
+}
+int test_utr(args_t *args, bcf1_t *rec)
+{
+    const char *chr = bcf_seqname(args->hdr,rec);
+    // note that the off-by-one extension of rlen is deliberate to account for insertions
+    if ( !regidx_overlap(args->idx_utr,chr,rec->pos,rec->pos+rec->rlen, args->itr) ) return 0;
+
+    splice_t splice;
+    splice_init(&splice, rec);
+
+    int i, ret = 0;
+    while ( regitr_overlap(args->itr) )
+    {
+        gf_utr_t *utr = regitr_payload(args->itr, gf_utr_t*);
+        tscript_t *tr = splice.tr = utr->tr;
+        for (i=1; i<rec->n_allele; i++)
+        {
+            if ( rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*' ) { continue; }
+            splice.vcf.alt = rec->d.allele[i];
+            int splice_ret = splice_csq(args, &splice, utr->beg, utr->end);
+            if ( splice_ret!=SPLICE_INSIDE && splice_ret!=SPLICE_OVERLAP ) continue;
+            csq_t csq; 
+            memset(&csq, 0, sizeof(csq_t));
+            csq.pos          = rec->pos;
+            csq.type.type    = utr->which==prime5 ? CSQ_UTR5 : CSQ_UTR3;
+            csq.type.biotype = tr->type;
+            csq.type.strand  = tr->strand;
+            csq.type.trid    = tr->id;
+            csq.type.gene    = tr->gene->name;
+            csq_stage(args, &csq, rec);
+            ret = 1;
+        }
+    }
+    assert(!splice.kref.s);
+    assert(!splice.kalt.s);
+    return ret;
+}
+int test_splice(args_t *args, bcf1_t *rec)
+{
+    const char *chr = bcf_seqname(args->hdr,rec);
+    if ( !regidx_overlap(args->idx_exon,chr,rec->pos,rec->pos + rec->rlen, args->itr) ) return 0;
+
+    splice_t splice;
+    splice_init(&splice, rec);
+    splice.check_acceptor = splice.check_donor = 1;
+
+    int i, ret = 0;
+    while ( regitr_overlap(args->itr) )
+    {
+        gf_exon_t *exon = regitr_payload(args->itr, gf_exon_t*);
+        splice.tr = exon->tr;
+        if ( !splice.tr->ncds ) continue;  // not a coding transcript, no interest in splice sites
+
+        splice.check_region_beg = splice.tr->beg==exon->beg ? 0 : 1;
+        splice.check_region_end = splice.tr->end==exon->end ? 0 : 1;
+
+        for (i=1; i<rec->n_allele; i++)
+        {
+            if ( rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*' ) { continue; }
+            splice.vcf.alt = rec->d.allele[i];
+            splice_csq(args, &splice, exon->beg, exon->end);
+            if ( splice.csq ) ret = 1;
+        }
+    }
+    free(splice.kref.s);
+    free(splice.kalt.s);
+    return ret;
+}
+int test_tscript(args_t *args, bcf1_t *rec)
+{
+    const char *chr = bcf_seqname(args->hdr,rec);
+    if ( !regidx_overlap(args->idx_tscript,chr,rec->pos,rec->pos+rec->rlen, args->itr) ) return 0;
+
+    splice_t splice;
+    splice_init(&splice, rec);
+
+    int i, ret = 0;
+    while ( regitr_overlap(args->itr) )
+    {
+        tscript_t *tr = splice.tr = regitr_payload(args->itr, tscript_t*);
+        for (i=1; i<rec->n_allele; i++)
+        {
+            if ( rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*' ) { continue; }
+            splice.vcf.alt = rec->d.allele[i];
+            int splice_ret = splice_csq(args, &splice, tr->beg, tr->end);
+            if ( splice_ret!=SPLICE_INSIDE && splice_ret!=SPLICE_OVERLAP ) continue;    // SPLICE_OUTSIDE or SPLICE_REF
+            csq_t csq; 
+            memset(&csq, 0, sizeof(csq_t));
+            csq.pos          = rec->pos;
+            csq.type.type    = GF_is_coding(tr->type) ? CSQ_INTRON : CSQ_NON_CODING;
+            csq.type.biotype = tr->type;
+            csq.type.strand  = tr->strand;
+            csq.type.trid    = tr->id;
+            csq.type.gene    = tr->gene->name;
+            csq_stage(args, &csq, rec);
+            ret = 1;
+        }
+    }
+    assert(!splice.kref.s);
+    assert(!splice.kalt.s);
+    return ret;
+}
+
+void process(args_t *args, bcf1_t **rec_ptr)
+{
+    if ( !rec_ptr )
+    {
+        hap_flush(args, REGIDX_MAX);
+        vbuf_flush(args);
+        return;
+    }
+
+    bcf1_t *rec = *rec_ptr;
+
+    int call_csq = 1;
+    if ( !rec->n_allele ) call_csq = 0;   // no alternate allele
+    else if ( rec->n_allele==2 && (rec->d.allele[1][0]=='<' || rec->d.allele[1][0]=='*') ) call_csq = 0;     // gVCF, no alt allele
+    else if ( args->filter )
+    {
+        call_csq = filter_test(args->filter, rec, NULL);
+        if ( args->filter_logic==FLT_EXCLUDE ) call_csq = call_csq ? 0 : 1;
+    }
+    if ( !call_csq )
+    {
+        if ( !args->out_fh ) return;    // not a VCF output
+        vbuf_push(args, rec_ptr);
+        vbuf_flush(args);
+        return;
+    }
+
+    if ( args->rid != rec->rid ) 
+    {
+        hap_flush(args, REGIDX_MAX);
+        vbuf_flush(args);
+    }
+    args->rid = rec->rid;
+    vbuf_push(args, rec_ptr);
+
+    int hit = args->local_csq ? test_cds_local(args, rec) : test_cds(args, rec);
+    hit += test_utr(args, rec);
+    hit += test_splice(args, rec);
+    if ( !hit ) test_tscript(args, rec);
+
+    hap_flush(args, rec->pos-1);
+    vbuf_flush(args);
+
+    return;
+}
+
+const char *usage(void)
+{
+    return 
+        "\n"
+        "About: Haplotype-aware consequence caller.\n"
+        "Usage: bcftools csq [options] in.vcf\n"
+        "\n"
+        "Required options:\n"
+        "   -f, --fasta-ref <file>          reference file in fasta format\n"
+        "   -g, --gff-annot <file>          gff3 annotation file\n"
+        "\n"
+        "CSQ options:\n"
+        "   -c, --custom-tag <string>       use this tag instead of the default BCSQ\n"
+        "   -l, --local-csq                 localized predictions, consider only one VCF record at a time\n"
+        "   -n, --ncsq <int>                maximum number of consequences to consider per site [16]\n"
+        "   -p, --phase <a|m|r|R|s>         how to construct haplotypes and how to deal with unphased data: [r]\n"
+        "                                     a: take GTs as is, create haplotypes regardless of phase (0/1 -> 0|1)\n"
+        "                                     m: merge *all* GTs into a single haplotype (0/1 -> 1, 1/2 -> 1)\n"
+        "                                     r: require phased GTs, throw an error on unphased het GTs\n"
+        "                                     R: create non-reference haplotypes if possible (0/1 -> 1|1, 1/2 -> 1|2)\n"
+        "                                     s: skip unphased GTs\n"
+        "Options:\n"
+        "   -e, --exclude <expr>            exclude sites for which the expression is true\n"
+        "   -i, --include <expr>            select sites for which the expression is true\n"
+        "   -o, --output <file>             write output to a file [standard output]\n"
+        "   -O, --output-type <b|u|z|v|t>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF\n"
+        "                                   v: uncompressed VCF, t: plain tab-delimited text output [v]\n"
+        "   -q, --quiet                     suppress warning messages. Can be given two times for even less messages\n"
+        "   -r, --regions <region>          restrict to comma-separated list of regions\n"
+        "   -R, --regions-file <file>       restrict to regions listed in a file\n"
+        "   -s, --samples <-|list>          samples to include or \"-\" to apply all variants and ignore samples\n"
+        "   -S, --samples-file <file>       samples to include\n"
+        "   -t, --targets <region>          similar to -r but streams rather than index-jumps\n"
+        "   -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n"
+        "\n"
+        "Example:\n"
+        "   bcftools csq -f hs37d5.fa -g Homo_sapiens.GRCh37.82.gff3.gz in.vcf\n"
+        "\n"
+        "   # GFF3 annotation files can be downloaded from Ensembl. e.g. for human:\n"
+        "   ftp://ftp.ensembl.org/pub/current_gff3/homo_sapiens/\n"
+        "   ftp://ftp.ensembl.org/pub/grch37/release-84/gff3/homo_sapiens/\n"
+        "\n";
+}
+
+int main_csq(int argc, char *argv[])
+{
+    args_t *args = (args_t*) calloc(1,sizeof(args_t));
+    args->argc = argc; args->argv = argv;
+    args->output_type = FT_VCF;
+    args->bcsq_tag = "BCSQ";
+    args->ncsq_max = 2*16;
+
+    static struct option loptions[] =
+    {
+        {"help",0,0,'h'},
+        {"ncsq",1,0,'n'},
+        {"custom-tag",1,0,'c'},
+        {"local-csq",0,0,'l'},
+        {"gff-annot",1,0,'g'},
+        {"fasta-ref",1,0,'f'},
+        {"include",1,0,'i'},
+        {"exclude",1,0,'e'},
+        {"output",1,0,'o'},
+        {"output-type",1,NULL,'O'},
+        {"phase",1,0,'p'},
+        {"quiet",0,0,'q'},
+        {"regions",1,0,'r'},
+        {"regions-file",1,0,'R'},
+        {"samples",1,0,'s'},
+        {"samples-file",1,0,'S'},
+        {"targets",1,0,'t'},
+        {"targets-file",1,0,'T'},
+        {0,0,0,0}
+    };
+    int c, targets_is_file = 0, regions_is_file = 0; 
+    char *targets_list = NULL, *regions_list = NULL;
+    while ((c = getopt_long(argc, argv, "?hr:R:t:T:i:e:f:o:O:g:s:S:p:qc:ln:",loptions,NULL)) >= 0)
+    {
+        switch (c) 
+        {
+            case 'l': args->local_csq = 1; break;
+            case 'c': args->bcsq_tag = optarg; break;
+            case 'q': args->quiet++; break;
+            case 'p':
+                switch (optarg[0]) 
+                {
+                    case 'a': args->phase = PHASE_AS_IS; break;
+                    case 'm': args->phase = PHASE_MERGE; break;
+                    case 'r': args->phase = PHASE_REQUIRE; break;
+                    case 'R': args->phase = PHASE_NON_REF; break;
+                    case 's': args->phase = PHASE_SKIP; break;
+                    default: error("The -p code \"%s\" not recognised\n", optarg);
+                }
+                break;
+            case 'f': args->fa_fname = optarg; break;
+            case 'g': args->gff_fname = optarg; break;
+            case 'n': 
+                args->ncsq_max = 2 * atoi(optarg);
+                if ( args->ncsq_max <=0 ) error("Expected positive integer with -n, got %s\n", optarg);
+                break;
+            case 'o': args->output_fname = optarg; break;
+            case 'O':
+                      switch (optarg[0]) {
+                          case 't': args->output_type = FT_TAB_TEXT; break;
+                          case 'b': args->output_type = FT_BCF_GZ; break;
+                          case 'u': args->output_type = FT_BCF; break;
+                          case 'z': args->output_type = FT_VCF_GZ; break;
+                          case 'v': args->output_type = FT_VCF; break;
+                          default: error("The output type \"%s\" not recognised\n", optarg);
+                      }
+                      break;
+            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'r': regions_list = optarg; break;
+            case 'R': regions_list = optarg; regions_is_file = 1; break;
+            case 's': args->sample_list = optarg; break;
+            case 'S': args->sample_list = optarg; args->sample_is_file = 1; break;
+            case 't': targets_list = optarg; break;
+            case 'T': targets_list = optarg; targets_is_file = 1; break;
+            case 'h':
+            case '?': error("%s",usage());
+            default: error("The option not recognised: %s\n\n", optarg); break;
+        }
+    }
+    char *fname = NULL;
+    if ( optind==argc )
+    {
+        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        else error("%s", usage());
+    }
+    else fname = argv[optind];
+    if ( argc - optind>1 ) error("%s", usage());
+    if ( !args->fa_fname ) error("Missing the --fa-ref option\n");
+    if ( !args->gff_fname ) error("Missing the --gff option\n");
+    args->sr = bcf_sr_init();
+    if ( targets_list && bcf_sr_set_targets(args->sr, targets_list, targets_is_file, 0)<0 )
+        error("Failed to read the targets: %s\n", targets_list);
+    if ( regions_list && bcf_sr_set_regions(args->sr, regions_list, regions_is_file)<0 )
+        error("Failed to read the regions: %s\n", regions_list);
+    if ( !bcf_sr_add_reader(args->sr, fname) )
+        error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->sr->errnum));
+    args->hdr = bcf_sr_get_header(args->sr,0);
+
+    init_data(args);
+    while ( bcf_sr_next_line(args->sr) )
+    {
+        process(args, &args->sr->readers[0].buffer[0]);
+    }
+    process(args,NULL);
+
+    destroy_data(args);
+    bcf_sr_destroy(args->sr);
+    free(args);
+
+    return 0;
+}
+
diff --git a/bcftools/filter.c b/bcftools/filter.c
index c56ae6d..463028f 100644
--- a/bcftools/filter.c
+++ b/bcftools/filter.c
@@ -24,6 +24,7 @@ THE SOFTWARE.  */
 
 #include <ctype.h>
 #include <stdlib.h>
+#include <strings.h>
 #include <errno.h>
 #include <math.h>
 #include <wordexp.h>
@@ -34,13 +35,37 @@ THE SOFTWARE.  */
 #include <htslib/hts_defs.h>
 #include <htslib/vcfutils.h>
 
+#ifndef __FUNCTION__
+#  define __FUNCTION__ __func__
+#endif
+
+uint64_t bcf_double_missing    = 0x7ff0000000000001;
+uint64_t bcf_double_vector_end = 0x7ff0000000000002;
+static inline void bcf_double_set(double *ptr, uint64_t value)
+{
+    union { uint64_t i; double d; } u;
+    u.i = value;
+    *ptr = u.d;
+}
+static inline int bcf_double_test(double d, uint64_t value)
+{
+    union { uint64_t i; double d; } u;
+    u.d = d;
+    return u.i==value ? 1 : 0;
+}
+#define bcf_double_set_vector_end(x) bcf_double_set(&(x),bcf_double_vector_end)
+#define bcf_double_set_missing(x)    bcf_double_set(&(x),bcf_double_missing)
+#define bcf_double_is_vector_end(x)  bcf_double_test((x),bcf_double_vector_end)
+#define bcf_double_is_missing(x)     bcf_double_test((x),bcf_double_missing)
+
+
 typedef struct _token_t
 {
     // read-only values, same for all VCF lines
     int tok_type;       // one of the TOK_* keys below
     char *key;          // set only for string constants, otherwise NULL
     char *tag;          // for debugging and printout only, VCF tag name
-    float threshold;    // filtering threshold
+    double threshold;   // filtering threshold
     int hdr_id, type;   // BCF header lookup ID and one of BCF_HT_* types
     int idx;            // 0-based index to VCF vectors, -1: not a vector, -2: any field ([*])
     void (*setter)(filter_t *, bcf1_t *, struct _token_t *);
@@ -49,7 +74,7 @@ typedef struct _token_t
     regex_t *regex;     // precompiled regex for string comparison
 
     // modified on filter evaluation at each VCF line
-    float *values;      // In case str_value is set, values[0] is one sample's string length
+    double *values;     // In case str_value is set, values[0] is one sample's string length
     char *str_value;    //  and values[0]*nsamples gives the total length;
     int is_str, is_missing; // is_missing is set only for constants, variables are controled via nvalues
     int pass_site;          // -1 not applicable, 0 fails, >0 pass
@@ -67,7 +92,8 @@ struct _filter_t
     int nfilters;
     token_t *filters, **flt_stack;  // filtering input tokens (in RPN) and evaluation stack
     int32_t *tmpi;
-    int max_unpack, mtmpi, nsamples;
+    float   *tmpf;
+    int max_unpack, mtmpi, mtmpf, nsamples;
 };
 
 
@@ -221,13 +247,15 @@ static void filters_set_qual(filter_t *flt, bcf1_t *line, token_t *tok)
         tok->nvalues = 0;
     else
     {
-        tok->values[0] = line->qual;
+        tok->values[0] = (double)line->qual;
         tok->nvalues = 1;
     }
 }
 static void filters_set_type(filter_t *flt, bcf1_t *line, token_t *tok)
 {
     tok->values[0] = bcf_get_variant_types(line);
+    if ( !tok->values[0] ) tok->values[0] = 1;      // mistake in htslib: VCF_* should start with 1
+    else tok->values[0] = ((int)tok->values[0]) << 1;
     tok->nvalues = 1;
 }
 static void filters_set_info(filter_t *flt, bcf1_t *line, token_t *tok)
@@ -272,6 +300,13 @@ static void filters_set_info(filter_t *flt, bcf1_t *line, token_t *tok)
         tok->str_value = NULL;
     }
 }
+static int filters_cmp_bit_and(token_t *atok, token_t *btok, int op_type, bcf1_t *line)
+{
+    int a = (int)(atok->nvalues?atok->values[0]:atok->threshold);
+    int b = (int)(btok->nvalues?btok->values[0]:btok->threshold);
+    if ( op_type==TOK_LIKE ) return a&b ? 1 : 0;
+    return a&b ? 0 : 1;
+}
 static int filters_cmp_filter(token_t *atok, token_t *btok, int op_type, bcf1_t *line)
 {
     int i;
@@ -316,7 +351,7 @@ static int filters_cmp_id(token_t *atok, token_t *btok, int op_type, bcf1_t *lin
 }
 
 /**
- *  bcf_get_info_value() - get single INFO value, int or float
+ *  bcf_get_info_value() - get single INFO value, int64_t or double
  *  @line:      BCF line
  *  @info_id:   tag ID, as returned by bcf_hdr_id2int
  *  @ivec:      0-based index to retrieve, -1 when single value is expected
@@ -336,8 +371,8 @@ static int bcf_get_info_value(bcf1_t *line, int info_id, int ivec, void *value)
     bcf_info_t *info = &line->d.info[j];
     if ( info->len == 1 )
     {
-        if ( info->type==BCF_BT_FLOAT ) *((float*)value) = info->v1.f;
-        else if ( info->type==BCF_BT_INT8 || info->type==BCF_BT_INT16 || info->type==BCF_BT_INT32 ) *((int*)value) = info->v1.i;
+        if ( info->type==BCF_BT_FLOAT ) *((double*)value) = info->v1.f;
+        else if ( info->type==BCF_BT_INT8 || info->type==BCF_BT_INT16 || info->type==BCF_BT_INT32 ) *((int64_t*)value) = info->v1.i;
         return 1;
     }
 
@@ -354,10 +389,10 @@ static int bcf_get_info_value(bcf1_t *line, int info_id, int ivec, void *value)
         return 1; \
     }
     switch (info->type) {
-        case BCF_BT_INT8:  BRANCH(int8_t,  p[j]==bcf_int8_missing,  p[j]==bcf_int8_vector_end,  int); break;
-        case BCF_BT_INT16: BRANCH(int16_t, p[j]==bcf_int16_missing, p[j]==bcf_int16_vector_end, int); break;
-        case BCF_BT_INT32: BRANCH(int32_t, p[j]==bcf_int32_missing, p[j]==bcf_int32_vector_end, int); break;
-        case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(p[j]), bcf_float_is_vector_end(p[j]), float); break;
+        case BCF_BT_INT8:  BRANCH(int8_t,  p[j]==bcf_int8_missing,  p[j]==bcf_int8_vector_end,  int64_t); break;
+        case BCF_BT_INT16: BRANCH(int16_t, p[j]==bcf_int16_missing, p[j]==bcf_int16_vector_end, int64_t); break;
+        case BCF_BT_INT32: BRANCH(int32_t, p[j]==bcf_int32_missing, p[j]==bcf_int32_vector_end, int64_t); break;
+        case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(p[j]), bcf_float_is_vector_end(p[j]), double); break;
         default: fprintf(stderr,"todo: type %d\n", info->type); exit(1); break;
     }
     #undef BRANCH
@@ -374,14 +409,18 @@ static void filters_set_info_int(filter_t *flt, bcf1_t *line, token_t *tok)
 {
     if ( tok->idx==-2 )
     {
-        int i, n = bcf_get_info_int32(flt->hdr,line,tok->tag,&flt->tmpi,&flt->mtmpi);
-        tok->nvalues = n;
-        hts_expand(float,n,tok->mvalues,tok->values);
-        for (i=0; i<n; i++) tok->values[i] = flt->tmpi[i];
+        int i;
+        tok->nvalues = bcf_get_info_int32(flt->hdr,line,tok->tag,&flt->tmpi,&flt->mtmpi);
+        if ( tok->nvalues<=0 ) tok->nvalues = 0;
+        else
+        {
+            hts_expand(double,tok->nvalues,tok->mvalues,tok->values);
+            for (i=0; i<tok->nvalues; i++) tok->values[i] = flt->tmpi[i];
+        }
     }
     else
     {
-        int32_t value;
+        int64_t value;
         if ( bcf_get_info_value(line,tok->hdr_id,tok->idx,&value) <= 0 )
             tok->nvalues = 0;
         else
@@ -396,12 +435,20 @@ static void filters_set_info_float(filter_t *flt, bcf1_t *line, token_t *tok)
 {
     if ( tok->idx==-2 )
     {
-        tok->nvalues = bcf_get_info_float(flt->hdr,line,tok->tag,&tok->values,&tok->mvalues);
-        if ( tok->nvalues<0 ) tok->nvalues = 0;
+        int i;
+        tok->nvalues = bcf_get_info_float(flt->hdr,line,tok->tag,&flt->tmpf,&flt->mtmpf);
+        if ( tok->nvalues<=0 ) tok->nvalues = 0;
+        else
+        {
+            hts_expand(double,tok->nvalues,tok->mvalues,tok->values);
+            for (i=0; i<tok->nvalues; i++)
+                if ( bcf_float_is_missing(flt->tmpf[i]) ) bcf_double_set_missing(tok->values[i]);
+                else tok->values[i] = flt->tmpf[i];
+        }
     }
     else
     {
-        float value;
+        double value;
         if ( bcf_get_info_value(line,tok->hdr_id,tok->idx,&value) <= 0 )
             tok->nvalues = 0;
         else
@@ -460,11 +507,11 @@ static void filters_set_format_int(filter_t *flt, bcf1_t *line, token_t *tok)
     else
     {
         int is_missing = 1;
-        hts_expand(float,tok->nvalues,tok->mvalues,tok->values);
+        hts_expand(double,tok->nvalues,tok->mvalues,tok->values);
         for (i=0; i<tok->nvalues; i++)
         {
             if ( flt->tmpi[i]==bcf_int32_missing || flt->tmpi[i]==bcf_int32_vector_end )
-                bcf_float_set_missing(tok->values[i]);
+                bcf_double_set_missing(tok->values[i]);
             else
             {
                 tok->values[i] = flt->tmpi[i];
@@ -490,20 +537,38 @@ static void filters_set_format_int(filter_t *flt, bcf1_t *line, token_t *tok)
 }
 static void filters_set_format_float(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    if ( (tok->nvalues=bcf_get_format_float(flt->hdr,line,tok->tag,&tok->values,&tok->mvalues))<=0 )
+    int i;
+    if ( (tok->nvalues=bcf_get_format_float(flt->hdr,line,tok->tag,&flt->tmpf,&flt->mtmpf))<=0 )
+    {
         tok->nvalues = tok->nsamples = 0;   // missing values
-    else if ( tok->idx >= 0 )
+    }
+    else
     {
-        int i, nsmpl, nvals;
-        nsmpl = bcf_hdr_nsamples(flt->hdr);
-        nvals = tok->nvalues / nsmpl;
-        if ( tok->idx >= nvals )
-            tok->nsamples = tok->nvalues = 0;  // the index is too big
-        else
+        int is_missing = 1;
+        hts_expand(double,tok->nvalues,tok->mvalues,tok->values);
+        for (i=0; i<tok->nvalues; i++)
         {
-            for (i=0; i<nsmpl; i++)
-                tok->values[i] = tok->values[i*nvals+tok->idx];
-            tok->nsamples = tok->nvalues = nsmpl;
+            if ( bcf_float_is_missing(flt->tmpf[i]) || bcf_float_is_vector_end(flt->tmpf[i]) )
+                bcf_double_set_missing(tok->values[i]);
+            else
+            {
+                tok->values[i] = flt->tmpf[i];
+                is_missing = 0;
+            }
+        }
+        if ( is_missing ) tok->nvalues = 0;
+        else if ( tok->idx >= 0 )
+        {
+            int nsmpl = bcf_hdr_nsamples(flt->hdr);
+            int nvals = tok->nvalues / nsmpl;
+            if ( tok->idx >= nvals )
+                tok->nvalues = 0;  // the index is too big
+            else
+            {
+                for (i=0; i<nsmpl; i++)
+                    tok->values[i] = tok->values[i*nvals+tok->idx];
+                tok->nvalues = nsmpl;
+            }
         }
     }
     tok->nsamples = tok->nvalues;
@@ -567,7 +632,7 @@ static void filters_set_genotype_string(filter_t *flt, bcf1_t *line, token_t *to
         tok->nvalues = tok->nsamples = 0;
         return;
     }
-    int i, blen = 3, nsmpl = bcf_hdr_nsamples(flt->hdr);
+    int i, blen = 4, nsmpl = bcf_hdr_nsamples(flt->hdr);
     kstring_t str;
 
 gt_length_too_big:
@@ -576,29 +641,15 @@ gt_length_too_big:
     {
         int plen = str.l;
 
-        #define BRANCH(type_t) { \
-            type_t *ptr = (type_t*) (fmt->p + i*fmt->size); \
-            if ( !(ptr[0]>>1) ) kputc('.',&str); \
-        }
-        switch (fmt->type) {
-            case BCF_BT_INT8:  BRANCH(int8_t); break;
-            case BCF_BT_INT16: BRANCH(int16_t); break;
-            case BCF_BT_INT32: BRANCH(int32_t); break;
-            default: fprintf(stderr,"FIXME: type %d in bcf_format_gt?\n", fmt->type); abort(); break;
-        }
-        #undef BRANCH
-
-        if ( plen==str.l )
+        bcf_format_gt(fmt, i, &str);
+        kputc_(0,&str);
+        if ( str.l - plen > blen )
         {
-            bcf_format_gt(fmt, i, &str);
-            if ( str.l - plen > blen )
-            {
-                // too many alternate alleles or ploidy is too large, the genotype does not fit
-                // three characters ("0/0" vs "10/10").
-                tok->str_value = str.s;
-                blen *= 2;
-                goto gt_length_too_big;
-            }
+            // too many alternate alleles or ploidy is too large, the genotype does not fit
+            // three characters ("0/0" vs "10/10").
+            tok->str_value = str.s;
+            blen *= 2;
+            goto gt_length_too_big;
         }
 
         plen = str.l - plen;
@@ -680,7 +731,7 @@ static void filters_set_ac(filter_t *flt, bcf1_t *line, token_t *tok)
     }
     else
     {
-        hts_expand(float,line->n_allele,tok->mvalues,tok->values);
+        hts_expand(double,line->n_allele,tok->mvalues,tok->values);
         for (i=1; i<line->n_allele; i++)
             tok->values[i-1] = flt->tmpi[i];
         tok->nvalues = line->n_allele - 1;
@@ -706,7 +757,7 @@ static void filters_set_af(filter_t *flt, bcf1_t *line, token_t *tok)
     if ( !tok->nvalues ) return;
     int i, an = flt->tmpi[0];
     for (i=0; i<tok->nvalues; i++)
-        tok->values[i] /= (float)an;
+        tok->values[i] /= (double)an;
 }
 static void filters_set_maf(filter_t *flt, bcf1_t *line, token_t *tok)
 {
@@ -715,18 +766,18 @@ static void filters_set_maf(filter_t *flt, bcf1_t *line, token_t *tok)
     int i, an = flt->tmpi[0];
     for (i=0; i<tok->nvalues; i++)
     {
-        tok->values[i] /= (float)an;
+        tok->values[i] /= (double)an;
         if ( tok->values[i] > 0.5 ) tok->values[i] = 1 - tok->values[i];
     }
 }
 
 static void set_max(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    float val = -HUGE_VAL;
+    double val = -HUGE_VAL;
     int i;
     for (i=0; i<tok->nvalues; i++)
     {
-        if ( !bcf_float_is_missing(tok->values[i]) && val < tok->values[i] ) val = tok->values[i];
+        if ( !bcf_double_is_missing(tok->values[i]) && val < tok->values[i] ) val = tok->values[i];
     }
     tok->values[0] = val;
     tok->nvalues   = 1;
@@ -734,30 +785,30 @@ static void set_max(filter_t *flt, bcf1_t *line, token_t *tok)
 }
 static void set_min(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    float val = HUGE_VAL;
+    double val = HUGE_VAL;
     int i;
     for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_float_is_missing(tok->values[i]) && val > tok->values[i] ) val = tok->values[i];
+        if ( !bcf_double_is_missing(tok->values[i]) && val > tok->values[i] ) val = tok->values[i];
     tok->values[0] = val;
     tok->nvalues   = 1;
     tok->nsamples  = 0;
 }
 static void set_avg(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    float val = 0;
+    double val = 0;
     int i, n = 0;
     for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_float_is_missing(tok->values[i]) ) { val += tok->values[i]; n++; }
+        if ( !bcf_double_is_missing(tok->values[i]) ) { val += tok->values[i]; n++; }
     tok->values[0] = n ? val / n : 0;
     tok->nvalues   = 1;
     tok->nsamples  = 0;
 }
 static void set_sum(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    float val = 0;
+    double val = 0;
     int i, n = 0;
     for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_float_is_missing(tok->values[i]) ) { val += tok->values[i]; n++; }
+        if ( !bcf_double_is_missing(tok->values[i]) ) { val += tok->values[i]; n++; }
     tok->values[0] = val;
     tok->nvalues   = 1;
     tok->nsamples  = 0;
@@ -812,20 +863,20 @@ static void set_strlen(filter_t *flt, bcf1_t *line, token_t *tok)
         { \
             for (i=0; i<(atok)->nvalues; i++) \
             { \
-                if ( bcf_float_is_missing((atok)->values[i]) ) continue; \
-                if ( bcf_float_is_missing((btok)->values[i]) ) { bcf_float_set_missing((atok)->values[i]); continue; } \
+                if ( bcf_double_is_missing((atok)->values[i]) ) continue; \
+                if ( bcf_double_is_missing((btok)->values[i]) ) { bcf_double_set_missing((atok)->values[i]); continue; } \
                 has_values = 1; \
                 (atok)->values[i] = (atok)->values[i] AOP (btok)->values[i]; \
             } \
         } \
         else if ( (btok)->nsamples ) \
         { \
-            hts_expand(float,(btok)->nvalues,(atok)->mvalues,(atok)->values); \
+            hts_expand(double,(btok)->nvalues,(atok)->mvalues,(atok)->values); \
             for (i=0; i<(btok)->nvalues; i++) \
             { \
-                if ( bcf_float_is_missing((atok)->values[0]) || bcf_float_is_missing((btok)->values[i]) ) \
+                if ( bcf_double_is_missing((atok)->values[0]) || bcf_double_is_missing((btok)->values[i]) ) \
                 { \
-                    bcf_float_set_missing((atok)->values[i]); \
+                    bcf_double_set_missing((atok)->values[i]); \
                     continue; \
                 } \
                 has_values = 1; \
@@ -838,9 +889,9 @@ static void set_strlen(filter_t *flt, bcf1_t *line, token_t *tok)
         { \
             for (i=0; i<(atok)->nvalues; i++) \
             { \
-                if ( bcf_float_is_missing((atok)->values[i]) || bcf_float_is_missing((btok)->values[0]) ) \
+                if ( bcf_double_is_missing((atok)->values[i]) || bcf_double_is_missing((btok)->values[0]) ) \
                 { \
-                    bcf_float_set_missing((atok)->values[i]); \
+                    bcf_double_set_missing((atok)->values[i]); \
                     continue; \
                 } \
                 has_values = 1; \
@@ -921,10 +972,14 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
         for (i=0; i<btok->nsamples; i++)
             atok->pass_samples[i] = btok->pass_samples[i];
         atok->nsamples = btok->nsamples;
+        atok->nvalues  = 1;
         return btok->pass_site;
     }
     if ( !btok->nvalues ) // missing value in b
+    {
+        btok->nvalues = 1;
         return atok->pass_site;
+    }
 
     if ( !atok->nsamples && !btok->nsamples ) return atok->pass_site || btok->pass_site;
     if ( !atok->nsamples )
@@ -978,6 +1033,7 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
     if ( (atok)->nsamples || (btok)->nsamples ) error("todo: Querying of missing values in FORMAT\n"); \
     token_t *tok = (atok)->is_missing ? (btok) : (atok); \
     (ret) = ( tok->nvalues CMP_OP 1 ) ? 0 : 1; \
+    tok->nvalues = 1; \
 }
 
 #define CMP_VECTORS(atok,btok,CMP_OP,ret) \
@@ -990,8 +1046,6 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
         { \
             for (i=0; i<(atok)->nsamples; i++) \
             { \
-                if ( bcf_float_is_missing((atok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
-                if ( bcf_float_is_missing((btok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
                 has_values = 1; \
                 if ( (atok)->values[i] CMP_OP (btok)->values[i] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
                 else (atok)->pass_samples[i] = 0; \
@@ -1000,34 +1054,26 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
         } \
         else if ( (atok)->nsamples ) \
         { \
-            if ( bcf_float_is_missing((btok)->values[0]) ) { (atok)->nvalues = 0; (atok)->nsamples = 0; (ret) = 0; } \
-            else \
+            for (i=0; i<(atok)->nsamples; i++) \
             { \
-                for (i=0; i<(atok)->nsamples; i++) \
-                { \
-                    if ( bcf_float_is_missing((atok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
-                    has_values = 1; \
-                    if ( (atok)->values[i] CMP_OP (btok)->values[0] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
-                    else (atok)->pass_samples[i] = 0; \
-                } \
+                /*if ( bcf_double_is_missing((atok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; }*/ \
+                has_values = 1; \
+                if ( (atok)->values[i] CMP_OP (btok)->values[0] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
+                else (atok)->pass_samples[i] = 0; \
             } \
             if ( !has_values ) (atok)->nvalues = 0; \
         } \
         else if ( (btok)->nsamples ) \
         { \
-            if ( bcf_float_is_missing((atok)->values[0]) ) { (atok)->nvalues = 0; (atok)->nsamples = 0; (ret) = 0; } \
-            else \
+            for (i=0; i<(btok)->nsamples; i++) \
             { \
-                for (i=0; i<(btok)->nsamples; i++) \
-                { \
-                    if ( bcf_float_is_missing((btok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
-                    has_values = 1; \
-                    if ( (atok)->values[0] CMP_OP (btok)->values[i] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
-                    else (atok)->pass_samples[i] = 0; \
-                } \
-                (atok)->nvalues  = (btok)->nvalues; \
-                (atok)->nsamples = (btok)->nsamples; \
+                if ( bcf_double_is_missing((btok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
+                has_values = 1; \
+                if ( (atok)->values[0] CMP_OP (btok)->values[i] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
+                else (atok)->pass_samples[i] = 0; \
             } \
+            (atok)->nvalues  = (btok)->nvalues; \
+            (atok)->nsamples = (btok)->nsamples; \
             if ( !has_values ) (atok)->nvalues = 0; \
         } \
         else if ( (atok)->idx==-2 || (btok)->idx==-2 ) \
@@ -1124,10 +1170,23 @@ static int cmp_vector_strings(token_t *atok, token_t *btok, int logic)    // log
     }
     return pass_site;
 }
-static int regex_vector_strings(token_t *atok, token_t *btok)
+static int regex_vector_strings(token_t *atok, token_t *btok, int negate)
 {
-    int ret = regexec(btok->regex, atok->str_value, 0,NULL,0);
-    return ret==0 ? 1 : 0;
+    int i, pass_site = 0;
+    if ( atok->nsamples )
+    {
+        for (i=0; i<atok->nsamples; i++)
+        {
+            char *ptr = atok->str_value + i*(int)atok->values[0];
+            atok->pass_samples[i] = regexec(btok->regex, ptr, 0,NULL,0) ? 0 : 1;
+            if ( negate ) atok->pass_samples[i] = atok->pass_samples[i] ? 0 : 1;
+            pass_site |= atok->pass_samples[i];
+        }
+        return pass_site;
+    }
+    pass_site = regexec(btok->regex, atok->str_value, 0,NULL,0) ? 0 : 1;
+    if ( negate ) pass_site = pass_site ? 0 : 1;
+    return pass_site;
 }
 
 static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
@@ -1143,7 +1202,7 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
         int quote = str[0];
         if ( str[len-1] != quote ) error("TODO: [%s]\n", filter->str);
         tok->key = (char*) calloc(len-1,sizeof(char));
-        hts_expand(float,1,tok->mvalues,tok->values);
+        hts_expand(double,1,tok->mvalues,tok->values);
         tok->values[0] = len-2;
         memcpy(tok->key,str+1,len-2);
         tok->key[len-2] = 0;
@@ -1372,11 +1431,18 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
         return 0;
     }
 
-    // is it a value?
+    // is it a value? Here we parse as integer/float separately and use strtof
+    // rather than strtod, because the more accurate double representation
+    // would invalidate floating point comparisons like QUAL=59.2, obtained via
+    // htslib/vcf parser
     char *end;
-    errno = 0;
-    tok->threshold = strtod(tmp.s, &end);
-    if ( errno!=0 || end!=tmp.s+len ) error("[%s:%d %s] Error: the tag \"INFO/%s\" is not defined in the VCF header\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);
+    tok->threshold = strtol(tmp.s, &end, 10);   // integer?
+    if ( end - tmp.s != strlen(tmp.s) )
+    {
+        errno = 0;
+        tok->threshold = strtof(tmp.s, &end);   // float?
+        if ( errno!=0 || end!=tmp.s+len ) error("[%s:%d %s] Error: the tag \"INFO/%s\" is not defined in the VCF header\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);
+    }
 
     if ( tmp.s ) free(tmp.s);
     return 0;
@@ -1511,11 +1577,11 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
             // Look for j="." and k numeric type
             int j = i-1, k = i-2;
             if ( !out[j].is_str ) { k = i-1, j = i-2; }
-            if ( out[k].hdr_id>0 && out[j].is_str && !strcmp(".",out[j].key) )
+            if ( out[k].hdr_id>0 && out[j].is_str && out[j].key && !strcmp(".",out[j].key) )
             {
                 int type = bcf_hdr_id2type(filter->hdr,out[k].type,out[k].hdr_id);
-                if ( type==BCF_HT_INT ) { out[j].is_str = 0; out[j].is_missing = 1; bcf_float_set_missing(out[j].values[0]); }
-                if ( type==BCF_HT_REAL ) { out[j].is_str = 0; out[j].is_missing = 1; bcf_float_set_missing(out[j].values[0]); }
+                if ( type==BCF_HT_INT ) { out[j].is_str = 0; out[j].is_missing = 1; bcf_double_set_missing(out[j].values[0]); }
+                if ( type==BCF_HT_REAL ) { out[j].is_str = 0; out[j].is_missing = 1; bcf_double_set_missing(out[j].values[0]); }
             }
         }
         if ( out[i].tok_type==TOK_LIKE || out[i].tok_type==TOK_NLIKE )
@@ -1524,7 +1590,14 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
             if ( !out[j].key )
                 error("Could not parse the expression, wrong value for regex operator: %s\n", filter->str);
             out[j].regex = (regex_t *) malloc(sizeof(regex_t));
-            if ( regcomp(out[j].regex, out[j].key, REG_ICASE|REG_NOSUB) )
+            int cflags = REG_NOSUB;
+            int len = strlen(out[j].key);
+            if ( len>2 && out[j].key[len-1]=='i' && out[j].key[len-2]=='/' && out[j].key[len-3]!='\\'  )
+            {
+                out[j].key[len-2] = 0;
+                cflags |= REG_ICASE;
+            }
+            if ( regcomp(out[j].regex, out[j].key, cflags) )
                 error("Could not compile the regex expression \"%s\": %s\n", out[j].key,filter->str);
         }
         if ( out[i].tok_type!=TOK_VAL ) continue;
@@ -1532,41 +1605,47 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
         if ( !strcmp(out[i].tag,"TYPE") )
         {
             if ( i+1==nout ) error("Could not parse the expression: %s\n", filter->str);
-            int j = i+1;
-            if ( out[j].tok_type==TOK_EQ || out[j].tok_type==TOK_NE ) j = i - 1;
-            if ( out[j].tok_type!=TOK_VAL || !out[j].key ) error("[%s:%d %s] Could not parse the expression: %s\n",  __FILE__,__LINE__,__FUNCTION__, filter->str);
-            if ( !strcasecmp(out[j].key,"snp") || !strcasecmp(out[j].key,"snps") ) { out[j].threshold = VCF_SNP; out[j].is_str = 0; }
-            else if ( !strcasecmp(out[j].key,"indel") || !strcasecmp(out[j].key,"indels") ) { out[j].threshold = VCF_INDEL; out[j].is_str = 0; }
-            else if ( !strcasecmp(out[j].key,"mnp") || !strcasecmp(out[j].key,"mnps") ) { out[j].threshold = VCF_MNP; out[j].is_str = 0; }
-            else if ( !strcasecmp(out[j].key,"other") ) { out[j].threshold = VCF_OTHER; out[j].is_str = 0; }
-            else if ( !strcasecmp(out[j].key,"ref") ) { out[j].threshold = VCF_REF; out[j].is_str = 0; }
-            else error("The type \"%s\" not recognised: %s\n", out[j].key, filter->str);
-            out[j].tag = out[j].key; out[j].key = NULL;
-            i = j;
+            int itok, ival;
+            if ( out[i+1].tok_type==TOK_EQ || out[i+1].tok_type==TOK_NE ) ival = i - 1, itok = i + 1;
+            else if ( out[i+1].tok_type==TOK_LIKE || out[i+1].tok_type==TOK_NLIKE ) ival = i - 1, itok = i + 1;
+            else if ( out[i+2].tok_type==TOK_EQ || out[i+2].tok_type==TOK_NE ) itok = i + 2, ival = i + 1;
+            else if ( out[i+2].tok_type==TOK_LIKE || out[i+2].tok_type==TOK_NLIKE ) itok = i + 2, ival = i + 1;
+            else error("[%s:%d %s] Could not parse the expression: %s\n",  __FILE__,__LINE__,__FUNCTION__, filter->str);
+            if ( !strcasecmp(out[ival].key,"snp") || !strcasecmp(out[ival].key,"snps") ) { out[ival].threshold = VCF_SNP<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"indel") || !strcasecmp(out[ival].key,"indels") ) { out[ival].threshold = VCF_INDEL<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"mnp") || !strcasecmp(out[ival].key,"mnps") ) { out[ival].threshold = VCF_MNP<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"other") ) { out[ival].threshold = VCF_OTHER<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"bnd") ) { out[ival].threshold = VCF_BND<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"ref") ) { out[ival].threshold = 1; out[ival].is_str = 0; }
+            else error("The type \"%s\" not recognised: %s\n", out[ival].key, filter->str);
+            if ( out[itok].tok_type==TOK_LIKE || out[itok].tok_type==TOK_NLIKE ) out[itok].comparator = filters_cmp_bit_and;
+            out[ival].tag = out[ival].key; out[ival].key = NULL;
+            i = itok;
             continue;
         }
         if ( !strcmp(out[i].tag,"FILTER") )
         {
             if ( i+1==nout ) error("Could not parse the expression: %s\n", filter->str);
-            int j = i+1;
-            if ( out[j].tok_type==TOK_EQ || out[j].tok_type==TOK_NE ) j = i - 1;    // the expression has "value"=FILTER rather than FILTER="value"
-            if ( out[j].tok_type==TOK_LIKE ) out[j].tok_type = TOK_EQ;              // for FILTER, ~ and !~ work the same way as = and !=
-            if ( out[j].tok_type==TOK_NLIKE ) out[j].tok_type = TOK_NE;
-            if ( out[j+1].tok_type==TOK_LIKE ) out[j+1].tok_type = TOK_EQ;
-            if ( out[j+1].tok_type==TOK_NLIKE ) out[j+1].tok_type = TOK_NE;
-            if ( out[j].tok_type!=TOK_VAL || !out[j].key )
+            int itok = i, ival;
+            if ( out[i+1].tok_type==TOK_EQ || out[i+1].tok_type==TOK_NE ) ival = i - 1;
+            else if ( out[i+1].tok_type==TOK_LIKE ) out[i+1].tok_type = TOK_EQ, ival = i - 1;
+            else if ( out[i+1].tok_type==TOK_NLIKE ) out[i+1].tok_type = TOK_NE, ival = i - 1;
+            else if ( out[i+2].tok_type==TOK_EQ || out[i+2].tok_type==TOK_NE ) ival = ++i;
+            else if ( out[i+2].tok_type==TOK_LIKE ) out[i+2].tok_type = TOK_EQ, ival = ++i;
+            else if ( out[i+2].tok_type==TOK_NLIKE ) out[i+2].tok_type = TOK_NE, ival = ++i;
+            else error("[%s:%d %s] Could not parse the expression: %s\n",  __FILE__,__LINE__,__FUNCTION__, filter->str);
+            if ( out[ival].tok_type!=TOK_VAL || !out[ival].key )
                 error("[%s:%d %s] Could not parse the expression, an unquoted string value perhaps? %s\n", __FILE__,__LINE__,__FUNCTION__, filter->str);
-            if ( strcmp(".",out[j].key) )
+            if ( strcmp(".",out[ival].key) )
             {
-                out[j].hdr_id = bcf_hdr_id2int(filter->hdr, BCF_DT_ID, out[j].key);
-                if ( !bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_FLT,out[j].hdr_id) )
-                    error("The filter \"%s\" not present in the VCF header\n", out[j].key);
+                out[ival].hdr_id = bcf_hdr_id2int(filter->hdr, BCF_DT_ID, out[ival].key);
+                if ( !bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_FLT,out[ival].hdr_id) )
+                    error("The filter \"%s\" not present in the VCF header\n", out[ival].key);
             }
             else
-                out[j].hdr_id = -1;
-            out[j].tag = out[j].key; out[j].key = NULL;
-            out[i].hdr_id = out[j].hdr_id;
-            i = j;
+                out[ival].hdr_id = -1;
+            out[ival].tag = out[ival].key; out[ival].key = NULL;
+            out[itok].hdr_id = out[ival].hdr_id;
             continue;
         }
     }
@@ -1579,7 +1658,7 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
         else if ( out[i].tok_type==TOK_SUM ) { out[i].setter = set_sum; out[i].tok_type = TOK_FUNC; }
         else if ( out[i].tok_type==TOK_ABS ) { out[i].setter = set_abs; out[i].tok_type = TOK_FUNC; }
         else if ( out[i].tok_type==TOK_LEN ) { out[i].setter = set_strlen; out[i].tok_type = TOK_FUNC; }
-        hts_expand0(float,1,out[i].mvalues,out[i].values);
+        hts_expand0(double,1,out[i].mvalues,out[i].values);
         if ( filter->nsamples )
         {
             out[i].pass_samples = (uint8_t*)malloc(filter->nsamples);
@@ -1618,6 +1697,7 @@ void filter_destroy(filter_t *filter)
     free(filter->flt_stack);
     free(filter->str);
     free(filter->tmpi);
+    free(filter->tmpf);
     free(filter);
 }
 
@@ -1704,7 +1784,9 @@ int filter_test(filter_t *filter, bcf1_t *line, const uint8_t **samples)
         }
 
         int is_true = 0;
-        if ( !filter->flt_stack[nstack-1]->nvalues || !filter->flt_stack[nstack-2]->nvalues )
+        if ( filter->filters[i].comparator )
+            is_true = filter->filters[i].comparator(filter->flt_stack[nstack-1],filter->flt_stack[nstack-2],filter->filters[i].tok_type,line);
+        else if ( !filter->flt_stack[nstack-1]->nvalues || !filter->flt_stack[nstack-2]->nvalues )
         {
             int skip = 0;
             if ( !filter->flt_stack[nstack-2]->is_missing && !filter->flt_stack[nstack-1]->is_missing ) skip = 1;
@@ -1746,10 +1828,7 @@ int filter_test(filter_t *filter, bcf1_t *line, const uint8_t **samples)
         else if ( filter->filters[i].tok_type == TOK_LIKE || filter->filters[i].tok_type == TOK_NLIKE )
         {
             if ( is_str==2 )
-            {
-                is_true = regex_vector_strings(filter->flt_stack[nstack-2],filter->flt_stack[nstack-1]);
-                if ( filter->filters[i].tok_type == TOK_NLIKE ) is_true = is_true ? 0 : 1;
-            }
+                is_true = regex_vector_strings(filter->flt_stack[nstack-2],filter->flt_stack[nstack-1], filter->filters[i].tok_type == TOK_LIKE ? 0 : 1);
             else
                 error("The regex operator can be used on strings only: %s\n", filter->str);
         }
diff --git a/bcftools/filter.c.pysam.c b/bcftools/filter.c.pysam.c
index 531339e..44046f2 100644
--- a/bcftools/filter.c.pysam.c
+++ b/bcftools/filter.c.pysam.c
@@ -26,6 +26,7 @@ THE SOFTWARE.  */
 
 #include <ctype.h>
 #include <stdlib.h>
+#include <strings.h>
 #include <errno.h>
 #include <math.h>
 #include <wordexp.h>
@@ -36,13 +37,37 @@ THE SOFTWARE.  */
 #include <htslib/hts_defs.h>
 #include <htslib/vcfutils.h>
 
+#ifndef __FUNCTION__
+#  define __FUNCTION__ __func__
+#endif
+
+uint64_t bcf_double_missing    = 0x7ff0000000000001;
+uint64_t bcf_double_vector_end = 0x7ff0000000000002;
+static inline void bcf_double_set(double *ptr, uint64_t value)
+{
+    union { uint64_t i; double d; } u;
+    u.i = value;
+    *ptr = u.d;
+}
+static inline int bcf_double_test(double d, uint64_t value)
+{
+    union { uint64_t i; double d; } u;
+    u.d = d;
+    return u.i==value ? 1 : 0;
+}
+#define bcf_double_set_vector_end(x) bcf_double_set(&(x),bcf_double_vector_end)
+#define bcf_double_set_missing(x)    bcf_double_set(&(x),bcf_double_missing)
+#define bcf_double_is_vector_end(x)  bcf_double_test((x),bcf_double_vector_end)
+#define bcf_double_is_missing(x)     bcf_double_test((x),bcf_double_missing)
+
+
 typedef struct _token_t
 {
     // read-only values, same for all VCF lines
     int tok_type;       // one of the TOK_* keys below
     char *key;          // set only for string constants, otherwise NULL
     char *tag;          // for debugging and printout only, VCF tag name
-    float threshold;    // filtering threshold
+    double threshold;   // filtering threshold
     int hdr_id, type;   // BCF header lookup ID and one of BCF_HT_* types
     int idx;            // 0-based index to VCF vectors, -1: not a vector, -2: any field ([*])
     void (*setter)(filter_t *, bcf1_t *, struct _token_t *);
@@ -51,7 +76,7 @@ typedef struct _token_t
     regex_t *regex;     // precompiled regex for string comparison
 
     // modified on filter evaluation at each VCF line
-    float *values;      // In case str_value is set, values[0] is one sample's string length
+    double *values;     // In case str_value is set, values[0] is one sample's string length
     char *str_value;    //  and values[0]*nsamples gives the total length;
     int is_str, is_missing; // is_missing is set only for constants, variables are controled via nvalues
     int pass_site;          // -1 not applicable, 0 fails, >0 pass
@@ -69,7 +94,8 @@ struct _filter_t
     int nfilters;
     token_t *filters, **flt_stack;  // filtering input tokens (in RPN) and evaluation stack
     int32_t *tmpi;
-    int max_unpack, mtmpi, nsamples;
+    float   *tmpf;
+    int max_unpack, mtmpi, mtmpf, nsamples;
 };
 
 
@@ -223,13 +249,15 @@ static void filters_set_qual(filter_t *flt, bcf1_t *line, token_t *tok)
         tok->nvalues = 0;
     else
     {
-        tok->values[0] = line->qual;
+        tok->values[0] = (double)line->qual;
         tok->nvalues = 1;
     }
 }
 static void filters_set_type(filter_t *flt, bcf1_t *line, token_t *tok)
 {
     tok->values[0] = bcf_get_variant_types(line);
+    if ( !tok->values[0] ) tok->values[0] = 1;      // mistake in htslib: VCF_* should start with 1
+    else tok->values[0] = ((int)tok->values[0]) << 1;
     tok->nvalues = 1;
 }
 static void filters_set_info(filter_t *flt, bcf1_t *line, token_t *tok)
@@ -274,6 +302,13 @@ static void filters_set_info(filter_t *flt, bcf1_t *line, token_t *tok)
         tok->str_value = NULL;
     }
 }
+static int filters_cmp_bit_and(token_t *atok, token_t *btok, int op_type, bcf1_t *line)
+{
+    int a = (int)(atok->nvalues?atok->values[0]:atok->threshold);
+    int b = (int)(btok->nvalues?btok->values[0]:btok->threshold);
+    if ( op_type==TOK_LIKE ) return a&b ? 1 : 0;
+    return a&b ? 0 : 1;
+}
 static int filters_cmp_filter(token_t *atok, token_t *btok, int op_type, bcf1_t *line)
 {
     int i;
@@ -318,7 +353,7 @@ static int filters_cmp_id(token_t *atok, token_t *btok, int op_type, bcf1_t *lin
 }
 
 /**
- *  bcf_get_info_value() - get single INFO value, int or float
+ *  bcf_get_info_value() - get single INFO value, int64_t or double
  *  @line:      BCF line
  *  @info_id:   tag ID, as returned by bcf_hdr_id2int
  *  @ivec:      0-based index to retrieve, -1 when single value is expected
@@ -338,8 +373,8 @@ static int bcf_get_info_value(bcf1_t *line, int info_id, int ivec, void *value)
     bcf_info_t *info = &line->d.info[j];
     if ( info->len == 1 )
     {
-        if ( info->type==BCF_BT_FLOAT ) *((float*)value) = info->v1.f;
-        else if ( info->type==BCF_BT_INT8 || info->type==BCF_BT_INT16 || info->type==BCF_BT_INT32 ) *((int*)value) = info->v1.i;
+        if ( info->type==BCF_BT_FLOAT ) *((double*)value) = info->v1.f;
+        else if ( info->type==BCF_BT_INT8 || info->type==BCF_BT_INT16 || info->type==BCF_BT_INT32 ) *((int64_t*)value) = info->v1.i;
         return 1;
     }
 
@@ -356,10 +391,10 @@ static int bcf_get_info_value(bcf1_t *line, int info_id, int ivec, void *value)
         return 1; \
     }
     switch (info->type) {
-        case BCF_BT_INT8:  BRANCH(int8_t,  p[j]==bcf_int8_missing,  p[j]==bcf_int8_vector_end,  int); break;
-        case BCF_BT_INT16: BRANCH(int16_t, p[j]==bcf_int16_missing, p[j]==bcf_int16_vector_end, int); break;
-        case BCF_BT_INT32: BRANCH(int32_t, p[j]==bcf_int32_missing, p[j]==bcf_int32_vector_end, int); break;
-        case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(p[j]), bcf_float_is_vector_end(p[j]), float); break;
+        case BCF_BT_INT8:  BRANCH(int8_t,  p[j]==bcf_int8_missing,  p[j]==bcf_int8_vector_end,  int64_t); break;
+        case BCF_BT_INT16: BRANCH(int16_t, p[j]==bcf_int16_missing, p[j]==bcf_int16_vector_end, int64_t); break;
+        case BCF_BT_INT32: BRANCH(int32_t, p[j]==bcf_int32_missing, p[j]==bcf_int32_vector_end, int64_t); break;
+        case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(p[j]), bcf_float_is_vector_end(p[j]), double); break;
         default: fprintf(pysam_stderr,"todo: type %d\n", info->type); exit(1); break;
     }
     #undef BRANCH
@@ -376,14 +411,18 @@ static void filters_set_info_int(filter_t *flt, bcf1_t *line, token_t *tok)
 {
     if ( tok->idx==-2 )
     {
-        int i, n = bcf_get_info_int32(flt->hdr,line,tok->tag,&flt->tmpi,&flt->mtmpi);
-        tok->nvalues = n;
-        hts_expand(float,n,tok->mvalues,tok->values);
-        for (i=0; i<n; i++) tok->values[i] = flt->tmpi[i];
+        int i;
+        tok->nvalues = bcf_get_info_int32(flt->hdr,line,tok->tag,&flt->tmpi,&flt->mtmpi);
+        if ( tok->nvalues<=0 ) tok->nvalues = 0;
+        else
+        {
+            hts_expand(double,tok->nvalues,tok->mvalues,tok->values);
+            for (i=0; i<tok->nvalues; i++) tok->values[i] = flt->tmpi[i];
+        }
     }
     else
     {
-        int32_t value;
+        int64_t value;
         if ( bcf_get_info_value(line,tok->hdr_id,tok->idx,&value) <= 0 )
             tok->nvalues = 0;
         else
@@ -398,12 +437,20 @@ static void filters_set_info_float(filter_t *flt, bcf1_t *line, token_t *tok)
 {
     if ( tok->idx==-2 )
     {
-        tok->nvalues = bcf_get_info_float(flt->hdr,line,tok->tag,&tok->values,&tok->mvalues);
-        if ( tok->nvalues<0 ) tok->nvalues = 0;
+        int i;
+        tok->nvalues = bcf_get_info_float(flt->hdr,line,tok->tag,&flt->tmpf,&flt->mtmpf);
+        if ( tok->nvalues<=0 ) tok->nvalues = 0;
+        else
+        {
+            hts_expand(double,tok->nvalues,tok->mvalues,tok->values);
+            for (i=0; i<tok->nvalues; i++)
+                if ( bcf_float_is_missing(flt->tmpf[i]) ) bcf_double_set_missing(tok->values[i]);
+                else tok->values[i] = flt->tmpf[i];
+        }
     }
     else
     {
-        float value;
+        double value;
         if ( bcf_get_info_value(line,tok->hdr_id,tok->idx,&value) <= 0 )
             tok->nvalues = 0;
         else
@@ -462,11 +509,11 @@ static void filters_set_format_int(filter_t *flt, bcf1_t *line, token_t *tok)
     else
     {
         int is_missing = 1;
-        hts_expand(float,tok->nvalues,tok->mvalues,tok->values);
+        hts_expand(double,tok->nvalues,tok->mvalues,tok->values);
         for (i=0; i<tok->nvalues; i++)
         {
             if ( flt->tmpi[i]==bcf_int32_missing || flt->tmpi[i]==bcf_int32_vector_end )
-                bcf_float_set_missing(tok->values[i]);
+                bcf_double_set_missing(tok->values[i]);
             else
             {
                 tok->values[i] = flt->tmpi[i];
@@ -492,20 +539,38 @@ static void filters_set_format_int(filter_t *flt, bcf1_t *line, token_t *tok)
 }
 static void filters_set_format_float(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    if ( (tok->nvalues=bcf_get_format_float(flt->hdr,line,tok->tag,&tok->values,&tok->mvalues))<=0 )
+    int i;
+    if ( (tok->nvalues=bcf_get_format_float(flt->hdr,line,tok->tag,&flt->tmpf,&flt->mtmpf))<=0 )
+    {
         tok->nvalues = tok->nsamples = 0;   // missing values
-    else if ( tok->idx >= 0 )
+    }
+    else
     {
-        int i, nsmpl, nvals;
-        nsmpl = bcf_hdr_nsamples(flt->hdr);
-        nvals = tok->nvalues / nsmpl;
-        if ( tok->idx >= nvals )
-            tok->nsamples = tok->nvalues = 0;  // the index is too big
-        else
+        int is_missing = 1;
+        hts_expand(double,tok->nvalues,tok->mvalues,tok->values);
+        for (i=0; i<tok->nvalues; i++)
         {
-            for (i=0; i<nsmpl; i++)
-                tok->values[i] = tok->values[i*nvals+tok->idx];
-            tok->nsamples = tok->nvalues = nsmpl;
+            if ( bcf_float_is_missing(flt->tmpf[i]) || bcf_float_is_vector_end(flt->tmpf[i]) )
+                bcf_double_set_missing(tok->values[i]);
+            else
+            {
+                tok->values[i] = flt->tmpf[i];
+                is_missing = 0;
+            }
+        }
+        if ( is_missing ) tok->nvalues = 0;
+        else if ( tok->idx >= 0 )
+        {
+            int nsmpl = bcf_hdr_nsamples(flt->hdr);
+            int nvals = tok->nvalues / nsmpl;
+            if ( tok->idx >= nvals )
+                tok->nvalues = 0;  // the index is too big
+            else
+            {
+                for (i=0; i<nsmpl; i++)
+                    tok->values[i] = tok->values[i*nvals+tok->idx];
+                tok->nvalues = nsmpl;
+            }
         }
     }
     tok->nsamples = tok->nvalues;
@@ -569,7 +634,7 @@ static void filters_set_genotype_string(filter_t *flt, bcf1_t *line, token_t *to
         tok->nvalues = tok->nsamples = 0;
         return;
     }
-    int i, blen = 3, nsmpl = bcf_hdr_nsamples(flt->hdr);
+    int i, blen = 4, nsmpl = bcf_hdr_nsamples(flt->hdr);
     kstring_t str;
 
 gt_length_too_big:
@@ -578,29 +643,15 @@ gt_length_too_big:
     {
         int plen = str.l;
 
-        #define BRANCH(type_t) { \
-            type_t *ptr = (type_t*) (fmt->p + i*fmt->size); \
-            if ( !(ptr[0]>>1) ) kputc('.',&str); \
-        }
-        switch (fmt->type) {
-            case BCF_BT_INT8:  BRANCH(int8_t); break;
-            case BCF_BT_INT16: BRANCH(int16_t); break;
-            case BCF_BT_INT32: BRANCH(int32_t); break;
-            default: fprintf(pysam_stderr,"FIXME: type %d in bcf_format_gt?\n", fmt->type); abort(); break;
-        }
-        #undef BRANCH
-
-        if ( plen==str.l )
+        bcf_format_gt(fmt, i, &str);
+        kputc_(0,&str);
+        if ( str.l - plen > blen )
         {
-            bcf_format_gt(fmt, i, &str);
-            if ( str.l - plen > blen )
-            {
-                // too many alternate alleles or ploidy is too large, the genotype does not fit
-                // three characters ("0/0" vs "10/10").
-                tok->str_value = str.s;
-                blen *= 2;
-                goto gt_length_too_big;
-            }
+            // too many alternate alleles or ploidy is too large, the genotype does not fit
+            // three characters ("0/0" vs "10/10").
+            tok->str_value = str.s;
+            blen *= 2;
+            goto gt_length_too_big;
         }
 
         plen = str.l - plen;
@@ -682,7 +733,7 @@ static void filters_set_ac(filter_t *flt, bcf1_t *line, token_t *tok)
     }
     else
     {
-        hts_expand(float,line->n_allele,tok->mvalues,tok->values);
+        hts_expand(double,line->n_allele,tok->mvalues,tok->values);
         for (i=1; i<line->n_allele; i++)
             tok->values[i-1] = flt->tmpi[i];
         tok->nvalues = line->n_allele - 1;
@@ -708,7 +759,7 @@ static void filters_set_af(filter_t *flt, bcf1_t *line, token_t *tok)
     if ( !tok->nvalues ) return;
     int i, an = flt->tmpi[0];
     for (i=0; i<tok->nvalues; i++)
-        tok->values[i] /= (float)an;
+        tok->values[i] /= (double)an;
 }
 static void filters_set_maf(filter_t *flt, bcf1_t *line, token_t *tok)
 {
@@ -717,18 +768,18 @@ static void filters_set_maf(filter_t *flt, bcf1_t *line, token_t *tok)
     int i, an = flt->tmpi[0];
     for (i=0; i<tok->nvalues; i++)
     {
-        tok->values[i] /= (float)an;
+        tok->values[i] /= (double)an;
         if ( tok->values[i] > 0.5 ) tok->values[i] = 1 - tok->values[i];
     }
 }
 
 static void set_max(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    float val = -HUGE_VAL;
+    double val = -HUGE_VAL;
     int i;
     for (i=0; i<tok->nvalues; i++)
     {
-        if ( !bcf_float_is_missing(tok->values[i]) && val < tok->values[i] ) val = tok->values[i];
+        if ( !bcf_double_is_missing(tok->values[i]) && val < tok->values[i] ) val = tok->values[i];
     }
     tok->values[0] = val;
     tok->nvalues   = 1;
@@ -736,30 +787,30 @@ static void set_max(filter_t *flt, bcf1_t *line, token_t *tok)
 }
 static void set_min(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    float val = HUGE_VAL;
+    double val = HUGE_VAL;
     int i;
     for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_float_is_missing(tok->values[i]) && val > tok->values[i] ) val = tok->values[i];
+        if ( !bcf_double_is_missing(tok->values[i]) && val > tok->values[i] ) val = tok->values[i];
     tok->values[0] = val;
     tok->nvalues   = 1;
     tok->nsamples  = 0;
 }
 static void set_avg(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    float val = 0;
+    double val = 0;
     int i, n = 0;
     for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_float_is_missing(tok->values[i]) ) { val += tok->values[i]; n++; }
+        if ( !bcf_double_is_missing(tok->values[i]) ) { val += tok->values[i]; n++; }
     tok->values[0] = n ? val / n : 0;
     tok->nvalues   = 1;
     tok->nsamples  = 0;
 }
 static void set_sum(filter_t *flt, bcf1_t *line, token_t *tok)
 {
-    float val = 0;
+    double val = 0;
     int i, n = 0;
     for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_float_is_missing(tok->values[i]) ) { val += tok->values[i]; n++; }
+        if ( !bcf_double_is_missing(tok->values[i]) ) { val += tok->values[i]; n++; }
     tok->values[0] = val;
     tok->nvalues   = 1;
     tok->nsamples  = 0;
@@ -814,20 +865,20 @@ static void set_strlen(filter_t *flt, bcf1_t *line, token_t *tok)
         { \
             for (i=0; i<(atok)->nvalues; i++) \
             { \
-                if ( bcf_float_is_missing((atok)->values[i]) ) continue; \
-                if ( bcf_float_is_missing((btok)->values[i]) ) { bcf_float_set_missing((atok)->values[i]); continue; } \
+                if ( bcf_double_is_missing((atok)->values[i]) ) continue; \
+                if ( bcf_double_is_missing((btok)->values[i]) ) { bcf_double_set_missing((atok)->values[i]); continue; } \
                 has_values = 1; \
                 (atok)->values[i] = (atok)->values[i] AOP (btok)->values[i]; \
             } \
         } \
         else if ( (btok)->nsamples ) \
         { \
-            hts_expand(float,(btok)->nvalues,(atok)->mvalues,(atok)->values); \
+            hts_expand(double,(btok)->nvalues,(atok)->mvalues,(atok)->values); \
             for (i=0; i<(btok)->nvalues; i++) \
             { \
-                if ( bcf_float_is_missing((atok)->values[0]) || bcf_float_is_missing((btok)->values[i]) ) \
+                if ( bcf_double_is_missing((atok)->values[0]) || bcf_double_is_missing((btok)->values[i]) ) \
                 { \
-                    bcf_float_set_missing((atok)->values[i]); \
+                    bcf_double_set_missing((atok)->values[i]); \
                     continue; \
                 } \
                 has_values = 1; \
@@ -840,9 +891,9 @@ static void set_strlen(filter_t *flt, bcf1_t *line, token_t *tok)
         { \
             for (i=0; i<(atok)->nvalues; i++) \
             { \
-                if ( bcf_float_is_missing((atok)->values[i]) || bcf_float_is_missing((btok)->values[0]) ) \
+                if ( bcf_double_is_missing((atok)->values[i]) || bcf_double_is_missing((btok)->values[0]) ) \
                 { \
-                    bcf_float_set_missing((atok)->values[i]); \
+                    bcf_double_set_missing((atok)->values[i]); \
                     continue; \
                 } \
                 has_values = 1; \
@@ -923,10 +974,14 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
         for (i=0; i<btok->nsamples; i++)
             atok->pass_samples[i] = btok->pass_samples[i];
         atok->nsamples = btok->nsamples;
+        atok->nvalues  = 1;
         return btok->pass_site;
     }
     if ( !btok->nvalues ) // missing value in b
+    {
+        btok->nvalues = 1;
         return atok->pass_site;
+    }
 
     if ( !atok->nsamples && !btok->nsamples ) return atok->pass_site || btok->pass_site;
     if ( !atok->nsamples )
@@ -980,6 +1035,7 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
     if ( (atok)->nsamples || (btok)->nsamples ) error("todo: Querying of missing values in FORMAT\n"); \
     token_t *tok = (atok)->is_missing ? (btok) : (atok); \
     (ret) = ( tok->nvalues CMP_OP 1 ) ? 0 : 1; \
+    tok->nvalues = 1; \
 }
 
 #define CMP_VECTORS(atok,btok,CMP_OP,ret) \
@@ -992,8 +1048,6 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
         { \
             for (i=0; i<(atok)->nsamples; i++) \
             { \
-                if ( bcf_float_is_missing((atok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
-                if ( bcf_float_is_missing((btok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
                 has_values = 1; \
                 if ( (atok)->values[i] CMP_OP (btok)->values[i] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
                 else (atok)->pass_samples[i] = 0; \
@@ -1002,34 +1056,26 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
         } \
         else if ( (atok)->nsamples ) \
         { \
-            if ( bcf_float_is_missing((btok)->values[0]) ) { (atok)->nvalues = 0; (atok)->nsamples = 0; (ret) = 0; } \
-            else \
+            for (i=0; i<(atok)->nsamples; i++) \
             { \
-                for (i=0; i<(atok)->nsamples; i++) \
-                { \
-                    if ( bcf_float_is_missing((atok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
-                    has_values = 1; \
-                    if ( (atok)->values[i] CMP_OP (btok)->values[0] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
-                    else (atok)->pass_samples[i] = 0; \
-                } \
+                /*if ( bcf_double_is_missing((atok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; }*/ \
+                has_values = 1; \
+                if ( (atok)->values[i] CMP_OP (btok)->values[0] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
+                else (atok)->pass_samples[i] = 0; \
             } \
             if ( !has_values ) (atok)->nvalues = 0; \
         } \
         else if ( (btok)->nsamples ) \
         { \
-            if ( bcf_float_is_missing((atok)->values[0]) ) { (atok)->nvalues = 0; (atok)->nsamples = 0; (ret) = 0; } \
-            else \
+            for (i=0; i<(btok)->nsamples; i++) \
             { \
-                for (i=0; i<(btok)->nsamples; i++) \
-                { \
-                    if ( bcf_float_is_missing((btok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
-                    has_values = 1; \
-                    if ( (atok)->values[0] CMP_OP (btok)->values[i] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
-                    else (atok)->pass_samples[i] = 0; \
-                } \
-                (atok)->nvalues  = (btok)->nvalues; \
-                (atok)->nsamples = (btok)->nsamples; \
+                if ( bcf_double_is_missing((btok)->values[i]) ) { (atok)->pass_samples[i] = 0; continue; } \
+                has_values = 1; \
+                if ( (atok)->values[0] CMP_OP (btok)->values[i] ) { (atok)->pass_samples[i] = 1; pass_site = 1; } \
+                else (atok)->pass_samples[i] = 0; \
             } \
+            (atok)->nvalues  = (btok)->nvalues; \
+            (atok)->nsamples = (btok)->nsamples; \
             if ( !has_values ) (atok)->nvalues = 0; \
         } \
         else if ( (atok)->idx==-2 || (btok)->idx==-2 ) \
@@ -1126,10 +1172,23 @@ static int cmp_vector_strings(token_t *atok, token_t *btok, int logic)    // log
     }
     return pass_site;
 }
-static int regex_vector_strings(token_t *atok, token_t *btok)
+static int regex_vector_strings(token_t *atok, token_t *btok, int negate)
 {
-    int ret = regexec(btok->regex, atok->str_value, 0,NULL,0);
-    return ret==0 ? 1 : 0;
+    int i, pass_site = 0;
+    if ( atok->nsamples )
+    {
+        for (i=0; i<atok->nsamples; i++)
+        {
+            char *ptr = atok->str_value + i*(int)atok->values[0];
+            atok->pass_samples[i] = regexec(btok->regex, ptr, 0,NULL,0) ? 0 : 1;
+            if ( negate ) atok->pass_samples[i] = atok->pass_samples[i] ? 0 : 1;
+            pass_site |= atok->pass_samples[i];
+        }
+        return pass_site;
+    }
+    pass_site = regexec(btok->regex, atok->str_value, 0,NULL,0) ? 0 : 1;
+    if ( negate ) pass_site = pass_site ? 0 : 1;
+    return pass_site;
 }
 
 static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
@@ -1145,7 +1204,7 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
         int quote = str[0];
         if ( str[len-1] != quote ) error("TODO: [%s]\n", filter->str);
         tok->key = (char*) calloc(len-1,sizeof(char));
-        hts_expand(float,1,tok->mvalues,tok->values);
+        hts_expand(double,1,tok->mvalues,tok->values);
         tok->values[0] = len-2;
         memcpy(tok->key,str+1,len-2);
         tok->key[len-2] = 0;
@@ -1374,11 +1433,18 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
         return 0;
     }
 
-    // is it a value?
+    // is it a value? Here we parse as integer/float separately and use strtof
+    // rather than strtod, because the more accurate double representation
+    // would invalidate floating point comparisons like QUAL=59.2, obtained via
+    // htslib/vcf parser
     char *end;
-    errno = 0;
-    tok->threshold = strtod(tmp.s, &end);
-    if ( errno!=0 || end!=tmp.s+len ) error("[%s:%d %s] Error: the tag \"INFO/%s\" is not defined in the VCF header\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);
+    tok->threshold = strtol(tmp.s, &end, 10);   // integer?
+    if ( end - tmp.s != strlen(tmp.s) )
+    {
+        errno = 0;
+        tok->threshold = strtof(tmp.s, &end);   // float?
+        if ( errno!=0 || end!=tmp.s+len ) error("[%s:%d %s] Error: the tag \"INFO/%s\" is not defined in the VCF header\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);
+    }
 
     if ( tmp.s ) free(tmp.s);
     return 0;
@@ -1513,11 +1579,11 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
             // Look for j="." and k numeric type
             int j = i-1, k = i-2;
             if ( !out[j].is_str ) { k = i-1, j = i-2; }
-            if ( out[k].hdr_id>0 && out[j].is_str && !strcmp(".",out[j].key) )
+            if ( out[k].hdr_id>0 && out[j].is_str && out[j].key && !strcmp(".",out[j].key) )
             {
                 int type = bcf_hdr_id2type(filter->hdr,out[k].type,out[k].hdr_id);
-                if ( type==BCF_HT_INT ) { out[j].is_str = 0; out[j].is_missing = 1; bcf_float_set_missing(out[j].values[0]); }
-                if ( type==BCF_HT_REAL ) { out[j].is_str = 0; out[j].is_missing = 1; bcf_float_set_missing(out[j].values[0]); }
+                if ( type==BCF_HT_INT ) { out[j].is_str = 0; out[j].is_missing = 1; bcf_double_set_missing(out[j].values[0]); }
+                if ( type==BCF_HT_REAL ) { out[j].is_str = 0; out[j].is_missing = 1; bcf_double_set_missing(out[j].values[0]); }
             }
         }
         if ( out[i].tok_type==TOK_LIKE || out[i].tok_type==TOK_NLIKE )
@@ -1526,7 +1592,14 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
             if ( !out[j].key )
                 error("Could not parse the expression, wrong value for regex operator: %s\n", filter->str);
             out[j].regex = (regex_t *) malloc(sizeof(regex_t));
-            if ( regcomp(out[j].regex, out[j].key, REG_ICASE|REG_NOSUB) )
+            int cflags = REG_NOSUB;
+            int len = strlen(out[j].key);
+            if ( len>2 && out[j].key[len-1]=='i' && out[j].key[len-2]=='/' && out[j].key[len-3]!='\\'  )
+            {
+                out[j].key[len-2] = 0;
+                cflags |= REG_ICASE;
+            }
+            if ( regcomp(out[j].regex, out[j].key, cflags) )
                 error("Could not compile the regex expression \"%s\": %s\n", out[j].key,filter->str);
         }
         if ( out[i].tok_type!=TOK_VAL ) continue;
@@ -1534,41 +1607,47 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
         if ( !strcmp(out[i].tag,"TYPE") )
         {
             if ( i+1==nout ) error("Could not parse the expression: %s\n", filter->str);
-            int j = i+1;
-            if ( out[j].tok_type==TOK_EQ || out[j].tok_type==TOK_NE ) j = i - 1;
-            if ( out[j].tok_type!=TOK_VAL || !out[j].key ) error("[%s:%d %s] Could not parse the expression: %s\n",  __FILE__,__LINE__,__FUNCTION__, filter->str);
-            if ( !strcasecmp(out[j].key,"snp") || !strcasecmp(out[j].key,"snps") ) { out[j].threshold = VCF_SNP; out[j].is_str = 0; }
-            else if ( !strcasecmp(out[j].key,"indel") || !strcasecmp(out[j].key,"indels") ) { out[j].threshold = VCF_INDEL; out[j].is_str = 0; }
-            else if ( !strcasecmp(out[j].key,"mnp") || !strcasecmp(out[j].key,"mnps") ) { out[j].threshold = VCF_MNP; out[j].is_str = 0; }
-            else if ( !strcasecmp(out[j].key,"other") ) { out[j].threshold = VCF_OTHER; out[j].is_str = 0; }
-            else if ( !strcasecmp(out[j].key,"ref") ) { out[j].threshold = VCF_REF; out[j].is_str = 0; }
-            else error("The type \"%s\" not recognised: %s\n", out[j].key, filter->str);
-            out[j].tag = out[j].key; out[j].key = NULL;
-            i = j;
+            int itok, ival;
+            if ( out[i+1].tok_type==TOK_EQ || out[i+1].tok_type==TOK_NE ) ival = i - 1, itok = i + 1;
+            else if ( out[i+1].tok_type==TOK_LIKE || out[i+1].tok_type==TOK_NLIKE ) ival = i - 1, itok = i + 1;
+            else if ( out[i+2].tok_type==TOK_EQ || out[i+2].tok_type==TOK_NE ) itok = i + 2, ival = i + 1;
+            else if ( out[i+2].tok_type==TOK_LIKE || out[i+2].tok_type==TOK_NLIKE ) itok = i + 2, ival = i + 1;
+            else error("[%s:%d %s] Could not parse the expression: %s\n",  __FILE__,__LINE__,__FUNCTION__, filter->str);
+            if ( !strcasecmp(out[ival].key,"snp") || !strcasecmp(out[ival].key,"snps") ) { out[ival].threshold = VCF_SNP<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"indel") || !strcasecmp(out[ival].key,"indels") ) { out[ival].threshold = VCF_INDEL<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"mnp") || !strcasecmp(out[ival].key,"mnps") ) { out[ival].threshold = VCF_MNP<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"other") ) { out[ival].threshold = VCF_OTHER<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"bnd") ) { out[ival].threshold = VCF_BND<<1; out[ival].is_str = 0; }
+            else if ( !strcasecmp(out[ival].key,"ref") ) { out[ival].threshold = 1; out[ival].is_str = 0; }
+            else error("The type \"%s\" not recognised: %s\n", out[ival].key, filter->str);
+            if ( out[itok].tok_type==TOK_LIKE || out[itok].tok_type==TOK_NLIKE ) out[itok].comparator = filters_cmp_bit_and;
+            out[ival].tag = out[ival].key; out[ival].key = NULL;
+            i = itok;
             continue;
         }
         if ( !strcmp(out[i].tag,"FILTER") )
         {
             if ( i+1==nout ) error("Could not parse the expression: %s\n", filter->str);
-            int j = i+1;
-            if ( out[j].tok_type==TOK_EQ || out[j].tok_type==TOK_NE ) j = i - 1;    // the expression has "value"=FILTER rather than FILTER="value"
-            if ( out[j].tok_type==TOK_LIKE ) out[j].tok_type = TOK_EQ;              // for FILTER, ~ and !~ work the same way as = and !=
-            if ( out[j].tok_type==TOK_NLIKE ) out[j].tok_type = TOK_NE;
-            if ( out[j+1].tok_type==TOK_LIKE ) out[j+1].tok_type = TOK_EQ;
-            if ( out[j+1].tok_type==TOK_NLIKE ) out[j+1].tok_type = TOK_NE;
-            if ( out[j].tok_type!=TOK_VAL || !out[j].key )
+            int itok = i, ival;
+            if ( out[i+1].tok_type==TOK_EQ || out[i+1].tok_type==TOK_NE ) ival = i - 1;
+            else if ( out[i+1].tok_type==TOK_LIKE ) out[i+1].tok_type = TOK_EQ, ival = i - 1;
+            else if ( out[i+1].tok_type==TOK_NLIKE ) out[i+1].tok_type = TOK_NE, ival = i - 1;
+            else if ( out[i+2].tok_type==TOK_EQ || out[i+2].tok_type==TOK_NE ) ival = ++i;
+            else if ( out[i+2].tok_type==TOK_LIKE ) out[i+2].tok_type = TOK_EQ, ival = ++i;
+            else if ( out[i+2].tok_type==TOK_NLIKE ) out[i+2].tok_type = TOK_NE, ival = ++i;
+            else error("[%s:%d %s] Could not parse the expression: %s\n",  __FILE__,__LINE__,__FUNCTION__, filter->str);
+            if ( out[ival].tok_type!=TOK_VAL || !out[ival].key )
                 error("[%s:%d %s] Could not parse the expression, an unquoted string value perhaps? %s\n", __FILE__,__LINE__,__FUNCTION__, filter->str);
-            if ( strcmp(".",out[j].key) )
+            if ( strcmp(".",out[ival].key) )
             {
-                out[j].hdr_id = bcf_hdr_id2int(filter->hdr, BCF_DT_ID, out[j].key);
-                if ( !bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_FLT,out[j].hdr_id) )
-                    error("The filter \"%s\" not present in the VCF header\n", out[j].key);
+                out[ival].hdr_id = bcf_hdr_id2int(filter->hdr, BCF_DT_ID, out[ival].key);
+                if ( !bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_FLT,out[ival].hdr_id) )
+                    error("The filter \"%s\" not present in the VCF header\n", out[ival].key);
             }
             else
-                out[j].hdr_id = -1;
-            out[j].tag = out[j].key; out[j].key = NULL;
-            out[i].hdr_id = out[j].hdr_id;
-            i = j;
+                out[ival].hdr_id = -1;
+            out[ival].tag = out[ival].key; out[ival].key = NULL;
+            out[itok].hdr_id = out[ival].hdr_id;
             continue;
         }
     }
@@ -1581,7 +1660,7 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
         else if ( out[i].tok_type==TOK_SUM ) { out[i].setter = set_sum; out[i].tok_type = TOK_FUNC; }
         else if ( out[i].tok_type==TOK_ABS ) { out[i].setter = set_abs; out[i].tok_type = TOK_FUNC; }
         else if ( out[i].tok_type==TOK_LEN ) { out[i].setter = set_strlen; out[i].tok_type = TOK_FUNC; }
-        hts_expand0(float,1,out[i].mvalues,out[i].values);
+        hts_expand0(double,1,out[i].mvalues,out[i].values);
         if ( filter->nsamples )
         {
             out[i].pass_samples = (uint8_t*)malloc(filter->nsamples);
@@ -1620,6 +1699,7 @@ void filter_destroy(filter_t *filter)
     free(filter->flt_stack);
     free(filter->str);
     free(filter->tmpi);
+    free(filter->tmpf);
     free(filter);
 }
 
@@ -1706,7 +1786,9 @@ int filter_test(filter_t *filter, bcf1_t *line, const uint8_t **samples)
         }
 
         int is_true = 0;
-        if ( !filter->flt_stack[nstack-1]->nvalues || !filter->flt_stack[nstack-2]->nvalues )
+        if ( filter->filters[i].comparator )
+            is_true = filter->filters[i].comparator(filter->flt_stack[nstack-1],filter->flt_stack[nstack-2],filter->filters[i].tok_type,line);
+        else if ( !filter->flt_stack[nstack-1]->nvalues || !filter->flt_stack[nstack-2]->nvalues )
         {
             int skip = 0;
             if ( !filter->flt_stack[nstack-2]->is_missing && !filter->flt_stack[nstack-1]->is_missing ) skip = 1;
@@ -1748,10 +1830,7 @@ int filter_test(filter_t *filter, bcf1_t *line, const uint8_t **samples)
         else if ( filter->filters[i].tok_type == TOK_LIKE || filter->filters[i].tok_type == TOK_NLIKE )
         {
             if ( is_str==2 )
-            {
-                is_true = regex_vector_strings(filter->flt_stack[nstack-2],filter->flt_stack[nstack-1]);
-                if ( filter->filters[i].tok_type == TOK_NLIKE ) is_true = is_true ? 0 : 1;
-            }
+                is_true = regex_vector_strings(filter->flt_stack[nstack-2],filter->flt_stack[nstack-1], filter->filters[i].tok_type == TOK_LIKE ? 0 : 1);
             else
                 error("The regex operator can be used on strings only: %s\n", filter->str);
         }
diff --git a/bcftools/hclust.c b/bcftools/hclust.c
new file mode 100644
index 0000000..692fa54
--- /dev/null
+++ b/bcftools/hclust.c
@@ -0,0 +1,400 @@
+/* The MIT License
+
+   Copyright (c) 2016 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3 at sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <htslib/hts.h>
+#include <htslib/kstring.h>
+#include <stdlib.h>
+#include "bcftools.h"
+#include "hclust.h"
+
+typedef struct _node_t
+{
+    struct _node_t *akid, *bkid, *next, *prev, *parent;
+    int id, idx;    // id: unique node id; idx: current index to pdist
+    float value;    // max pairwise dist of elements within the node
+}
+node_t;
+
+struct _hclust_t
+{
+    int ndat, nclust;       // ndat: number of elements (pdist matrix size); nclust: current number of clusters
+    float *pdist;           // pairwise cluster distances, diagonal matrix accessed via the PDIST macro
+    node_t *first, *last;   // clusters are maintained in a double-linked list
+    node_t **rmme;          // convenience array to remove all allocated nodes at the end
+    int nrmme;
+    kstring_t str;          // (for debugging) pointer to str.s is returned by create_dot()
+    char **dbg;             // (for debugging) created by create_list() via set_threshold() and returned by explain()
+    int ndbg, mdbg;
+};
+
+node_t *append_node(hclust_t *clust, int idx)
+{
+    node_t *node = (node_t*) calloc(1,sizeof(node_t));
+
+    clust->nclust++;
+    node->id  = clust->nrmme;
+    node->idx = idx;
+    if ( !clust->first )
+    {
+        clust->first = node; 
+        clust->last  = node; 
+    }
+    else
+    {
+        node->prev = clust->last;
+        clust->last->next = node; 
+        clust->last = node; 
+    }
+    
+    if ( clust->nrmme >= clust->ndat*2 ) error("hclust fixme: %d vs %d\n",clust->nrmme,clust->ndat);
+    clust->rmme[clust->nrmme++] = node;
+
+    return node;
+}
+void remove_node(hclust_t *clust, node_t *node)
+{
+    if ( node==clust->first ) clust->first = node->next;
+    if ( node==clust->last ) clust->last = node->prev;
+    if ( node->next ) node->next->prev = node->prev;
+    if ( node->prev ) node->prev->next = node->next;
+    clust->nclust--;
+}
+
+#if DEBUG
+void hclust_debug(hclust_t *clust)
+{
+    int i;
+    fprintf(stderr,"nrmme=%d  nclust=%d\n", clust->nrmme,clust->nclust);
+    for (i=0; i<clust->nrmme; i++)
+    {
+        node_t *node = clust->rmme[i];
+        int akid  = node->akid ? node->akid->id : -1;
+        int bkid  = node->bkid ? node->bkid->id : -1;
+        int akidx = node->akid ? node->akid->idx : -1;
+        int bkidx = node->bkid ? node->bkid->idx : -1;
+        fprintf(stderr,"\t%d\t%d\t%f\t%d %d\t%d %d\n",node->id,node->idx,node->value,akid,bkid,akidx,bkidx);
+    }
+
+    int j;
+    for (i=1; i<clust->ndat; i++)
+    {
+        int active = 0;
+        node_t *node = clust->first;
+        while (node)
+        {
+            if ( node->idx==i ) { active=1; break; }
+            node = node->next;
+        }
+        fprintf(stderr,"%2d%c ",i,active?'*':' ');
+        for (j=0; j<i; j++)
+        {
+            if ( PDIST(clust->pdist,i,j)==9 )
+                fprintf(stderr,"  -----  ");
+            else
+                fprintf(stderr," %f", PDIST(clust->pdist,i,j));
+        }
+        fprintf(stderr,"\n");
+    }
+    for (j=0; j<clust->ndat-1; j++) fprintf(stderr,"  %6d ",j); fprintf(stderr,"\n");
+}
+#endif
+
+hclust_t *hclust_init(int n, float *pdist)
+{
+    hclust_t *clust = (hclust_t*) calloc(1,sizeof(hclust_t));
+    clust->ndat  = n;
+    clust->pdist = pdist;
+    clust->rmme = (node_t**) calloc(n*2,sizeof(node_t*));
+
+    // init clusters
+    int i;
+    for (i=0; i<clust->ndat; i++) append_node(clust,i);
+
+    // build the tree
+    while ( clust->nclust>1 )
+    {
+        // find two clusters with minimum distance
+        float min_value = HUGE_VAL;
+        node_t *iclust = clust->first->next;
+        node_t *min_iclust = NULL, *min_jclust = NULL;
+        while ( iclust )
+        {
+            node_t *jclust = clust->first;
+            while ( jclust!=iclust )
+            {
+                float value = PDIST(clust->pdist,iclust->idx,jclust->idx);
+                if ( value < min_value ) 
+                { 
+                    min_value  = value;
+                    min_iclust = iclust;
+                    min_jclust = jclust; 
+                }
+                jclust = jclust->next;
+            }
+            iclust = iclust->next;
+        }
+        assert( min_iclust && min_jclust ); // pdist contains inf or nan, fix the caller
+        remove_node(clust,min_iclust);
+        remove_node(clust,min_jclust);
+
+        // update the pairwise distances. We keep the matrix and as we are moving up the
+        // tree, we use fewer columns/rows as the number of clusters decreases: we reuse
+        // i-th and leave j-th unused. Inter-cluster distance is defined as maximum distance
+        // between pairwise distances of elements within the cluster.
+        iclust = clust->first;
+        while ( iclust )
+        {
+            if ( PDIST(clust->pdist,iclust->idx,min_iclust->idx) < PDIST(clust->pdist,iclust->idx,min_jclust->idx) )
+                PDIST(clust->pdist,iclust->idx,min_iclust->idx) = PDIST(clust->pdist,iclust->idx,min_jclust->idx);
+            iclust = iclust->next;
+        }
+
+        node_t *node = append_node(clust,min_iclust->idx);
+        node->akid  = min_iclust;
+        node->bkid  = min_jclust;
+        node->value = min_value;
+        node->akid->parent = node;
+        node->bkid->parent = node;
+    }
+
+    return clust;
+}
+void hclust_destroy(hclust_t *clust)
+{
+    int i;
+    for (i=0; i<clust->nrmme; i++) free(clust->rmme[i]);
+    free(clust->rmme);
+    free(clust->dbg);
+    free(clust->str.s);
+    free(clust);
+}
+
+char *hclust_create_dot(hclust_t *clust, char **labels, float th)
+{
+    clust->str.l = 0;
+    ksprintf(&clust->str,"digraph myGraph {");
+
+    int i;
+    for (i=0; i<clust->nrmme; i++)
+    {
+        node_t *node = clust->rmme[i];
+        if ( node->value )
+            ksprintf(&clust->str,"\"%d\" [label=\"%f\"];", node->id,node->value);
+        else
+            ksprintf(&clust->str,"\"%d\" [label=\"%s\"];", node->id,labels[node->idx]);
+    }
+    for (i=0; i<clust->nrmme; i++)
+    {
+        node_t *node = clust->rmme[i];
+        if ( node->akid )
+        {
+            if ( node->value >= th && node->akid && node->akid->value < th )
+                ksprintf(&clust->str,"\"%d\" -> \"%d\" [color=\"#D43F3A\" penwidth=3];", node->id,node->akid->id);
+            else
+                ksprintf(&clust->str,"\"%d\" -> \"%d\";", node->id,node->akid->id);
+        }
+
+        if ( node->bkid )
+        {
+            if ( node->value >= th && node->bkid && node->bkid->value < th )
+                ksprintf(&clust->str,"\"%d\" -> \"%d\" [color=\"#D43F3A\" penwidth=3];", node->id,node->bkid->id);
+            else
+                ksprintf(&clust->str,"\"%d\" -> \"%d\";", node->id,node->bkid->id);
+        }
+    }
+    ksprintf(&clust->str,"};");
+    return clust->str.s;
+}
+char **hclust_explain(hclust_t *clust, int *nlines)
+{
+    clust->ndbg = 0;
+    char *beg = clust->str.s;
+    while ( *beg )
+    {
+        char *end = beg;
+        while ( *end && *end!='\n' ) end++;
+        clust->ndbg++;
+        hts_expand(char*,clust->ndbg,clust->mdbg,clust->dbg);
+        clust->dbg[clust->ndbg-1] = beg;
+        if ( !*end ) break;
+        *end = 0;
+        beg = end + 1;
+    }
+
+    *nlines = clust->ndbg;
+    return clust->dbg;
+}
+
+cluster_t *append_cluster(node_t *node, cluster_t *cluster, int *nclust, node_t **stack)
+{
+    (*nclust)++;
+    cluster = (cluster_t*) realloc(cluster,sizeof(cluster_t)*(*nclust));
+    cluster_t *clust = &cluster[*nclust-1];
+    clust->nmemb = 0;
+    clust->memb  = NULL;
+    clust->dist  = node->value;
+
+    int nstack = 1;
+    stack[0] = node;
+
+    while ( nstack )
+    {
+        node_t *node = stack[--nstack];
+        node_t *akid = node->akid;
+        node_t *bkid = node->bkid;
+        if ( node->akid )
+        {
+            stack[nstack++] = akid;
+            stack[nstack++] = bkid;
+        }
+        else    
+        {
+            clust->nmemb++;
+            clust->memb = (int*) realloc(clust->memb,sizeof(int)*clust->nmemb);
+            clust->memb[clust->nmemb-1] = node->id;
+        }
+    }
+    return cluster;
+}
+
+int cmp_nodes(const void *a, const void *b)
+{
+    const node_t *an = *((const node_t**) a);
+    const node_t *bn = *((const node_t**) b);
+    if ( an->value < bn->value ) return -1;
+    if ( an->value > bn->value ) return 1;
+    return 0;
+}
+
+float calc_dev(node_t **dat, int n)
+{
+    float avg = 0, dev = 0;
+    int i;
+    for (i=0; i<n; i++) avg += dat[i]->value;
+    avg /= n;
+    for (i=0; i<n; i++) dev += (dat[i]->value - avg)*(dat[i]->value - avg);
+    return sqrt(dev/n);
+}
+
+/*
+    Heuristics to determine clustering cutoff: sort nodes by distance and
+    split into two groups by minimizing the standard deviation.
+    This works best when two elements from a single different sample are
+    included in the mix.
+        - min_inter_dist .. smaller values are always considered identical
+        - max_intra_dist .. larger values are always considered different
+ */
+float hclust_set_threshold(hclust_t *clust, float min_inter_dist, float max_intra_dist)
+{
+    node_t **dat = clust->rmme + clust->ndat;
+    int i, ndat = clust->nrmme - clust->ndat;
+ 
+    qsort(dat, ndat, sizeof(dat), cmp_nodes);
+
+    clust->str.l = 0;
+    float th, min_dev = HUGE_VAL;
+    int imin = -1;
+    for (i=0; i<ndat; i++)
+    {
+        float dev = 0;
+        if ( i>0 ) dev += calc_dev(dat,i);
+        if ( i+1<ndat ) dev += calc_dev(dat+i,ndat-i);
+        th  = dat[i]->value;
+        ksprintf(&clust->str,"DEV\t%f\t%f\n",th,dev);
+        if ( min_dev > dev && th >= min_inter_dist ) { min_dev = dev; imin = i; }
+    }
+    if ( max_intra_dist > 0 )
+        th = max_intra_dist;  // use fixed cutoff, the above was only for debugging output
+    else
+    {
+        // dynamic cutoff
+        max_intra_dist = fabs(max_intra_dist);
+        th = imin==-1 ? max_intra_dist : dat[imin]->value;
+        if ( th > max_intra_dist ) th = max_intra_dist;
+    }
+    ksprintf(&clust->str,"TH\t%f\n", th);
+    ksprintf(&clust->str,"MAX_DIST\t%f\n", dat[ndat-1]->value);
+    ksprintf(&clust->str,"MIN_INTER\t%f\n", min_inter_dist);
+    ksprintf(&clust->str,"MAX_INTRA\t%f\n", max_intra_dist);
+    return th;
+} 
+
+cluster_t *hclust_create_list(hclust_t *clust, float min_inter_dist, float *max_intra_dist, int *nclust)
+{
+    float cutoff = *max_intra_dist = hclust_set_threshold(clust, min_inter_dist, *max_intra_dist);
+
+    node_t **stack = (node_t**) malloc(sizeof(node_t*)*clust->ndat);
+    node_t **tmp = (node_t**) malloc(sizeof(node_t*)*clust->ndat);
+    stack[0] = clust->first;
+    int nstack = 1;
+    
+    cluster_t *cluster = NULL;
+    int ncluster = 0;
+
+    if ( stack[0]->value < cutoff )
+    {
+        // all values are within the limits - create a single cluster
+        cluster = append_cluster(stack[0], cluster, &ncluster, tmp);
+        nstack = 0;
+    }
+
+    while ( nstack )
+    {
+        node_t *node = stack[--nstack];
+        node_t *akid = node->akid;
+        node_t *bkid = node->bkid;
+        if ( !akid )
+        {
+            cluster = append_cluster(node, cluster, &ncluster, tmp);
+            continue;
+        }
+
+        if ( node->value >= cutoff && akid->value < cutoff )
+            cluster = append_cluster(akid, cluster, &ncluster, tmp);
+        else    
+            stack[nstack++] = akid;
+
+        if ( node->value >= cutoff && bkid->value < cutoff )
+            cluster = append_cluster(bkid, cluster, &ncluster, tmp);
+        else    
+            stack[nstack++] = bkid;
+    }
+
+    free(tmp);
+    free(stack);
+
+    *nclust = ncluster;
+    return cluster;
+}
+
+void hclust_destroy_list(cluster_t *clust, int nclust)
+{
+    int i;
+    for (i=0; i<nclust; i++) free(clust[i].memb);
+    free(clust);
+}
+
+
diff --git a/bcftools/hclust.c.pysam.c b/bcftools/hclust.c.pysam.c
new file mode 100644
index 0000000..d43ddcf
--- /dev/null
+++ b/bcftools/hclust.c.pysam.c
@@ -0,0 +1,402 @@
+#include "pysam.h"
+
+/* The MIT License
+
+   Copyright (c) 2016 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3 at sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <htslib/hts.h>
+#include <htslib/kstring.h>
+#include <stdlib.h>
+#include "bcftools.h"
+#include "hclust.h"
+
+typedef struct _node_t
+{
+    struct _node_t *akid, *bkid, *next, *prev, *parent;
+    int id, idx;    // id: unique node id; idx: current index to pdist
+    float value;    // max pairwise dist of elements within the node
+}
+node_t;
+
+struct _hclust_t
+{
+    int ndat, nclust;       // ndat: number of elements (pdist matrix size); nclust: current number of clusters
+    float *pdist;           // pairwise cluster distances, diagonal matrix accessed via the PDIST macro
+    node_t *first, *last;   // clusters are maintained in a double-linked list
+    node_t **rmme;          // convenience array to remove all allocated nodes at the end
+    int nrmme;
+    kstring_t str;          // (for debugging) pointer to str.s is returned by create_dot()
+    char **dbg;             // (for debugging) created by create_list() via set_threshold() and returned by explain()
+    int ndbg, mdbg;
+};
+
+node_t *append_node(hclust_t *clust, int idx)
+{
+    node_t *node = (node_t*) calloc(1,sizeof(node_t));
+
+    clust->nclust++;
+    node->id  = clust->nrmme;
+    node->idx = idx;
+    if ( !clust->first )
+    {
+        clust->first = node; 
+        clust->last  = node; 
+    }
+    else
+    {
+        node->prev = clust->last;
+        clust->last->next = node; 
+        clust->last = node; 
+    }
+    
+    if ( clust->nrmme >= clust->ndat*2 ) error("hclust fixme: %d vs %d\n",clust->nrmme,clust->ndat);
+    clust->rmme[clust->nrmme++] = node;
+
+    return node;
+}
+void remove_node(hclust_t *clust, node_t *node)
+{
+    if ( node==clust->first ) clust->first = node->next;
+    if ( node==clust->last ) clust->last = node->prev;
+    if ( node->next ) node->next->prev = node->prev;
+    if ( node->prev ) node->prev->next = node->next;
+    clust->nclust--;
+}
+
+#if DEBUG
+void hclust_debug(hclust_t *clust)
+{
+    int i;
+    fprintf(pysam_stderr,"nrmme=%d  nclust=%d\n", clust->nrmme,clust->nclust);
+    for (i=0; i<clust->nrmme; i++)
+    {
+        node_t *node = clust->rmme[i];
+        int akid  = node->akid ? node->akid->id : -1;
+        int bkid  = node->bkid ? node->bkid->id : -1;
+        int akidx = node->akid ? node->akid->idx : -1;
+        int bkidx = node->bkid ? node->bkid->idx : -1;
+        fprintf(pysam_stderr,"\t%d\t%d\t%f\t%d %d\t%d %d\n",node->id,node->idx,node->value,akid,bkid,akidx,bkidx);
+    }
+
+    int j;
+    for (i=1; i<clust->ndat; i++)
+    {
+        int active = 0;
+        node_t *node = clust->first;
+        while (node)
+        {
+            if ( node->idx==i ) { active=1; break; }
+            node = node->next;
+        }
+        fprintf(pysam_stderr,"%2d%c ",i,active?'*':' ');
+        for (j=0; j<i; j++)
+        {
+            if ( PDIST(clust->pdist,i,j)==9 )
+                fprintf(pysam_stderr,"  -----  ");
+            else
+                fprintf(pysam_stderr," %f", PDIST(clust->pdist,i,j));
+        }
+        fprintf(pysam_stderr,"\n");
+    }
+    for (j=0; j<clust->ndat-1; j++) fprintf(pysam_stderr,"  %6d ",j); fprintf(pysam_stderr,"\n");
+}
+#endif
+
+hclust_t *hclust_init(int n, float *pdist)
+{
+    hclust_t *clust = (hclust_t*) calloc(1,sizeof(hclust_t));
+    clust->ndat  = n;
+    clust->pdist = pdist;
+    clust->rmme = (node_t**) calloc(n*2,sizeof(node_t*));
+
+    // init clusters
+    int i;
+    for (i=0; i<clust->ndat; i++) append_node(clust,i);
+
+    // build the tree
+    while ( clust->nclust>1 )
+    {
+        // find two clusters with minimum distance
+        float min_value = HUGE_VAL;
+        node_t *iclust = clust->first->next;
+        node_t *min_iclust = NULL, *min_jclust = NULL;
+        while ( iclust )
+        {
+            node_t *jclust = clust->first;
+            while ( jclust!=iclust )
+            {
+                float value = PDIST(clust->pdist,iclust->idx,jclust->idx);
+                if ( value < min_value ) 
+                { 
+                    min_value  = value;
+                    min_iclust = iclust;
+                    min_jclust = jclust; 
+                }
+                jclust = jclust->next;
+            }
+            iclust = iclust->next;
+        }
+        assert( min_iclust && min_jclust ); // pdist contains inf or nan, fix the caller
+        remove_node(clust,min_iclust);
+        remove_node(clust,min_jclust);
+
+        // update the pairwise distances. We keep the matrix and as we are moving up the
+        // tree, we use fewer columns/rows as the number of clusters decreases: we reuse
+        // i-th and leave j-th unused. Inter-cluster distance is defined as maximum distance
+        // between pairwise distances of elements within the cluster.
+        iclust = clust->first;
+        while ( iclust )
+        {
+            if ( PDIST(clust->pdist,iclust->idx,min_iclust->idx) < PDIST(clust->pdist,iclust->idx,min_jclust->idx) )
+                PDIST(clust->pdist,iclust->idx,min_iclust->idx) = PDIST(clust->pdist,iclust->idx,min_jclust->idx);
+            iclust = iclust->next;
+        }
+
+        node_t *node = append_node(clust,min_iclust->idx);
+        node->akid  = min_iclust;
+        node->bkid  = min_jclust;
+        node->value = min_value;
+        node->akid->parent = node;
+        node->bkid->parent = node;
+    }
+
+    return clust;
+}
+void hclust_destroy(hclust_t *clust)
+{
+    int i;
+    for (i=0; i<clust->nrmme; i++) free(clust->rmme[i]);
+    free(clust->rmme);
+    free(clust->dbg);
+    free(clust->str.s);
+    free(clust);
+}
+
+char *hclust_create_dot(hclust_t *clust, char **labels, float th)
+{
+    clust->str.l = 0;
+    ksprintf(&clust->str,"digraph myGraph {");
+
+    int i;
+    for (i=0; i<clust->nrmme; i++)
+    {
+        node_t *node = clust->rmme[i];
+        if ( node->value )
+            ksprintf(&clust->str,"\"%d\" [label=\"%f\"];", node->id,node->value);
+        else
+            ksprintf(&clust->str,"\"%d\" [label=\"%s\"];", node->id,labels[node->idx]);
+    }
+    for (i=0; i<clust->nrmme; i++)
+    {
+        node_t *node = clust->rmme[i];
+        if ( node->akid )
+        {
+            if ( node->value >= th && node->akid && node->akid->value < th )
+                ksprintf(&clust->str,"\"%d\" -> \"%d\" [color=\"#D43F3A\" penwidth=3];", node->id,node->akid->id);
+            else
+                ksprintf(&clust->str,"\"%d\" -> \"%d\";", node->id,node->akid->id);
+        }
+
+        if ( node->bkid )
+        {
+            if ( node->value >= th && node->bkid && node->bkid->value < th )
+                ksprintf(&clust->str,"\"%d\" -> \"%d\" [color=\"#D43F3A\" penwidth=3];", node->id,node->bkid->id);
+            else
+                ksprintf(&clust->str,"\"%d\" -> \"%d\";", node->id,node->bkid->id);
+        }
+    }
+    ksprintf(&clust->str,"};");
+    return clust->str.s;
+}
+char **hclust_explain(hclust_t *clust, int *nlines)
+{
+    clust->ndbg = 0;
+    char *beg = clust->str.s;
+    while ( *beg )
+    {
+        char *end = beg;
+        while ( *end && *end!='\n' ) end++;
+        clust->ndbg++;
+        hts_expand(char*,clust->ndbg,clust->mdbg,clust->dbg);
+        clust->dbg[clust->ndbg-1] = beg;
+        if ( !*end ) break;
+        *end = 0;
+        beg = end + 1;
+    }
+
+    *nlines = clust->ndbg;
+    return clust->dbg;
+}
+
+cluster_t *append_cluster(node_t *node, cluster_t *cluster, int *nclust, node_t **stack)
+{
+    (*nclust)++;
+    cluster = (cluster_t*) realloc(cluster,sizeof(cluster_t)*(*nclust));
+    cluster_t *clust = &cluster[*nclust-1];
+    clust->nmemb = 0;
+    clust->memb  = NULL;
+    clust->dist  = node->value;
+
+    int nstack = 1;
+    stack[0] = node;
+
+    while ( nstack )
+    {
+        node_t *node = stack[--nstack];
+        node_t *akid = node->akid;
+        node_t *bkid = node->bkid;
+        if ( node->akid )
+        {
+            stack[nstack++] = akid;
+            stack[nstack++] = bkid;
+        }
+        else    
+        {
+            clust->nmemb++;
+            clust->memb = (int*) realloc(clust->memb,sizeof(int)*clust->nmemb);
+            clust->memb[clust->nmemb-1] = node->id;
+        }
+    }
+    return cluster;
+}
+
+int cmp_nodes(const void *a, const void *b)
+{
+    const node_t *an = *((const node_t**) a);
+    const node_t *bn = *((const node_t**) b);
+    if ( an->value < bn->value ) return -1;
+    if ( an->value > bn->value ) return 1;
+    return 0;
+}
+
+float calc_dev(node_t **dat, int n)
+{
+    float avg = 0, dev = 0;
+    int i;
+    for (i=0; i<n; i++) avg += dat[i]->value;
+    avg /= n;
+    for (i=0; i<n; i++) dev += (dat[i]->value - avg)*(dat[i]->value - avg);
+    return sqrt(dev/n);
+}
+
+/*
+    Heuristics to determine clustering cutoff: sort nodes by distance and
+    split into two groups by minimizing the standard deviation.
+    This works best when two elements from a single different sample are
+    included in the mix.
+        - min_inter_dist .. smaller values are always considered identical
+        - max_intra_dist .. larger values are always considered different
+ */
+float hclust_set_threshold(hclust_t *clust, float min_inter_dist, float max_intra_dist)
+{
+    node_t **dat = clust->rmme + clust->ndat;
+    int i, ndat = clust->nrmme - clust->ndat;
+ 
+    qsort(dat, ndat, sizeof(dat), cmp_nodes);
+
+    clust->str.l = 0;
+    float th, min_dev = HUGE_VAL;
+    int imin = -1;
+    for (i=0; i<ndat; i++)
+    {
+        float dev = 0;
+        if ( i>0 ) dev += calc_dev(dat,i);
+        if ( i+1<ndat ) dev += calc_dev(dat+i,ndat-i);
+        th  = dat[i]->value;
+        ksprintf(&clust->str,"DEV\t%f\t%f\n",th,dev);
+        if ( min_dev > dev && th >= min_inter_dist ) { min_dev = dev; imin = i; }
+    }
+    if ( max_intra_dist > 0 )
+        th = max_intra_dist;  // use fixed cutoff, the above was only for debugging output
+    else
+    {
+        // dynamic cutoff
+        max_intra_dist = fabs(max_intra_dist);
+        th = imin==-1 ? max_intra_dist : dat[imin]->value;
+        if ( th > max_intra_dist ) th = max_intra_dist;
+    }
+    ksprintf(&clust->str,"TH\t%f\n", th);
+    ksprintf(&clust->str,"MAX_DIST\t%f\n", dat[ndat-1]->value);
+    ksprintf(&clust->str,"MIN_INTER\t%f\n", min_inter_dist);
+    ksprintf(&clust->str,"MAX_INTRA\t%f\n", max_intra_dist);
+    return th;
+} 
+
+cluster_t *hclust_create_list(hclust_t *clust, float min_inter_dist, float *max_intra_dist, int *nclust)
+{
+    float cutoff = *max_intra_dist = hclust_set_threshold(clust, min_inter_dist, *max_intra_dist);
+
+    node_t **stack = (node_t**) malloc(sizeof(node_t*)*clust->ndat);
+    node_t **tmp = (node_t**) malloc(sizeof(node_t*)*clust->ndat);
+    stack[0] = clust->first;
+    int nstack = 1;
+    
+    cluster_t *cluster = NULL;
+    int ncluster = 0;
+
+    if ( stack[0]->value < cutoff )
+    {
+        // all values are within the limits - create a single cluster
+        cluster = append_cluster(stack[0], cluster, &ncluster, tmp);
+        nstack = 0;
+    }
+
+    while ( nstack )
+    {
+        node_t *node = stack[--nstack];
+        node_t *akid = node->akid;
+        node_t *bkid = node->bkid;
+        if ( !akid )
+        {
+            cluster = append_cluster(node, cluster, &ncluster, tmp);
+            continue;
+        }
+
+        if ( node->value >= cutoff && akid->value < cutoff )
+            cluster = append_cluster(akid, cluster, &ncluster, tmp);
+        else    
+            stack[nstack++] = akid;
+
+        if ( node->value >= cutoff && bkid->value < cutoff )
+            cluster = append_cluster(bkid, cluster, &ncluster, tmp);
+        else    
+            stack[nstack++] = bkid;
+    }
+
+    free(tmp);
+    free(stack);
+
+    *nclust = ncluster;
+    return cluster;
+}
+
+void hclust_destroy_list(cluster_t *clust, int nclust)
+{
+    int i;
+    for (i=0; i<nclust; i++) free(clust[i].memb);
+    free(clust);
+}
+
+
diff --git a/bcftools/hclust.h b/bcftools/hclust.h
new file mode 100644
index 0000000..43d333f
--- /dev/null
+++ b/bcftools/hclust.h
@@ -0,0 +1,77 @@
+/* The MIT License
+
+   Copyright (c) 2016 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3 at sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+/*
+    Simple hierarchical clustering
+*/
+
+#ifndef __HCLUST_H__
+#define __HCLUST_H__
+
+#include <stdio.h>
+
+typedef struct _hclust_t hclust_t;
+
+typedef struct
+{
+    float dist;
+    int nmemb, *memb;
+}
+cluster_t;
+
+#define PDIST(mat,a,b) (mat)[((a)>(b)?((a)*((a)-1)/2+(b)):((b)*((b)-1)/2+(a)))]
+
+/*
+ *  hclust_init() - init and run clustering
+ *  @n:     number of elements
+ *  @pdist: pairwise distances. The array will be modified by hclust and
+ *          must exist until hclust_destroy() is called
+ */
+hclust_t *hclust_init(int n, float *pdist);
+void hclust_destroy(hclust_t *clust);
+
+/*
+ *  hclust_create_list() - returns a list of clusters
+ *  @min_inter_dist: minimum inter-cluster distance. If smaller, elements are considered
+ *                   homogenous, belonging to the same cluster.
+ *  @max_intra_dist: maximum intra-cluster distance allowed. If smaller than 0,
+ *                   the threshold can be heuristically lowered, otherwise considered
+ *                   a fixed cutoff. The pointer will be filled to the cutoff actually used.
+ */
+cluster_t *hclust_create_list(hclust_t *clust, float min_inter_dist, float *max_intra_dist, int *nclust);
+void hclust_destroy_list(cluster_t *clust, int nclust);
+
+/* 
+ *  Access debugging data used in the decision making process.  Note that this
+ *  must be called immediately after hclust_create_list because other calls,
+ *  such as hclust_create_dot(), invalidate the temporary data structures.
+ */
+char **hclust_explain(hclust_t *clust, int *nlines);
+
+char *hclust_create_dot(hclust_t *clust, char **labels, float th);
+
+#endif
+
diff --git a/bcftools/kheap.h b/bcftools/kheap.h
new file mode 100644
index 0000000..ac2f9f9
--- /dev/null
+++ b/bcftools/kheap.h
@@ -0,0 +1,171 @@
+/* The MIT License
+
+   Copyright (C) 2016 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3 at sanger.ac.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+/*
+    Usage example:
+
+        #include "kheap.h"
+
+        // First we prepare the user data to store, in this example it is a
+        // struct with a single element "key", and a comparator function
+        // "is_smaller".  In this example the comparator defines a min heap (as
+        // opposed to a max heap).
+        typedef struct
+        {
+            uint32_t key; 
+        } 
+        data_t;
+        static inline int is_smaller(data_t *a, data_t *b)
+        {
+            return a->key < b->key ? 1 : 0; 
+        }
+        data_t data[3] = { {3}, {2}, {1} };
+
+
+        // Heap declaration, "mh" is an arbitrary string.  The typedef is not
+        // required, it is just a convenience shortcut so that we can use
+        // "heap_t" instead of the generic "khp_mh_t" automatically created by
+        // the KHEAP_INIT macro.
+        KHEAP_INIT(mh, data_t, is_smaller)
+        typedef khp_mh_t heap_t;
+
+        // Initialize the heap, insert the test data, then retrieve them back,
+        // sorted. Multiple heaps with the same name "mh" can be created and
+        // used simultaneously, as long as they all use the same data type
+        // "data_t".
+        heap_t *heap = khp_init(mh);
+
+        for (int i=0; i<3; i++)
+            khp_insert(mh, heap, &data[i]);
+
+        while (heap->ndat)
+        {
+            printf("%d\n", heap->dat[0].pos);
+            khp_delete(mh, heap);
+        }
+
+        // Clean up
+        khp_destroy(mh, heap);
+
+*/
+
+#ifndef __KHEAP_H__
+#define __KHEAP_H__
+
+#include <stdlib.h>
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#ifndef kh_inline
+#ifdef _MSC_VER
+#define kh_inline __inline
+#else
+#define kh_inline inline
+#endif
+#endif /* kh_inline */
+
+#ifndef klib_unused
+#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
+#define klib_unused __attribute__ ((__unused__))
+#else
+#define klib_unused
+#endif
+#endif /* klib_unused */
+
+
+#define __KHEAP_TYPE(name, kheap_t) \
+    typedef struct {                \
+        int ndat, mdat;             \
+        kheap_t *dat;               \
+        kheap_t tmp;                \
+    } khp_##name##_t;
+
+#define khp_parent(i) (((i)-1)/2)
+#define khp_lchild(i) (2*(i)+1)
+#define khp_rchild(i) (2*(i)+2)
+#define khp_swap(hp,i,j) {               \
+        ((hp)->tmp)    = ((hp)->dat[i]); \
+        ((hp)->dat[i]) = ((hp)->dat[j]); \
+        ((hp)->dat[j]) = ((hp)->tmp);    \
+    }
+
+#define __KHEAP_IMPL(name, SCOPE, kheap_t, __cmp)                       \
+    SCOPE khp_##name##_t *khp_init_##name(void)                         \
+    {                                                                   \
+        return (khp_##name##_t*)calloc(1, sizeof(khp_##name##_t));      \
+    }                                                                   \
+    SCOPE void khp_destroy_##name(khp_##name##_t *heap)                 \
+    {                                                                   \
+        if (heap) free(heap->dat);                                      \
+        free(heap);                                                     \
+    }                                                                   \
+    SCOPE int khp_insert_##name(khp_##name##_t *heap, kheap_t *dat)     \
+    {                                                                   \
+        heap->ndat++;                                                   \
+        if ( heap->ndat > heap->mdat )                                  \
+        {                                                               \
+            heap->mdat = heap->ndat;                                    \
+            kroundup32(heap->mdat);                                     \
+            heap->dat = (kheap_t*)realloc(heap->dat, heap->mdat*sizeof(kheap_t));  \
+        }                                                               \
+        int i = heap->ndat - 1;                                         \
+        while ( i && __cmp(dat,&heap->dat[khp_parent(i)]) )             \
+        {                                                               \
+            heap->dat[i] = heap->dat[khp_parent(i)];                    \
+            i = khp_parent(i);                                          \
+        }                                                               \
+        heap->dat[i] = *dat;                                            \
+        return i;                                                       \
+    }                                                                   \
+    SCOPE void khp_heapify_##name(khp_##name##_t *heap, int i)          \
+    {                                                                   \
+/*todo: loop instead of a recursive function? */ \
+        int extreme = khp_lchild(i) < heap->ndat && __cmp(&heap->dat[khp_lchild(i)],&heap->dat[i]) ? khp_lchild(i) : i;     \
+        if ( khp_rchild(i) < heap->ndat && __cmp(&heap->dat[khp_rchild(i)],&heap->dat[extreme]) ) extreme = khp_rchild(i);  \
+        if ( extreme != i )                                             \
+        {                                                               \
+            khp_swap(heap,i,extreme);                                   \
+            khp_heapify_##name(heap,extreme);                           \
+        }                                                               \
+    }                                                                   \
+    SCOPE void khp_delete_##name(khp_##name##_t *heap)                  \
+    {                                                                   \
+        if ( !heap || !heap->ndat ) return;                             \
+        heap->dat[0] = heap->dat[--heap->ndat];                         \
+        khp_heapify_##name(heap, 0);                                    \
+    }                                                                   \
+
+#define KHEAP_INIT(name, kheap_t, __cmp)            \
+    __KHEAP_TYPE(name, kheap_t)                     \
+    __KHEAP_IMPL(name, static kh_inline klib_unused, kheap_t, __cmp)
+
+#define khp_init(name) khp_init_##name()
+#define khp_destroy(name, heap) khp_destroy_##name(heap)
+#define khp_insert(name, heap, dat) khp_insert_##name(heap, dat)
+#define khp_delete(name, heap) khp_delete_##name(heap)
+
+#endif
diff --git a/bcftools/main.c b/bcftools/main.c
index 1892c1d..9350ff8 100644
--- a/bcftools/main.c
+++ b/bcftools/main.c
@@ -54,6 +54,8 @@ int main_polysomy(int argc, char *argv[]);
 #endif
 int main_plugin(int argc, char *argv[]);
 int main_consensus(int argc, char *argv[]);
+int main_csq(int argc, char *argv[]);
+int bam_mpileup(int argc, char *argv[]);
 
 typedef struct
 {
@@ -140,6 +142,10 @@ static cmd_t cmds[] =
       .alias = "cnv",
       .help  = "HMM CNV calling"
     },
+    { .func  = main_csq,
+      .alias = "csq",
+      .help  = "call variation consequences"
+    },
     { .func  = main_vcffilter,
       .alias = "filter",
       .help  = "filter VCF/BCF files using fixed thresholds"
@@ -148,6 +154,10 @@ static cmd_t cmds[] =
       .alias = "gtcheck",
       .help  = "check sample concordance, detect sample swaps and contamination"
     },
+    { .func  = bam_mpileup,
+        .alias = "mpileup",
+        .help  = "multi-way pileup producing genotype likelihoods"
+    },
 #if USE_GPL
     { .func  = main_polysomy,
       .alias = "polysomy",
diff --git a/bcftools/main.c.pysam.c b/bcftools/main.c.pysam.c
index f578442..a2b4a99 100644
--- a/bcftools/main.c.pysam.c
+++ b/bcftools/main.c.pysam.c
@@ -56,6 +56,8 @@ int main_polysomy(int argc, char *argv[]);
 #endif
 int main_plugin(int argc, char *argv[]);
 int main_consensus(int argc, char *argv[]);
+int main_csq(int argc, char *argv[]);
+int bam_mpileup(int argc, char *argv[]);
 
 typedef struct
 {
@@ -142,6 +144,10 @@ static cmd_t cmds[] =
       .alias = "cnv",
       .help  = "HMM CNV calling"
     },
+    { .func  = main_csq,
+      .alias = "csq",
+      .help  = "call variation consequences"
+    },
     { .func  = main_vcffilter,
       .alias = "filter",
       .help  = "filter VCF/BCF files using fixed thresholds"
@@ -150,6 +156,10 @@ static cmd_t cmds[] =
       .alias = "gtcheck",
       .help  = "check sample concordance, detect sample swaps and contamination"
     },
+    { .func  = bam_mpileup,
+        .alias = "mpileup",
+        .help  = "multi-way pileup producing genotype likelihoods"
+    },
 #if USE_GPL
     { .func  = main_polysomy,
       .alias = "polysomy",
diff --git a/bcftools/mcall.c b/bcftools/mcall.c
index 495f849..7f7515f 100644
--- a/bcftools/mcall.c
+++ b/bcftools/mcall.c
@@ -1,6 +1,6 @@
 /*  mcall.c -- multiallelic and rare variant calling.
 
-    Copyright (C) 2012-2014 Genome Research Ltd.
+    Copyright (C) 2012-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -107,6 +107,16 @@ int calc_Pkij(int fals, int mals, int kals, int fpl, int mpl, int kpl)
 //
 static void mcall_init_trios(call_t *call)
 {
+    if ( call->prior_AN )
+    {
+        int id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,call->prior_AN);
+        if ( id==-1 ) error("No such tag \"%s\"\n", call->prior_AN);
+        if ( !bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,id) )  error("No such FORMAT tag \"%s\"\n", call->prior_AN);
+        id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,call->prior_AC);
+        if ( id==-1 ) error("No such tag \"%s\"\n", call->prior_AC);
+        if ( !bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,id) )  error("No such FORMAT tag \"%s\"\n", call->prior_AC);
+    }
+
     // 23, 138, 478 possible diploid trio genotypes with 2, 3, 4 alleles
     call->ntrio[FTYPE_222][2] = 15; call->ntrio[FTYPE_222][3] = 78;  call->ntrio[FTYPE_222][4] = 250;
     call->ntrio[FTYPE_121][2] = 8;  call->ntrio[FTYPE_121][3] = 27;  call->ntrio[FTYPE_121][4] = 64;
@@ -347,8 +357,7 @@ void set_pdg(double *pl2p, int *PLs, double *pdg, int n_smpl, int n_gt, int unse
                 break;
             }
             if ( PLs[j]==bcf_int32_missing ) break;
-            assert( PLs[j]<256 );
-            pdg[j] = pl2p[ PLs[j] ];
+            pdg[j] = PLs[j] < 256 ? pl2p[PLs[j]] : pow(10., -PLs[j]/10.);
             sum += pdg[j];
         }
 
@@ -367,8 +376,7 @@ void set_pdg(double *pl2p, int *PLs, double *pdg, int n_smpl, int n_gt, int unse
             {
                 assert( PLs[j]!=bcf_int32_vector_end );
                 if ( PLs[j]==bcf_int32_missing ) PLs[j] = 255;
-                assert( PLs[j]<256 );
-                pdg[j] = pl2p[ PLs[j] ];
+                pdg[j] = PLs[j] < 256 ? pl2p[PLs[j]] : pow(10., -PLs[j]/10.);
                 sum += pdg[j];
             }
         }
@@ -539,19 +547,19 @@ float calc_HOB(int nref, int nalt, int nhets, int ndiploid)
 /**
   *  log(sum_i exp(a_i))
   */
-static inline double logsumexp(double *vals, int nvals)
-{
-    int i;
-    double max_exp = vals[0];
-    for (i=1; i<nvals; i++)
-        if ( max_exp < vals[i] ) max_exp = vals[i];
-
-    double sum = 0;
-    for (i=0; i<nvals; i++)
-        sum += exp(vals[i] - max_exp);
-
-    return log(sum) + max_exp;
-}
+// static inline double logsumexp(double *vals, int nvals)
+// {
+//     int i;
+//     double max_exp = vals[0];
+//     for (i=1; i<nvals; i++)
+//         if ( max_exp < vals[i] ) max_exp = vals[i];
+
+//     double sum = 0;
+//     for (i=0; i<nvals; i++)
+//         sum += exp(vals[i] - max_exp);
+
+//     return log(sum) + max_exp;
+// }
 /** log(exp(a)+exp(b)) */
 static inline double logsumexp2(double a, double b)
 {
@@ -562,9 +570,9 @@ static inline double logsumexp2(double a, double b)
 }
 
 // Macro to set the most likely alleles
-#define UPDATE_MAX_LKs(als) { \
+#define UPDATE_MAX_LKs(als,sum) { \
      if ( max_lk<lk_tot ) { max_lk = lk_tot; max_als = (als); } \
-     if ( lk_tot_set ) lk_sum = logsumexp2(lk_tot,lk_sum); \
+     if ( sum ) lk_sum = logsumexp2(lk_tot,lk_sum); \
 }
 
 #define SWAP(type_t,x,y) {type_t tmp; tmp = x; x = y; y = tmp; }
@@ -595,7 +603,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
         }
         if ( ia==0 ) ref_lk = lk_tot;   // likelihood of 0/0 for all samples
         else lk_tot += call->theta; // the prior
-        UPDATE_MAX_LKs(1<<ia);
+        UPDATE_MAX_LKs(1<<ia, ia>0 && lk_tot_set);
     }
 
     // Two alleles
@@ -612,14 +620,16 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                 int lk_tot_set = 0;
                 double fa  = call->qsum[ia]/(call->qsum[ia]+call->qsum[ib]);
                 double fb  = call->qsum[ib]/(call->qsum[ia]+call->qsum[ib]);
-                double fab = 2*fa*fb; fa *= fa; fb *= fb;
+                double fa2 = fa*fa;
+                double fb2 = fb*fb;
+                double fab = 2*fa*fb;
                 int isample, ibb = (ib+1)*(ib+2)/2-1, iab = iaa - ia + ib;
                 double *pdg  = call->pdg;
                 for (isample=0; isample<nsmpl; isample++)
                 {
                     double val = 0;
                     if ( !call->ploidy || call->ploidy[isample]==2 )
-                        val = fa*pdg[iaa] + fb*pdg[ibb] + fab*pdg[iab];
+                        val = fa2*pdg[iaa] + fb2*pdg[ibb] + fab*pdg[iab];
                     else if ( call->ploidy && call->ploidy[isample]==1 )
                         val = fa*pdg[iaa] + fb*pdg[ibb];
                     if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
@@ -627,7 +637,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                 }
                 if ( ia!=0 ) lk_tot += call->theta;    // the prior
                 if ( ib!=0 ) lk_tot += call->theta;
-                UPDATE_MAX_LKs(1<<ia|1<<ib);
+                UPDATE_MAX_LKs(1<<ia|1<<ib, lk_tot_set);
             }
         }
     }
@@ -652,7 +662,10 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                     double fa  = call->qsum[ia]/(call->qsum[ia]+call->qsum[ib]+call->qsum[ic]);
                     double fb  = call->qsum[ib]/(call->qsum[ia]+call->qsum[ib]+call->qsum[ic]);
                     double fc  = call->qsum[ic]/(call->qsum[ia]+call->qsum[ib]+call->qsum[ic]);
-                    double fab = 2*fa*fb, fac = 2*fa*fc, fbc = 2*fb*fc; fa *= fa; fb *= fb; fc *= fc;
+                    double fa2 = fa*fa;
+                    double fb2 = fb*fb;
+                    double fc2 = fc*fc;
+                    double fab = 2*fa*fb, fac = 2*fa*fc, fbc = 2*fb*fc;
                     int isample, icc = (ic+1)*(ic+2)/2-1;
                     int iac = iaa - ia + ic, ibc = ibb - ib + ic;
                     double *pdg = call->pdg;
@@ -660,7 +673,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                     {
                         double val = 0;
                         if ( !call->ploidy || call->ploidy[isample]==2 )
-                            val = fa*pdg[iaa] + fb*pdg[ibb] + fc*pdg[icc] + fab*pdg[iab] + fac*pdg[iac] + fbc*pdg[ibc];
+                            val = fa2*pdg[iaa] + fb2*pdg[ibb] + fc2*pdg[icc] + fab*pdg[iab] + fac*pdg[iac] + fbc*pdg[ibc];
                         else if ( call->ploidy && call->ploidy[isample]==1 )
                             val = fa*pdg[iaa] + fb*pdg[ibb] + fc*pdg[icc];
                         if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
@@ -669,7 +682,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                     if ( ia!=0 ) lk_tot += call->theta;    // the prior
                     if ( ib!=0 ) lk_tot += call->theta;    // the prior
                     if ( ic!=0 ) lk_tot += call->theta;    // the prior
-                    UPDATE_MAX_LKs(1<<ia|1<<ib|1<<ic);
+                    UPDATE_MAX_LKs(1<<ia|1<<ib|1<<ic, lk_tot_set);
                 }
             }
         }
@@ -780,7 +793,7 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
         {
             if ( !(out_als & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
             int iaa = (ia+1)*(ia+2)/2-1;            // PL index of the ia/ia genotype
-            double lk = pdg[iaa]*call->qsum[ia]*call->qsum[ia];
+            double lk = ploidy==2 ? pdg[iaa]*call->qsum[ia]*call->qsum[ia] : pdg[iaa]*call->qsum[ia];
             #if USE_PRIOR_FOR_GTS
                 if ( ia!=0 ) lk *= prior;
             #endif
@@ -934,7 +947,7 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
             if ( !(out_als & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
             int iaa   = bcf_alleles2gt(ia,ia);      // PL index of the ia/ia genotype
             int idx   = bcf_alleles2gt(call->als_map[ia],call->als_map[ia]);
-            double lk = pdg[iaa]*call->qsum[ia]*call->qsum[ia];
+            double lk = ploidy==2 ? pdg[iaa]*call->qsum[ia]*call->qsum[ia] : pdg[iaa]*call->qsum[ia];
             sum_lk   += lk;
             gls[idx]  = lk;
             if ( best_lk < lk )
@@ -1184,82 +1197,80 @@ static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, in
 
 void mcall_trim_numberR(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
 {
-    int i, ret;
+    if ( nals==nout_als ) return;
+
+    int i,j, nret, size = sizeof(float);
+
+    void *tmp_ori = call->itmp, *tmp_new = call->PLs;  // reusing PLs storage which is not used at this point
+    int ntmp_ori = call->n_itmp, ntmp_new = call->mPLs;
 
-    // at the moment we have DPR,AD,ADF,ADR all Number=R,Type=Integer,
-    // so only dealing with these cases at the moment
+    // INFO fields
     for (i=0; i<rec->n_info; i++)
     {
         bcf_info_t *info = &rec->d.info[i];
         int vlen = bcf_hdr_id2length(call->hdr,BCF_HL_INFO,info->key);
-        if ( vlen!=BCF_VL_R ) continue;
-        int type = bcf_hdr_id2type(call->hdr,BCF_HL_INFO,info->key);
-        if ( type!=BCF_HT_INT ) continue;
+        if ( vlen!=BCF_VL_R ) continue; // not a Number=R tag
 
-        ret = bcf_get_info_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,info->key), &call->itmp, &call->n_itmp);
-        if ( ret>0 )
+        int type  = bcf_hdr_id2type(call->hdr,BCF_HL_INFO,info->key);
+        const char *key = bcf_hdr_int2id(call->hdr,BCF_DT_ID,info->key);
+        nret = bcf_get_info_values(call->hdr, rec, key, &tmp_ori, &ntmp_ori, type);
+        if ( nret<=0 ) continue;
+
+        if ( nout_als==1 )
+            bcf_update_info_int32(call->hdr, rec, key, tmp_ori, 1);     // has to be the REF, the order could not change
+        else
         {
-            assert( ret==nals );
-            if ( out_als==1 )
-                bcf_update_info_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,info->key), call->itmp, 1);
-            else
+            for (j=0; j<nals; j++)
             {
-                int j;
-                for (j=0; j<nals; j++)
-                {
-                    if ( call->als_map[j]==-1 ) continue;   // to be dropped
-                    call->PLs[ call->als_map[j] ] = call->itmp[j]; // reusing PLs storage which is not used at this point
-                }
-                bcf_update_info_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,info->key), call->PLs, nout_als);
+                int k = call->als_map[j];
+                if ( k==-1 ) continue;   // to be dropped
+                memcpy((char *)tmp_new+size*k, (char *)tmp_ori+size*j, size);
             }
+            bcf_update_info_int32(call->hdr, rec, key, tmp_new, nout_als);
         }
     }
 
+    // FORMAT fields
     for (i=0; i<rec->n_fmt; i++)
     {
         bcf_fmt_t *fmt = &rec->d.fmt[i];
         int vlen = bcf_hdr_id2length(call->hdr,BCF_HL_FMT,fmt->id);
-        if ( vlen!=BCF_VL_R ) continue;
+        if ( vlen!=BCF_VL_R ) continue; // not a Number=R tag
+
         int type = bcf_hdr_id2type(call->hdr,BCF_HL_FMT,fmt->id);
-        if ( type!=BCF_HT_INT ) continue;
+        const char *key = bcf_hdr_int2id(call->hdr,BCF_DT_ID,fmt->id);
+        nret = bcf_get_format_values(call->hdr, rec, key, &tmp_ori, &ntmp_ori, type);
+        if (nret<=0) continue;
+        int nsmpl = bcf_hdr_nsamples(call->hdr);
 
-        ret = bcf_get_format_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,fmt->id), &call->itmp, &call->n_itmp);
-        if ( ret>0 )
-        {
-            int j, nsmpl = bcf_hdr_nsamples(call->hdr);
-            int ndp = ret / nsmpl;
-            assert( ndp==nals );
-            if ( out_als==1 )
-            {
-                for (j=0; j<nsmpl; j++)
-                    call->PLs[j] = call->itmp[j*ndp];
+        assert( nret==nals*nsmpl );
 
-                bcf_update_format_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,fmt->id), call->PLs, nsmpl);
-            }
-            else
+        for (j=0; j<nsmpl; j++)
+        {
+            char *ptr_src = (char *)tmp_ori + j*nals*size;
+            char *ptr_dst = (char *)tmp_new + j*nout_als*size;
+            int k;
+            for (k=0; k<nals; k++)
             {
-                int k;
-                for (j=0; j<nsmpl; j++)
-                {
-                    int32_t *dp_dst = call->PLs + j*nout_als;
-                    int32_t *dp_src = call->itmp + j*ndp;
-                    for (k=0; k<nals; k++)
-                    {
-                        if ( call->als_map[k]==-1 ) continue;   // to be dropped
-                        dp_dst[ call->als_map[k] ] = dp_src[k]; // reusing PLs storage which is not used at this point
-                    }
-                }
-                bcf_update_format_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,fmt->id), call->PLs, nsmpl*nout_als);
+                int l = call->als_map[k];
+                if ( l==-1 ) continue;   // to be dropped
+                memcpy(ptr_dst+size*l, ptr_src+size*k, size);
             }
         }
+        bcf_update_format_int32(call->hdr, rec, key, tmp_new, nout_als*nsmpl);
     }
+
+    call->PLs    = (int32_t*) tmp_new;
+    call->mPLs   = ntmp_new;
+    call->itmp   = (int32_t*) tmp_ori;
+    call->n_itmp = ntmp_ori;
 }
 
 
 // NB: in this function we temporarily use calls->als_map for a different
 // purpose to store mapping from new (target) alleles to original alleles.
 //
-static void mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
+static int mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
 {
     bcf_sr_regions_t *tgt = call->srs->targets;
     if ( tgt->nals>5 ) error("Maximum accepted number of alleles is 5, got %d\n", tgt->nals);
@@ -1282,7 +1293,7 @@ static void mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
         call->als[nals] = tgt->als[i];
         j = vcmp_find_allele(call->vcmp, rec->d.allele+1, rec->n_allele - 1, tgt->als[i]);
 
-        if ( j+1==*unseen ) error("Cannot constrain to %s\n",tgt->als[i]);
+        if ( j+1==*unseen ) { fprintf(stderr,"fixme? Cannot constrain to %s\n",tgt->als[i]); return -1; }
         
         if ( j>=0 )
         {
@@ -1308,7 +1319,7 @@ static void mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
         nals++;
     }
 
-    if ( !has_new && nals==rec->n_allele ) return;
+    if ( !has_new && nals==rec->n_allele ) return 0;
     bcf_update_alleles(call->hdr, rec, (const char**)call->als, nals);
 
     // create mapping from new PL to old PL
@@ -1360,6 +1371,7 @@ static void mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
     bcf_update_info_float(call->hdr, rec, "QS", qsum, nals);
 
     if ( *unseen ) *unseen = nals-1;
+    return 0;
 }
 
 
@@ -1374,7 +1386,7 @@ int mcall(call_t *call, bcf1_t *rec)
     int i, unseen = call->unseen;
 
     // Force alleles when calling genotypes given alleles was requested
-    if ( call->flag & CALL_CONSTR_ALLELES ) mcall_constrain_alleles(call, rec, &unseen);
+    if ( call->flag & CALL_CONSTR_ALLELES && mcall_constrain_alleles(call, rec, &unseen)!=0 ) return -2;
 
     int nsmpl = bcf_hdr_nsamples(call->hdr);
     int nals  = rec->n_allele;
@@ -1395,7 +1407,7 @@ int mcall(call_t *call, bcf1_t *rec)
     #if QS_FROM_PDG
         estimate_qsum(call, rec);
     #else
-        // Get sum of qualities
+        // Get sum of qualities, serves as an AF estimate, f_x = QS/N in Eq. 1 in call-m math notes.
         int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->qsum, &call->nqsum);
         if ( nqs<=0 ) error("The QS annotation not present at %s:%d\n", bcf_seqname(call->hdr,rec),rec->pos+1);
         if ( nqs < nals )
@@ -1406,23 +1418,50 @@ int mcall(call_t *call, bcf1_t *rec)
             hts_expand(float,nals,call->nqsum,call->qsum);
             for (i=nqs; i<nals; i++) call->qsum[i] = 0;
         }
-        float qsum_tot = 0;
-        for (i=0; i<nals; i++) qsum_tot += call->qsum[i];
-        if ( !call->qsum[0] )
+
+        // If available, take into account reference panel AFs
+        if ( call->prior_AN && bcf_get_info_int32(call->hdr, rec, call->prior_AN ,&call->ac, &call->nac)==1 )
         {
-            // As P(RR)!=0 even for QS(ref)=0, we set QS(ref) to a small value,
-            // an equivalent of a single reference read.
-            if ( bcf_get_info_int32(call->hdr, rec, "DP", &call->itmp, &call->n_itmp)!=1 )
-                error("Could not read DP at %s:%d\n", call->hdr->id[BCF_DT_CTG][rec->rid].key,rec->pos+1);
-            if ( call->itmp[0] )
+            int an = call->ac[0];
+            if ( bcf_get_info_int32(call->hdr, rec, call->prior_AC ,&call->ac, &call->nac)==nals-1 )
             {
-                call->qsum[0] = 1.0 / call->itmp[0] / nsmpl;
-                qsum_tot += call->qsum[0];
+                int ac0 = an;   // number of alleles in the reference population
+                for (i=0; i<nals-1; i++)
+                {
+                    if ( call->ac[i]==bcf_int32_vector_end ) break;
+                    if ( call->ac[i]==bcf_int32_missing ) continue;
+                    ac0 -= call->ac[i];
+                    call->qsum[i+1] += call->ac[i]*0.5;
+                }
+                if ( ac0<0 ) error("Incorrect %s,%s values at %s:%d\n", call->prior_AN,call->prior_AC,bcf_seqname(call->hdr,rec),rec->pos+1);
+                call->qsum[0] += ac0*0.5;
+                for (i=0; i<nals; i++) call->qsum[i] /= nsmpl + 0.5*an;
             }
         }
+
+        float qsum_tot = 0;
+        for (i=0; i<nals; i++) qsum_tot += call->qsum[i];
+
+        // Is this still necessary??
+        //
+        //  if (0&& !call->qsum[0] )
+        //  {
+        //      // As P(RR)!=0 even for QS(ref)=0, we set QS(ref) to a small value,
+        //      // an equivalent of a single reference read.
+        //      if ( bcf_get_info_int32(call->hdr, rec, "DP", &call->itmp, &call->n_itmp)!=1 )
+        //          error("Could not read DP at %s:%d\n", call->hdr->id[BCF_DT_CTG][rec->rid].key,rec->pos+1);
+        //      if ( call->itmp[0] )
+        //      {
+        //          call->qsum[0] = 1.0 / call->itmp[0] / nsmpl;
+        //          qsum_tot += call->qsum[0];
+        //      }
+        //  }
+
         if ( qsum_tot ) for (i=0; i<nals; i++) call->qsum[i] /= qsum_tot;
     #endif
 
+    bcf_update_info_int32(call->hdr, rec, "QS", NULL, 0);      // remove QS tag
+
     // Find the best combination of alleles
     int out_als, nout;
     if ( nals > 8*sizeof(out_als) )
@@ -1497,13 +1536,17 @@ int mcall(call_t *call, bcf1_t *rec)
         if ( hob != HUGE_VAL ) bcf_update_info_float(call->hdr, rec, "HOB", &hob, 1);
 
         // Quality of a variant site. fabs() to avoid negative zeros in VCF output when CALL_KEEPALT is set
-        rec->qual = call->lk_sum==-HUGE_VAL || call->ref_lk==0 ? 0 : fabs(-4.343*(call->ref_lk - call->lk_sum));
+        rec->qual = -4.343*(call->ref_lk - logsumexp2(call->lk_sum,call->ref_lk));
     }
     else
     {
         // Set the quality of a REF site
-        rec->qual = call->lk_sum==-HUGE_VAL || call->ref_lk==0 ? 0 : -4.343*log(1 - exp(call->ref_lk - call->lk_sum));
+        if ( call->lk_sum==-HUGE_VAL )  // no support from (high quality) reads, so QUAL=1-prior
+            rec->qual = call->theta ? -4.343*call->theta : 0;
+        else
+            rec->qual = -4.343*(call->lk_sum - logsumexp2(call->lk_sum,call->ref_lk));
     }
+
     if ( rec->qual>999 ) rec->qual = 999;
     if ( rec->qual>50 ) rec->qual = rint(rec->qual);
 
@@ -1530,7 +1573,6 @@ int mcall(call_t *call, bcf1_t *rec)
     }
 
     bcf_update_info_int32(call->hdr, rec, "I16", NULL, 0);     // remove I16 tag
-    bcf_update_info_int32(call->hdr, rec, "QS", NULL, 0);      // remove QS tag
 
     return nout;
 }
diff --git a/bcftools/mcall.c.pysam.c b/bcftools/mcall.c.pysam.c
index 29ed799..a315656 100644
--- a/bcftools/mcall.c.pysam.c
+++ b/bcftools/mcall.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  mcall.c -- multiallelic and rare variant calling.
 
-    Copyright (C) 2012-2014 Genome Research Ltd.
+    Copyright (C) 2012-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -109,6 +109,16 @@ int calc_Pkij(int fals, int mals, int kals, int fpl, int mpl, int kpl)
 //
 static void mcall_init_trios(call_t *call)
 {
+    if ( call->prior_AN )
+    {
+        int id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,call->prior_AN);
+        if ( id==-1 ) error("No such tag \"%s\"\n", call->prior_AN);
+        if ( !bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,id) )  error("No such FORMAT tag \"%s\"\n", call->prior_AN);
+        id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,call->prior_AC);
+        if ( id==-1 ) error("No such tag \"%s\"\n", call->prior_AC);
+        if ( !bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,id) )  error("No such FORMAT tag \"%s\"\n", call->prior_AC);
+    }
+
     // 23, 138, 478 possible diploid trio genotypes with 2, 3, 4 alleles
     call->ntrio[FTYPE_222][2] = 15; call->ntrio[FTYPE_222][3] = 78;  call->ntrio[FTYPE_222][4] = 250;
     call->ntrio[FTYPE_121][2] = 8;  call->ntrio[FTYPE_121][3] = 27;  call->ntrio[FTYPE_121][4] = 64;
@@ -349,8 +359,7 @@ void set_pdg(double *pl2p, int *PLs, double *pdg, int n_smpl, int n_gt, int unse
                 break;
             }
             if ( PLs[j]==bcf_int32_missing ) break;
-            assert( PLs[j]<256 );
-            pdg[j] = pl2p[ PLs[j] ];
+            pdg[j] = PLs[j] < 256 ? pl2p[PLs[j]] : pow(10., -PLs[j]/10.);
             sum += pdg[j];
         }
 
@@ -369,8 +378,7 @@ void set_pdg(double *pl2p, int *PLs, double *pdg, int n_smpl, int n_gt, int unse
             {
                 assert( PLs[j]!=bcf_int32_vector_end );
                 if ( PLs[j]==bcf_int32_missing ) PLs[j] = 255;
-                assert( PLs[j]<256 );
-                pdg[j] = pl2p[ PLs[j] ];
+                pdg[j] = PLs[j] < 256 ? pl2p[PLs[j]] : pow(10., -PLs[j]/10.);
                 sum += pdg[j];
             }
         }
@@ -541,19 +549,19 @@ float calc_HOB(int nref, int nalt, int nhets, int ndiploid)
 /**
   *  log(sum_i exp(a_i))
   */
-static inline double logsumexp(double *vals, int nvals)
-{
-    int i;
-    double max_exp = vals[0];
-    for (i=1; i<nvals; i++)
-        if ( max_exp < vals[i] ) max_exp = vals[i];
-
-    double sum = 0;
-    for (i=0; i<nvals; i++)
-        sum += exp(vals[i] - max_exp);
-
-    return log(sum) + max_exp;
-}
+// static inline double logsumexp(double *vals, int nvals)
+// {
+//     int i;
+//     double max_exp = vals[0];
+//     for (i=1; i<nvals; i++)
+//         if ( max_exp < vals[i] ) max_exp = vals[i];
+
+//     double sum = 0;
+//     for (i=0; i<nvals; i++)
+//         sum += exp(vals[i] - max_exp);
+
+//     return log(sum) + max_exp;
+// }
 /** log(exp(a)+exp(b)) */
 static inline double logsumexp2(double a, double b)
 {
@@ -564,9 +572,9 @@ static inline double logsumexp2(double a, double b)
 }
 
 // Macro to set the most likely alleles
-#define UPDATE_MAX_LKs(als) { \
+#define UPDATE_MAX_LKs(als,sum) { \
      if ( max_lk<lk_tot ) { max_lk = lk_tot; max_als = (als); } \
-     if ( lk_tot_set ) lk_sum = logsumexp2(lk_tot,lk_sum); \
+     if ( sum ) lk_sum = logsumexp2(lk_tot,lk_sum); \
 }
 
 #define SWAP(type_t,x,y) {type_t tmp; tmp = x; x = y; y = tmp; }
@@ -597,7 +605,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
         }
         if ( ia==0 ) ref_lk = lk_tot;   // likelihood of 0/0 for all samples
         else lk_tot += call->theta; // the prior
-        UPDATE_MAX_LKs(1<<ia);
+        UPDATE_MAX_LKs(1<<ia, ia>0 && lk_tot_set);
     }
 
     // Two alleles
@@ -614,14 +622,16 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                 int lk_tot_set = 0;
                 double fa  = call->qsum[ia]/(call->qsum[ia]+call->qsum[ib]);
                 double fb  = call->qsum[ib]/(call->qsum[ia]+call->qsum[ib]);
-                double fab = 2*fa*fb; fa *= fa; fb *= fb;
+                double fa2 = fa*fa;
+                double fb2 = fb*fb;
+                double fab = 2*fa*fb;
                 int isample, ibb = (ib+1)*(ib+2)/2-1, iab = iaa - ia + ib;
                 double *pdg  = call->pdg;
                 for (isample=0; isample<nsmpl; isample++)
                 {
                     double val = 0;
                     if ( !call->ploidy || call->ploidy[isample]==2 )
-                        val = fa*pdg[iaa] + fb*pdg[ibb] + fab*pdg[iab];
+                        val = fa2*pdg[iaa] + fb2*pdg[ibb] + fab*pdg[iab];
                     else if ( call->ploidy && call->ploidy[isample]==1 )
                         val = fa*pdg[iaa] + fb*pdg[ibb];
                     if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
@@ -629,7 +639,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                 }
                 if ( ia!=0 ) lk_tot += call->theta;    // the prior
                 if ( ib!=0 ) lk_tot += call->theta;
-                UPDATE_MAX_LKs(1<<ia|1<<ib);
+                UPDATE_MAX_LKs(1<<ia|1<<ib, lk_tot_set);
             }
         }
     }
@@ -654,7 +664,10 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                     double fa  = call->qsum[ia]/(call->qsum[ia]+call->qsum[ib]+call->qsum[ic]);
                     double fb  = call->qsum[ib]/(call->qsum[ia]+call->qsum[ib]+call->qsum[ic]);
                     double fc  = call->qsum[ic]/(call->qsum[ia]+call->qsum[ib]+call->qsum[ic]);
-                    double fab = 2*fa*fb, fac = 2*fa*fc, fbc = 2*fb*fc; fa *= fa; fb *= fb; fc *= fc;
+                    double fa2 = fa*fa;
+                    double fb2 = fb*fb;
+                    double fc2 = fc*fc;
+                    double fab = 2*fa*fb, fac = 2*fa*fc, fbc = 2*fb*fc;
                     int isample, icc = (ic+1)*(ic+2)/2-1;
                     int iac = iaa - ia + ic, ibc = ibb - ib + ic;
                     double *pdg = call->pdg;
@@ -662,7 +675,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                     {
                         double val = 0;
                         if ( !call->ploidy || call->ploidy[isample]==2 )
-                            val = fa*pdg[iaa] + fb*pdg[ibb] + fc*pdg[icc] + fab*pdg[iab] + fac*pdg[iac] + fbc*pdg[ibc];
+                            val = fa2*pdg[iaa] + fb2*pdg[ibb] + fc2*pdg[icc] + fab*pdg[iab] + fac*pdg[iac] + fbc*pdg[ibc];
                         else if ( call->ploidy && call->ploidy[isample]==1 )
                             val = fa*pdg[iaa] + fb*pdg[ibb] + fc*pdg[icc];
                         if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
@@ -671,7 +684,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
                     if ( ia!=0 ) lk_tot += call->theta;    // the prior
                     if ( ib!=0 ) lk_tot += call->theta;    // the prior
                     if ( ic!=0 ) lk_tot += call->theta;    // the prior
-                    UPDATE_MAX_LKs(1<<ia|1<<ib|1<<ic);
+                    UPDATE_MAX_LKs(1<<ia|1<<ib|1<<ic, lk_tot_set);
                 }
             }
         }
@@ -782,7 +795,7 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
         {
             if ( !(out_als & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
             int iaa = (ia+1)*(ia+2)/2-1;            // PL index of the ia/ia genotype
-            double lk = pdg[iaa]*call->qsum[ia]*call->qsum[ia];
+            double lk = ploidy==2 ? pdg[iaa]*call->qsum[ia]*call->qsum[ia] : pdg[iaa]*call->qsum[ia];
             #if USE_PRIOR_FOR_GTS
                 if ( ia!=0 ) lk *= prior;
             #endif
@@ -936,7 +949,7 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
             if ( !(out_als & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
             int iaa   = bcf_alleles2gt(ia,ia);      // PL index of the ia/ia genotype
             int idx   = bcf_alleles2gt(call->als_map[ia],call->als_map[ia]);
-            double lk = pdg[iaa]*call->qsum[ia]*call->qsum[ia];
+            double lk = ploidy==2 ? pdg[iaa]*call->qsum[ia]*call->qsum[ia] : pdg[iaa]*call->qsum[ia];
             sum_lk   += lk;
             gls[idx]  = lk;
             if ( best_lk < lk )
@@ -1186,82 +1199,80 @@ static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, in
 
 void mcall_trim_numberR(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
 {
-    int i, ret;
+    if ( nals==nout_als ) return;
+
+    int i,j, nret, size = sizeof(float);
+
+    void *tmp_ori = call->itmp, *tmp_new = call->PLs;  // reusing PLs storage which is not used at this point
+    int ntmp_ori = call->n_itmp, ntmp_new = call->mPLs;
 
-    // at the moment we have DPR,AD,ADF,ADR all Number=R,Type=Integer,
-    // so only dealing with these cases at the moment
+    // INFO fields
     for (i=0; i<rec->n_info; i++)
     {
         bcf_info_t *info = &rec->d.info[i];
         int vlen = bcf_hdr_id2length(call->hdr,BCF_HL_INFO,info->key);
-        if ( vlen!=BCF_VL_R ) continue;
-        int type = bcf_hdr_id2type(call->hdr,BCF_HL_INFO,info->key);
-        if ( type!=BCF_HT_INT ) continue;
+        if ( vlen!=BCF_VL_R ) continue; // not a Number=R tag
 
-        ret = bcf_get_info_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,info->key), &call->itmp, &call->n_itmp);
-        if ( ret>0 )
+        int type  = bcf_hdr_id2type(call->hdr,BCF_HL_INFO,info->key);
+        const char *key = bcf_hdr_int2id(call->hdr,BCF_DT_ID,info->key);
+        nret = bcf_get_info_values(call->hdr, rec, key, &tmp_ori, &ntmp_ori, type);
+        if ( nret<=0 ) continue;
+
+        if ( nout_als==1 )
+            bcf_update_info_int32(call->hdr, rec, key, tmp_ori, 1);     // has to be the REF, the order could not change
+        else
         {
-            assert( ret==nals );
-            if ( out_als==1 )
-                bcf_update_info_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,info->key), call->itmp, 1);
-            else
+            for (j=0; j<nals; j++)
             {
-                int j;
-                for (j=0; j<nals; j++)
-                {
-                    if ( call->als_map[j]==-1 ) continue;   // to be dropped
-                    call->PLs[ call->als_map[j] ] = call->itmp[j]; // reusing PLs storage which is not used at this point
-                }
-                bcf_update_info_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,info->key), call->PLs, nout_als);
+                int k = call->als_map[j];
+                if ( k==-1 ) continue;   // to be dropped
+                memcpy((char *)tmp_new+size*k, (char *)tmp_ori+size*j, size);
             }
+            bcf_update_info_int32(call->hdr, rec, key, tmp_new, nout_als);
         }
     }
 
+    // FORMAT fields
     for (i=0; i<rec->n_fmt; i++)
     {
         bcf_fmt_t *fmt = &rec->d.fmt[i];
         int vlen = bcf_hdr_id2length(call->hdr,BCF_HL_FMT,fmt->id);
-        if ( vlen!=BCF_VL_R ) continue;
+        if ( vlen!=BCF_VL_R ) continue; // not a Number=R tag
+
         int type = bcf_hdr_id2type(call->hdr,BCF_HL_FMT,fmt->id);
-        if ( type!=BCF_HT_INT ) continue;
+        const char *key = bcf_hdr_int2id(call->hdr,BCF_DT_ID,fmt->id);
+        nret = bcf_get_format_values(call->hdr, rec, key, &tmp_ori, &ntmp_ori, type);
+        if (nret<=0) continue;
+        int nsmpl = bcf_hdr_nsamples(call->hdr);
 
-        ret = bcf_get_format_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,fmt->id), &call->itmp, &call->n_itmp);
-        if ( ret>0 )
-        {
-            int j, nsmpl = bcf_hdr_nsamples(call->hdr);
-            int ndp = ret / nsmpl;
-            assert( ndp==nals );
-            if ( out_als==1 )
-            {
-                for (j=0; j<nsmpl; j++)
-                    call->PLs[j] = call->itmp[j*ndp];
+        assert( nret==nals*nsmpl );
 
-                bcf_update_format_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,fmt->id), call->PLs, nsmpl);
-            }
-            else
+        for (j=0; j<nsmpl; j++)
+        {
+            char *ptr_src = (char *)tmp_ori + j*nals*size;
+            char *ptr_dst = (char *)tmp_new + j*nout_als*size;
+            int k;
+            for (k=0; k<nals; k++)
             {
-                int k;
-                for (j=0; j<nsmpl; j++)
-                {
-                    int32_t *dp_dst = call->PLs + j*nout_als;
-                    int32_t *dp_src = call->itmp + j*ndp;
-                    for (k=0; k<nals; k++)
-                    {
-                        if ( call->als_map[k]==-1 ) continue;   // to be dropped
-                        dp_dst[ call->als_map[k] ] = dp_src[k]; // reusing PLs storage which is not used at this point
-                    }
-                }
-                bcf_update_format_int32(call->hdr, rec, bcf_hdr_int2id(call->hdr,BCF_DT_ID,fmt->id), call->PLs, nsmpl*nout_als);
+                int l = call->als_map[k];
+                if ( l==-1 ) continue;   // to be dropped
+                memcpy(ptr_dst+size*l, ptr_src+size*k, size);
             }
         }
+        bcf_update_format_int32(call->hdr, rec, key, tmp_new, nout_als*nsmpl);
     }
+
+    call->PLs    = (int32_t*) tmp_new;
+    call->mPLs   = ntmp_new;
+    call->itmp   = (int32_t*) tmp_ori;
+    call->n_itmp = ntmp_ori;
 }
 
 
 // NB: in this function we temporarily use calls->als_map for a different
 // purpose to store mapping from new (target) alleles to original alleles.
 //
-static void mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
+static int mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
 {
     bcf_sr_regions_t *tgt = call->srs->targets;
     if ( tgt->nals>5 ) error("Maximum accepted number of alleles is 5, got %d\n", tgt->nals);
@@ -1284,7 +1295,7 @@ static void mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
         call->als[nals] = tgt->als[i];
         j = vcmp_find_allele(call->vcmp, rec->d.allele+1, rec->n_allele - 1, tgt->als[i]);
 
-        if ( j+1==*unseen ) error("Cannot constrain to %s\n",tgt->als[i]);
+        if ( j+1==*unseen ) { fprintf(pysam_stderr,"fixme? Cannot constrain to %s\n",tgt->als[i]); return -1; }
         
         if ( j>=0 )
         {
@@ -1310,7 +1321,7 @@ static void mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
         nals++;
     }
 
-    if ( !has_new && nals==rec->n_allele ) return;
+    if ( !has_new && nals==rec->n_allele ) return 0;
     bcf_update_alleles(call->hdr, rec, (const char**)call->als, nals);
 
     // create mapping from new PL to old PL
@@ -1362,6 +1373,7 @@ static void mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
     bcf_update_info_float(call->hdr, rec, "QS", qsum, nals);
 
     if ( *unseen ) *unseen = nals-1;
+    return 0;
 }
 
 
@@ -1376,7 +1388,7 @@ int mcall(call_t *call, bcf1_t *rec)
     int i, unseen = call->unseen;
 
     // Force alleles when calling genotypes given alleles was requested
-    if ( call->flag & CALL_CONSTR_ALLELES ) mcall_constrain_alleles(call, rec, &unseen);
+    if ( call->flag & CALL_CONSTR_ALLELES && mcall_constrain_alleles(call, rec, &unseen)!=0 ) return -2;
 
     int nsmpl = bcf_hdr_nsamples(call->hdr);
     int nals  = rec->n_allele;
@@ -1397,7 +1409,7 @@ int mcall(call_t *call, bcf1_t *rec)
     #if QS_FROM_PDG
         estimate_qsum(call, rec);
     #else
-        // Get sum of qualities
+        // Get sum of qualities, serves as an AF estimate, f_x = QS/N in Eq. 1 in call-m math notes.
         int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->qsum, &call->nqsum);
         if ( nqs<=0 ) error("The QS annotation not present at %s:%d\n", bcf_seqname(call->hdr,rec),rec->pos+1);
         if ( nqs < nals )
@@ -1408,23 +1420,50 @@ int mcall(call_t *call, bcf1_t *rec)
             hts_expand(float,nals,call->nqsum,call->qsum);
             for (i=nqs; i<nals; i++) call->qsum[i] = 0;
         }
-        float qsum_tot = 0;
-        for (i=0; i<nals; i++) qsum_tot += call->qsum[i];
-        if ( !call->qsum[0] )
+
+        // If available, take into account reference panel AFs
+        if ( call->prior_AN && bcf_get_info_int32(call->hdr, rec, call->prior_AN ,&call->ac, &call->nac)==1 )
         {
-            // As P(RR)!=0 even for QS(ref)=0, we set QS(ref) to a small value,
-            // an equivalent of a single reference read.
-            if ( bcf_get_info_int32(call->hdr, rec, "DP", &call->itmp, &call->n_itmp)!=1 )
-                error("Could not read DP at %s:%d\n", call->hdr->id[BCF_DT_CTG][rec->rid].key,rec->pos+1);
-            if ( call->itmp[0] )
+            int an = call->ac[0];
+            if ( bcf_get_info_int32(call->hdr, rec, call->prior_AC ,&call->ac, &call->nac)==nals-1 )
             {
-                call->qsum[0] = 1.0 / call->itmp[0] / nsmpl;
-                qsum_tot += call->qsum[0];
+                int ac0 = an;   // number of alleles in the reference population
+                for (i=0; i<nals-1; i++)
+                {
+                    if ( call->ac[i]==bcf_int32_vector_end ) break;
+                    if ( call->ac[i]==bcf_int32_missing ) continue;
+                    ac0 -= call->ac[i];
+                    call->qsum[i+1] += call->ac[i]*0.5;
+                }
+                if ( ac0<0 ) error("Incorrect %s,%s values at %s:%d\n", call->prior_AN,call->prior_AC,bcf_seqname(call->hdr,rec),rec->pos+1);
+                call->qsum[0] += ac0*0.5;
+                for (i=0; i<nals; i++) call->qsum[i] /= nsmpl + 0.5*an;
             }
         }
+
+        float qsum_tot = 0;
+        for (i=0; i<nals; i++) qsum_tot += call->qsum[i];
+
+        // Is this still necessary??
+        //
+        //  if (0&& !call->qsum[0] )
+        //  {
+        //      // As P(RR)!=0 even for QS(ref)=0, we set QS(ref) to a small value,
+        //      // an equivalent of a single reference read.
+        //      if ( bcf_get_info_int32(call->hdr, rec, "DP", &call->itmp, &call->n_itmp)!=1 )
+        //          error("Could not read DP at %s:%d\n", call->hdr->id[BCF_DT_CTG][rec->rid].key,rec->pos+1);
+        //      if ( call->itmp[0] )
+        //      {
+        //          call->qsum[0] = 1.0 / call->itmp[0] / nsmpl;
+        //          qsum_tot += call->qsum[0];
+        //      }
+        //  }
+
         if ( qsum_tot ) for (i=0; i<nals; i++) call->qsum[i] /= qsum_tot;
     #endif
 
+    bcf_update_info_int32(call->hdr, rec, "QS", NULL, 0);      // remove QS tag
+
     // Find the best combination of alleles
     int out_als, nout;
     if ( nals > 8*sizeof(out_als) )
@@ -1499,13 +1538,17 @@ int mcall(call_t *call, bcf1_t *rec)
         if ( hob != HUGE_VAL ) bcf_update_info_float(call->hdr, rec, "HOB", &hob, 1);
 
         // Quality of a variant site. fabs() to avoid negative zeros in VCF output when CALL_KEEPALT is set
-        rec->qual = call->lk_sum==-HUGE_VAL || call->ref_lk==0 ? 0 : fabs(-4.343*(call->ref_lk - call->lk_sum));
+        rec->qual = -4.343*(call->ref_lk - logsumexp2(call->lk_sum,call->ref_lk));
     }
     else
     {
         // Set the quality of a REF site
-        rec->qual = call->lk_sum==-HUGE_VAL || call->ref_lk==0 ? 0 : -4.343*log(1 - exp(call->ref_lk - call->lk_sum));
+        if ( call->lk_sum==-HUGE_VAL )  // no support from (high quality) reads, so QUAL=1-prior
+            rec->qual = call->theta ? -4.343*call->theta : 0;
+        else
+            rec->qual = -4.343*(call->lk_sum - logsumexp2(call->lk_sum,call->ref_lk));
     }
+
     if ( rec->qual>999 ) rec->qual = 999;
     if ( rec->qual>50 ) rec->qual = rint(rec->qual);
 
@@ -1532,7 +1575,6 @@ int mcall(call_t *call, bcf1_t *rec)
     }
 
     bcf_update_info_int32(call->hdr, rec, "I16", NULL, 0);     // remove I16 tag
-    bcf_update_info_int32(call->hdr, rec, "QS", NULL, 0);      // remove QS tag
 
     return nout;
 }
diff --git a/bcftools/mpileup.c b/bcftools/mpileup.c
new file mode 100644
index 0000000..ac37dd4
--- /dev/null
+++ b/bcftools/mpileup.c
@@ -0,0 +1,1110 @@
+/*  mpileup.c -- mpileup subcommand. Previously bam_plcmd.c from samtools
+
+    Copyright (C) 2008-2017 Genome Research Ltd.
+    Portions copyright (C) 2009-2012 Broad Institute.
+
+    Author: Heng Li <lh3 at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <string.h>
+#include <strings.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <getopt.h>
+#include <htslib/sam.h>
+#include <htslib/faidx.h>
+#include <htslib/kstring.h>
+#include <htslib/khash_str2int.h>
+#include <assert.h>
+#include "regidx.h"
+#include "bcftools.h"
+#include "bam2bcf.h"
+#include "bam_sample.h"
+#include "gvcf.h"
+
+#define MPLP_BCF        1
+#define MPLP_VCF        (1<<1)
+#define MPLP_NO_COMP    (1<<2)
+#define MPLP_NO_ORPHAN  (1<<3)
+#define MPLP_REALN      (1<<4)
+#define MPLP_NO_INDEL   (1<<5)
+#define MPLP_REDO_BAQ   (1<<6)
+#define MPLP_ILLUMINA13 (1<<7)
+#define MPLP_IGNORE_RG  (1<<8)
+#define MPLP_PRINT_POS  (1<<9)
+#define MPLP_PRINT_MAPQ (1<<10)
+#define MPLP_PER_SAMPLE (1<<11)
+#define MPLP_SMART_OVERLAPS (1<<12)
+
+typedef struct _mplp_aux_t mplp_aux_t;
+typedef struct _mplp_pileup_t mplp_pileup_t;
+
+// Data shared by all bam files
+typedef struct {
+    int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag;
+    int rflag_require, rflag_filter, output_type;
+    int openQ, extQ, tandemQ, min_support; // for indels
+    double min_frac; // for indels
+    char *reg_fname, *pl_list, *fai_fname, *output_fname;
+    int reg_is_file, record_cmd_line, n_threads;
+    faidx_t *fai;
+    regidx_t *bed, *reg;    // bed: skipping regions, reg: index-jump to regions
+    regitr_t *bed_itr, *reg_itr;
+    int bed_logic;          // 1: include region, 0: exclude region
+    gvcf_t *gvcf;
+
+    // auxiliary structures for calling
+    bcf_callaux_t *bca;
+    bcf_callret1_t *bcr;
+    bcf_call_t bc;
+    bam_mplp_t iter;
+    mplp_aux_t **mplp_data;
+    int nfiles;
+    char **files;
+    mplp_pileup_t *gplp;
+    int *n_plp;
+    const bam_pileup1_t **plp;
+    bam_smpl_t *bsmpl;
+    kstring_t buf;
+    bcf1_t *bcf_rec;
+    htsFile *bcf_fp;
+    bcf_hdr_t *bcf_hdr;
+    int argc;
+    char **argv;
+} mplp_conf_t;
+
+typedef struct {
+    char *ref[2];
+    int ref_id[2];
+    int ref_len[2];
+} mplp_ref_t;
+
+#define MPLP_REF_INIT {{NULL,NULL},{-1,-1},{0,0}}
+
+// Data specific to each bam file
+struct _mplp_aux_t {
+    samFile *fp;
+    hts_itr_t *iter;
+    bam_hdr_t *h;
+    mplp_ref_t *ref;
+    const mplp_conf_t *conf;
+    int bam_id;
+    hts_idx_t *idx;     // maintained only with more than one -r regions
+};
+
+// Data passed to htslib/mpileup
+struct _mplp_pileup_t {
+    int n;
+    int *n_plp, *m_plp;
+    bam_pileup1_t **plp;
+};
+
+static int mplp_get_ref(mplp_aux_t *ma, int tid,  char **ref, int *ref_len) {
+    mplp_ref_t *r = ma->ref;
+
+    //printf("get ref %d {%d/%p, %d/%p}\n", tid, r->ref_id[0], r->ref[0], r->ref_id[1], r->ref[1]);
+
+    if (!r || !ma->conf->fai) {
+        *ref = NULL;
+        return 0;
+    }
+
+    // Do we need to reference count this so multiple mplp_aux_t can
+    // track which references are in use?
+    // For now we just cache the last two. Sufficient?
+    if (tid == r->ref_id[0]) {
+        *ref = r->ref[0];
+        *ref_len = r->ref_len[0];
+        return 1;
+    }
+    if (tid == r->ref_id[1]) {
+        // Last, swap over
+        int tmp;
+        tmp = r->ref_id[0];  r->ref_id[0]  = r->ref_id[1];  r->ref_id[1]  = tmp;
+        tmp = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp;
+
+        char *tc;
+        tc = r->ref[0]; r->ref[0] = r->ref[1]; r->ref[1] = tc;
+        *ref = r->ref[0];
+        *ref_len = r->ref_len[0];
+        return 1;
+    }
+
+    // New, so migrate to old and load new
+    free(r->ref[1]);
+    r->ref[1]     = r->ref[0];
+    r->ref_id[1]  = r->ref_id[0];
+    r->ref_len[1] = r->ref_len[0];
+
+    r->ref_id[0] = tid;
+    r->ref[0] = faidx_fetch_seq(ma->conf->fai,
+                                ma->h->target_name[r->ref_id[0]],
+                                0,
+                                INT_MAX,
+                                &r->ref_len[0]);
+
+    if (!r->ref[0]) {
+        r->ref[0] = NULL;
+        r->ref_id[0] = -1;
+        r->ref_len[0] = 0;
+        *ref = NULL;
+        return 0;
+    }
+
+    *ref = r->ref[0];
+    *ref_len = r->ref_len[0];
+    return 1;
+}
+
+static int mplp_func(void *data, bam1_t *b)
+{
+    char *ref;
+    mplp_aux_t *ma = (mplp_aux_t*)data;
+    int ret, ref_len;
+    while (1)
+    {
+        int has_ref;
+        ret = ma->iter? sam_itr_next(ma->fp, ma->iter, b) : sam_read1(ma->fp, ma->h, b);
+        if (ret < 0) break;
+        // The 'B' cigar operation is not part of the specification, considering as obsolete.
+        //  bam_remove_B(b);
+        if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP)) continue; // exclude unmapped reads
+        if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) continue;
+        if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) continue;
+        if (ma->conf->bed)
+        {
+            // test overlap
+            regitr_t *itr = ma->conf->bed_itr;
+            int beg = b->core.pos, end = bam_endpos(b)-1;
+            int overlap = regidx_overlap(ma->conf->bed, ma->h->target_name[b->core.tid],beg,end, itr);
+            if ( !ma->conf->bed_logic && !overlap )
+            {
+                // exclude only reads which are fully contained in the region
+                while ( regitr_overlap(itr) )
+                {
+                    if ( beg < itr->beg ) { overlap = 1; break; }
+                    if ( end > itr->end ) { overlap = 1; break; }
+                }
+            }
+            if ( !overlap ) continue;
+        }
+        if ( bam_smpl_get_sample_id(ma->conf->bsmpl,ma->bam_id,b)<0 ) continue;
+        if (ma->conf->flag & MPLP_ILLUMINA13) {
+            int i;
+            uint8_t *qual = bam_get_qual(b);
+            for (i = 0; i < b->core.l_qseq; ++i)
+                qual[i] = qual[i] > 31? qual[i] - 31 : 0;
+        }
+
+        if (ma->conf->fai && b->core.tid >= 0) {
+            has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len);
+            if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence
+                fprintf(stderr,"[%s] Skipping because %d is outside of %d [ref:%d]\n",
+                        __func__, b->core.pos, ref_len, b->core.tid);
+                continue;
+            }
+        } else {
+            has_ref = 0;
+        }
+
+        if (has_ref && (ma->conf->flag&MPLP_REALN)) sam_prob_realn(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
+        if (has_ref && ma->conf->capQ_thres > 10) {
+            int q = sam_cap_mapq(b, ref, ref_len, ma->conf->capQ_thres);
+            if (q < 0) continue;    // skip
+            else if (b->core.qual > q) b->core.qual = q;
+        }
+        if (b->core.qual < ma->conf->min_mq) continue;
+        else if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&BAM_FPAIRED) && !(b->core.flag&BAM_FPROPER_PAIR)) continue;
+
+        return ret;
+    };
+    return ret;
+}
+
+// Called once per new bam added to the pileup.
+// We cache sample information here so we don't have to keep recomputing this
+// on each and every pileup column.
+//
+// Cd is an arbitrary block of data we can write into, which ends up in
+// the pileup structures.  We stash the sample ID there.
+static int pileup_constructor(void *data, const bam1_t *b, bam_pileup_cd *cd) {
+    mplp_aux_t *ma = (mplp_aux_t *)data;
+    cd->i = bam_smpl_get_sample_id(ma->conf->bsmpl, ma->bam_id, (bam1_t *)b);
+    return 0;
+}
+
+static void group_smpl(mplp_pileup_t *m, bam_smpl_t *bsmpl, int n, int *n_plp, const bam_pileup1_t **plp)
+{
+    int i, j;
+    memset(m->n_plp, 0, m->n * sizeof(int));
+    for (i = 0; i < n; ++i) // iterate over all bams
+    {
+        for (j = 0; j < n_plp[i]; ++j)  // iterate over all reads available at this position
+        {
+            const bam_pileup1_t *p = plp[i] + j;
+            int id = p->cd.i;
+            if (m->n_plp[id] == m->m_plp[id]) 
+            {
+                m->m_plp[id] = m->m_plp[id]? m->m_plp[id]<<1 : 8;
+                m->plp[id] = (bam_pileup1_t*) realloc(m->plp[id], sizeof(bam_pileup1_t) * m->m_plp[id]);
+            }
+            m->plp[id][m->n_plp[id]++] = *p;
+        }
+    }
+}
+
+static void flush_bcf_records(mplp_conf_t *conf, htsFile *fp, bcf_hdr_t *hdr, bcf1_t *rec)
+{
+    if ( !conf->gvcf )
+    {
+        if ( rec ) bcf_write1(fp, hdr, rec);
+        return;
+    }
+
+    if ( !rec )
+    {
+        gvcf_write(conf->gvcf, fp, hdr, NULL, 0);
+        return;
+    }
+
+    int is_ref = 0;
+    if ( rec->n_allele==1 ) is_ref = 1;
+    else if ( rec->n_allele==2 )
+    {
+        // second allele is mpileup's X, not a variant
+        if ( rec->d.allele[1][0]=='<' && rec->d.allele[1][1]=='*' && rec->d.allele[1][2]=='>' ) is_ref = 1;
+    }
+    rec = gvcf_write(conf->gvcf, fp, hdr, rec, is_ref);
+    if ( rec ) bcf_write1(fp,hdr,rec);
+}
+
+static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
+{
+    bam_hdr_t *hdr = conf->mplp_data[0]->h; // header of first file in input list
+
+    int ret, i, tid, pos, ref_len;
+    char *ref;
+
+    while ( (ret=bam_mplp_auto(conf->iter, &tid, &pos, conf->n_plp, conf->plp)) > 0) 
+    {
+        if ( end && (pos<beg || pos>end) ) continue;
+        if ( conf->bed && tid >= 0 )
+        {
+            int overlap = regidx_overlap(conf->bed, hdr->target_name[tid], pos, pos, NULL);
+            if ( !conf->bed_logic ) overlap = overlap ? 0 : 1;
+            if ( !overlap ) continue;
+        }
+        mplp_get_ref(conf->mplp_data[0], tid, &ref, &ref_len);
+
+        int total_depth, _ref0, ref16;
+        for (i = total_depth = 0; i < conf->nfiles; ++i) total_depth += conf->n_plp[i];
+        group_smpl(conf->gplp, conf->bsmpl, conf->nfiles, conf->n_plp, conf->plp);
+        _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
+        ref16 = seq_nt16_table[_ref0];
+        bcf_callaux_clean(conf->bca, &conf->bc);
+        for (i = 0; i < conf->gplp->n; ++i)
+            bcf_call_glfgen(conf->gplp->n_plp[i], conf->gplp->plp[i], ref16, conf->bca, conf->bcr + i);
+        conf->bc.tid = tid; conf->bc.pos = pos;
+        bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, ref16, &conf->bc);
+        bcf_clear1(conf->bcf_rec);
+        bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, 0, 0);
+        flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, conf->bcf_rec);
+
+        // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
+        // check me: rghash in bcf_call_gap_prep() should have no effect, reads mplp_func already excludes them
+        if (!(conf->flag&MPLP_NO_INDEL) && total_depth < conf->max_indel_depth 
+            && bcf_call_gap_prep(conf->gplp->n, conf->gplp->n_plp, conf->gplp->plp, pos, conf->bca, ref) >= 0)
+        {
+            bcf_callaux_clean(conf->bca, &conf->bc);
+            for (i = 0; i < conf->gplp->n; ++i)
+                bcf_call_glfgen(conf->gplp->n_plp[i], conf->gplp->plp[i], -1, conf->bca, conf->bcr + i);
+            if (bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, -1, &conf->bc) >= 0) 
+            {
+                bcf_clear1(conf->bcf_rec);
+                bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, conf->bca, ref);
+                flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, conf->bcf_rec);
+            }
+        }
+    }
+    return 0;
+}
+
+static int mpileup(mplp_conf_t *conf)
+{
+    if (conf->nfiles == 0) {
+        fprintf(stderr,"[%s] no input file/data given\n", __func__);
+        exit(EXIT_FAILURE);
+    }
+
+    mplp_ref_t mp_ref = MPLP_REF_INIT;
+    conf->gplp = (mplp_pileup_t *) calloc(1,sizeof(mplp_pileup_t));
+    conf->mplp_data = (mplp_aux_t**) calloc(conf->nfiles, sizeof(mplp_aux_t*));
+    conf->plp = (const bam_pileup1_t**) calloc(conf->nfiles, sizeof(bam_pileup1_t*));
+    conf->n_plp = (int*) calloc(conf->nfiles, sizeof(int));
+
+    // Allow to run mpileup on multiple regions in one go. This comes at cost: the bai index
+    // must be kept in the memory for the whole time which can be a problem with many bams.
+    // Therefore if none or only one region is requested, we initialize the bam iterator as
+    // before and free the index. Only when multiple regions are queried, we keep the index.
+    int nregs = 0;
+    if ( conf->reg_fname )
+    {
+        if ( conf->reg_is_file )
+        {
+            conf->reg = regidx_init(conf->reg_fname,NULL,NULL,0,NULL);
+            if ( !conf->reg ) {
+                fprintf(stderr,"Could not parse the regions: %s\n", conf->reg_fname);
+                exit(EXIT_FAILURE);
+            }
+        }
+        else
+        {
+            conf->reg = regidx_init(NULL,regidx_parse_reg,NULL,sizeof(char*),NULL);
+            if ( regidx_insert_list(conf->reg,conf->reg_fname,',') !=0 ) {
+                fprintf(stderr,"Could not parse the regions: %s\n", conf->reg_fname);
+                exit(EXIT_FAILURE);
+            }
+        }
+        nregs = regidx_nregs(conf->reg);
+        conf->reg_itr = regitr_init(conf->reg);
+        regitr_loop(conf->reg_itr);   // region iterator now positioned at the first region
+    }
+
+    // read the header of each file in the list and initialize data
+    // beware: mpileup has always assumed that tid's are consistent in the headers, add sanity check at least!
+    bam_hdr_t *hdr = NULL;      // header of first file in input list
+    int i;
+    for (i = 0; i < conf->nfiles; ++i) {
+        bam_hdr_t *h_tmp;
+        conf->mplp_data[i] = (mplp_aux_t*) calloc(1, sizeof(mplp_aux_t));
+        conf->mplp_data[i]->fp = sam_open(conf->files[i], "rb");
+        if ( !conf->mplp_data[i]->fp )
+        {
+            fprintf(stderr, "[%s] failed to open %s: %s\n", __func__, conf->files[i], strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+        if (hts_set_opt(conf->mplp_data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
+            fprintf(stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+            exit(EXIT_FAILURE);
+        }
+        if (conf->fai_fname && hts_set_fai_filename(conf->mplp_data[i]->fp, conf->fai_fname) != 0) {
+            fprintf(stderr, "[%s] failed to process %s: %s\n",
+                    __func__, conf->fai_fname, strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+        conf->mplp_data[i]->conf = conf;
+        conf->mplp_data[i]->ref = &mp_ref;
+        h_tmp = sam_hdr_read(conf->mplp_data[i]->fp);
+        if ( !h_tmp ) {
+            fprintf(stderr,"[%s] fail to read the header of %s\n", __func__, conf->files[i]);
+            exit(EXIT_FAILURE);
+        }
+        conf->mplp_data[i]->h = i ? hdr : h_tmp; // for j==0, "h" has not been set yet
+        conf->mplp_data[i]->bam_id = bam_smpl_add_bam(conf->bsmpl,h_tmp->text,conf->files[i]);
+        if ( conf->mplp_data[i]->bam_id<0 )
+        {
+            // no usable readgroups in this bam, it can be skipped
+            sam_close(conf->mplp_data[i]->fp);
+            free(conf->mplp_data[i]);
+            bam_hdr_destroy(h_tmp);
+            free(conf->files[i]);
+            if ( i+1<conf->nfiles ) memmove(&conf->files[i],&conf->files[i+1],sizeof(*conf->files)*(conf->nfiles-i-1));
+            conf->nfiles--;
+            i--;
+            continue;
+        }
+        if (conf->reg) {
+            hts_idx_t *idx = sam_index_load(conf->mplp_data[i]->fp, conf->files[i]);
+            if (idx == NULL) {
+                fprintf(stderr, "[%s] fail to load index for %s\n", __func__, conf->files[i]);
+                exit(EXIT_FAILURE);
+            }
+            conf->buf.l = 0;
+            ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg+1,conf->reg_itr->end+1);
+            conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->buf.s);
+            if ( !conf->mplp_data[i]->iter ) 
+            {
+                conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
+                if ( conf->mplp_data[i]->iter ) {
+                    fprintf(stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
+                    exit(EXIT_FAILURE);
+                }
+                fprintf(stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
+                exit(EXIT_FAILURE);
+            }
+            if ( nregs==1 ) // no need to keep the index in memory
+               hts_idx_destroy(idx);
+            else
+                conf->mplp_data[i]->idx = idx;
+        }
+
+        if ( !hdr ) hdr = h_tmp; /* save the header of first file in list */
+        else {
+            // FIXME: check consistency between h and h_tmp
+            bam_hdr_destroy(h_tmp);
+
+            // we store only the first file's header; it's (alleged to be)
+            // compatible with the i-th file's target_name lookup needs
+            conf->mplp_data[i]->h = hdr;
+        }
+    }
+    // allocate data storage proportionate to number of samples being studied sm->n
+    bam_smpl_get_samples(conf->bsmpl, &conf->gplp->n);
+    conf->gplp->n_plp = (int*) calloc(conf->gplp->n, sizeof(int));
+    conf->gplp->m_plp = (int*) calloc(conf->gplp->n, sizeof(int));
+    conf->gplp->plp = (bam_pileup1_t**) calloc(conf->gplp->n, sizeof(bam_pileup1_t*));  
+
+    fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, conf->gplp->n, conf->nfiles);
+    // write the VCF header
+    conf->bcf_fp = hts_open(conf->output_fname?conf->output_fname:"-", hts_bcf_wmode(conf->output_type));
+    if (conf->bcf_fp == NULL) {
+        fprintf(stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+    if ( conf->n_threads ) hts_set_threads(conf->bcf_fp, conf->n_threads);
+
+    // BCF header creation
+    conf->bcf_hdr = bcf_hdr_init("w");
+    conf->buf.l = 0;
+
+    if (conf->record_cmd_line)
+    {
+        ksprintf(&conf->buf, "##bcftoolsVersion=%s+htslib-%s\n",bcftools_version(),hts_version());
+        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
+
+        conf->buf.l = 0;
+        ksprintf(&conf->buf, "##bcftoolsCommand=mpileup");
+        for (i=1; i<conf->argc; i++) ksprintf(&conf->buf, " %s", conf->argv[i]);
+        kputc('\n', &conf->buf);
+        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
+    }
+
+    if (conf->fai_fname)
+    {
+        conf->buf.l = 0;
+        ksprintf(&conf->buf, "##reference=file://%s\n", conf->fai_fname);
+        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
+    }
+
+    // Translate BAM @SQ tags to BCF ##contig tags
+    // todo: use/write new BAM header manipulation routines, fill also UR, M5
+    for (i=0; i<hdr->n_targets; i++)
+    {
+        conf->buf.l = 0;
+        ksprintf(&conf->buf, "##contig=<ID=%s,length=%d>", hdr->target_name[i], hdr->target_len[i]);
+        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
+    }
+    conf->buf.l = 0;
+
+    bcf_hdr_append(conf->bcf_hdr,"##ALT=<ID=*,Description=\"Represents allele(s) other than observed.\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of reads supporting an indel\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of reads supporting an indel\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
+#if CDF_MWU_TESTS
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">");
+#endif
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">");
+    bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">");
+    if ( conf->fmt_flag&B2B_FMT_DP )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">");
+    if ( conf->fmt_flag&B2B_FMT_DV )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">");
+    if ( conf->fmt_flag&B2B_FMT_DPR )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
+    if ( conf->fmt_flag&B2B_INFO_DPR )
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
+    if ( conf->fmt_flag&B2B_FMT_DP4 )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">");
+    if ( conf->fmt_flag&B2B_FMT_SP )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">");
+    if ( conf->fmt_flag&B2B_FMT_AD )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths\">");
+    if ( conf->fmt_flag&B2B_FMT_ADF )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand\">");
+    if ( conf->fmt_flag&B2B_FMT_ADR )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand\">");
+    if ( conf->fmt_flag&B2B_INFO_AD )
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths\">");
+    if ( conf->fmt_flag&B2B_INFO_ADF )
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=ADF,Number=R,Type=Integer,Description=\"Total allelic depths on the forward strand\">");
+    if ( conf->fmt_flag&B2B_INFO_ADR )
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=ADR,Number=R,Type=Integer,Description=\"Total allelic depths on the reverse strand\">");
+    if ( conf->gvcf )
+        gvcf_update_header(conf->gvcf, conf->bcf_hdr);
+
+    int nsmpl;
+    const char **smpl = bam_smpl_get_samples(conf->bsmpl, &nsmpl);
+    for (i=0; i<nsmpl; i++)
+        bcf_hdr_add_sample(conf->bcf_hdr, smpl[i]);
+    bcf_hdr_write(conf->bcf_fp, conf->bcf_hdr);
+
+    conf->bca = bcf_call_init(-1., conf->min_baseQ);
+    conf->bcr = (bcf_callret1_t*) calloc(nsmpl, sizeof(bcf_callret1_t));
+    conf->bca->openQ = conf->openQ, conf->bca->extQ = conf->extQ, conf->bca->tandemQ = conf->tandemQ;
+    conf->bca->min_frac = conf->min_frac;
+    conf->bca->min_support = conf->min_support;
+    conf->bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;
+
+    conf->bc.bcf_hdr = conf->bcf_hdr;
+    conf->bc.n  = nsmpl;
+    conf->bc.PL = (int32_t*) malloc(15 * nsmpl * sizeof(*conf->bc.PL));
+    if (conf->fmt_flag)
+    {
+        assert( sizeof(float)==sizeof(int32_t) );
+        conf->bc.DP4 = (int32_t*) malloc(nsmpl * sizeof(int32_t) * 4);
+        conf->bc.fmt_arr = (uint8_t*) malloc(nsmpl * sizeof(float)); // all fmt_flag fields, float and int32
+        if ( conf->fmt_flag&(B2B_INFO_DPR|B2B_FMT_DPR|B2B_INFO_AD|B2B_INFO_ADF|B2B_INFO_ADR|B2B_FMT_AD|B2B_FMT_ADF|B2B_FMT_ADR) )
+        {
+            // first B2B_MAX_ALLELES fields for total numbers, the rest per-sample
+            conf->bc.ADR = (int32_t*) malloc((nsmpl+1)*B2B_MAX_ALLELES*sizeof(int32_t));
+            conf->bc.ADF = (int32_t*) malloc((nsmpl+1)*B2B_MAX_ALLELES*sizeof(int32_t));
+            for (i=0; i<nsmpl; i++)
+            {
+                conf->bcr[i].ADR = conf->bc.ADR + (i+1)*B2B_MAX_ALLELES;
+                conf->bcr[i].ADF = conf->bc.ADF + (i+1)*B2B_MAX_ALLELES;
+            }
+        }
+    }
+
+    // init mpileup
+    conf->iter = bam_mplp_init(conf->nfiles, mplp_func, (void**)conf->mplp_data);
+    if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(conf->iter);
+    if ( (double)conf->max_depth * conf->nfiles > 1<<20)
+        fprintf(stderr, "Warning: Potential memory hog, up to %.0fM reads in the pileup!\n", (double)conf->max_depth*conf->nfiles);
+    if ( (double)conf->max_depth * conf->nfiles / nsmpl < 250 )
+        fprintf(stderr, "Note: The maximum per-sample depth with -d %d is %.1fx\n", conf->max_depth,(double)conf->max_depth * conf->nfiles / nsmpl);
+    bam_mplp_set_maxcnt(conf->iter, conf->max_depth);
+    conf->max_indel_depth = conf->max_indel_depth * nsmpl;
+    conf->bcf_rec = bcf_init1();
+    bam_mplp_constructor(conf->iter, pileup_constructor);
+
+    // Run mpileup for multiple regions
+    if ( nregs )
+    {
+        int ireg = 0;
+        do 
+        {
+            // first region is already positioned
+            if ( ireg++ > 0 )
+            {
+                conf->buf.l = 0;
+                ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg,conf->reg_itr->end);
+
+                for (i=0; i<conf->nfiles; i++) 
+                {
+                    hts_itr_destroy(conf->mplp_data[i]->iter);
+                    conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->buf.s);
+                    if ( !conf->mplp_data[i]->iter ) 
+                    {
+                        conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
+                        if ( conf->mplp_data[i]->iter ) {
+                            fprintf(stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
+                            exit(EXIT_FAILURE);
+                        }
+                        fprintf(stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
+                        exit(EXIT_FAILURE);
+                    }
+                    bam_mplp_reset(conf->iter);
+                }
+            }
+            mpileup_reg(conf,conf->reg_itr->beg,conf->reg_itr->end);
+        }
+        while ( regitr_loop(conf->reg_itr) );
+    }
+    else
+        mpileup_reg(conf,0,0);
+
+    flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, NULL);
+
+    // clean up
+    free(conf->bc.tmp.s);
+    bcf_destroy1(conf->bcf_rec);
+    if (conf->bcf_fp)
+    {
+        hts_close(conf->bcf_fp);
+        bcf_hdr_destroy(conf->bcf_hdr);
+        bcf_call_destroy(conf->bca);
+        free(conf->bc.PL);
+        free(conf->bc.DP4);
+        free(conf->bc.ADR);
+        free(conf->bc.ADF);
+        free(conf->bc.fmt_arr);
+        free(conf->bcr);
+    }
+    if ( conf->gvcf ) gvcf_destroy(conf->gvcf);
+    free(conf->buf.s);
+    for (i = 0; i < conf->gplp->n; ++i) free(conf->gplp->plp[i]);
+    free(conf->gplp->plp); free(conf->gplp->n_plp); free(conf->gplp->m_plp); free(conf->gplp);
+    bam_mplp_destroy(conf->iter);
+    bam_hdr_destroy(hdr);
+    for (i = 0; i < conf->nfiles; ++i) {
+        if ( nregs>1 ) hts_idx_destroy(conf->mplp_data[i]->idx);
+        sam_close(conf->mplp_data[i]->fp);
+        if ( conf->mplp_data[i]->iter) hts_itr_destroy(conf->mplp_data[i]->iter);
+        free(conf->mplp_data[i]);
+    }
+    if ( conf->reg_itr ) regitr_destroy(conf->reg_itr);
+    free(conf->mplp_data); free(conf->plp); free(conf->n_plp);
+    free(mp_ref.ref[0]);
+    free(mp_ref.ref[1]);
+    return 0;
+}
+
+static int is_url(const char *s)
+{
+    static const char uri_scheme_chars[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-";
+    return s[strspn(s, uri_scheme_chars)] == ':';
+}
+
+#define MAX_PATH_LEN 1024
+int read_file_list(const char *file_list,int *n,char **argv[])
+{
+    char buf[MAX_PATH_LEN];
+    int len, nfiles = 0;
+    char **files = NULL;
+    struct stat sb;
+
+    *n = 0;
+    *argv = NULL;
+
+    FILE *fh = fopen(file_list,"r");
+    if ( !fh )
+    {
+        fprintf(stderr,"%s: %s\n", file_list,strerror(errno));
+        return 1;
+    }
+
+    files = (char**) calloc(nfiles,sizeof(char*));
+    nfiles = 0;
+    while ( fgets(buf,MAX_PATH_LEN,fh) )
+    {
+        // allow empty lines and trailing spaces
+        len = strlen(buf);
+        while ( len>0 && isspace(buf[len-1]) ) len--;
+        if ( !len ) continue;
+
+        // check sanity of the file list
+        buf[len] = 0;
+        if (! (is_url(buf) || stat(buf, &sb) == 0))
+        {
+            // no such file, check if it is safe to print its name
+            int i, safe_to_print = 1;
+            for (i=0; i<len; i++)
+                if (!isprint(buf[i])) { safe_to_print = 0; break; }
+            if ( safe_to_print )
+                fprintf(stderr,"The file list \"%s\" appears broken, could not locate: %s\n", file_list,buf);
+            else
+                fprintf(stderr,"Does the file \"%s\" really contain a list of files and do all exist?\n", file_list);
+            return 1;
+        }
+
+        nfiles++;
+        files = (char**) realloc(files,nfiles*sizeof(char*));
+        files[nfiles-1] = strdup(buf);
+    }
+    fclose(fh);
+    if ( !nfiles )
+    {
+        fprintf(stderr,"No files read from %s\n", file_list);
+        return 1;
+    }
+    *argv = files;
+    *n    = nfiles;
+    return 0;
+}
+#undef MAX_PATH_LEN
+
+int parse_format_flag(const char *str)
+{
+    int i, flag = 0, n_tags;
+    char **tags = hts_readlist(str, 0, &n_tags);
+    for(i=0; i<n_tags; i++)
+    {
+        if ( !strcasecmp(tags[i],"DP") || !strcasecmp(tags[i],"FORMAT/DP") || !strcasecmp(tags[i],"FMT/DP") ) flag |= B2B_FMT_DP;
+        else if ( !strcasecmp(tags[i],"DV") || !strcasecmp(tags[i],"FORMAT/DV") || !strcasecmp(tags[i],"FMT/DV") ) { flag |= B2B_FMT_DV; fprintf(stderr, "[warning] tag DV functional, but deprecated. Please switch to `AD` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"SP") || !strcasecmp(tags[i],"FORMAT/SP") || !strcasecmp(tags[i],"FMT/SP") ) flag |= B2B_FMT_SP;
+        else if ( !strcasecmp(tags[i],"DP4") || !strcasecmp(tags[i],"FORMAT/DP4") || !strcasecmp(tags[i],"FMT/DP4") ) { flag |= B2B_FMT_DP4; fprintf(stderr, "[warning] tag DP4 functional, but deprecated. Please switch to `ADF` and `ADR` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"DPR") || !strcasecmp(tags[i],"FORMAT/DPR") || !strcasecmp(tags[i],"FMT/DPR") ) { flag |= B2B_FMT_DPR; fprintf(stderr, "[warning] tag DPR functional, but deprecated. Please switch to `AD` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"INFO/DPR") ) { flag |= B2B_INFO_DPR; fprintf(stderr, "[warning] tag INFO/DPR functional, but deprecated. Please switch to `INFO/AD` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"AD") || !strcasecmp(tags[i],"FORMAT/AD") || !strcasecmp(tags[i],"FMT/AD") ) flag |= B2B_FMT_AD;
+        else if ( !strcasecmp(tags[i],"ADF") || !strcasecmp(tags[i],"FORMAT/ADF") || !strcasecmp(tags[i],"FMT/ADF") ) flag |= B2B_FMT_ADF;
+        else if ( !strcasecmp(tags[i],"ADR") || !strcasecmp(tags[i],"FORMAT/ADR") || !strcasecmp(tags[i],"FMT/ADR") ) flag |= B2B_FMT_ADR;
+        else if ( !strcasecmp(tags[i],"INFO/AD") ) flag |= B2B_INFO_AD;
+        else if ( !strcasecmp(tags[i],"INFO/ADF") ) flag |= B2B_INFO_ADF;
+        else if ( !strcasecmp(tags[i],"INFO/ADR") ) flag |= B2B_INFO_ADR;
+        else
+        {
+            fprintf(stderr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
+            exit(EXIT_FAILURE);
+        }
+        free(tags[i]);
+    }
+    if (n_tags) free(tags);
+    return flag;
+}
+
+static void list_annotations(FILE *fp)
+{
+    fprintf(fp,
+"\n"
+"FORMAT annotation tags available (\"FORMAT/\" prefix is optional):\n"
+"\n"
+"  FORMAT/AD  .. Allelic depth (Number=R,Type=Integer)\n"
+"  FORMAT/ADF .. Allelic depths on the forward strand (Number=R,Type=Integer)\n"
+"  FORMAT/ADR .. Allelic depths on the reverse strand (Number=R,Type=Integer)\n"
+"  FORMAT/DP  .. Number of high-quality bases (Number=1,Type=Integer)\n"
+"  FORMAT/SP  .. Phred-scaled strand bias P-value (Number=1,Type=Integer)\n"
+"\n"
+"INFO annotation tags available:\n"
+"\n"
+"  INFO/AD  .. Total allelic depth (Number=R,Type=Integer)\n"
+"  INFO/ADF .. Total allelic depths on the forward strand (Number=R,Type=Integer)\n"
+"  INFO/ADR .. Total allelic depths on the reverse strand (Number=R,Type=Integer)\n"
+"\n");
+}
+
+static void print_usage(FILE *fp, const mplp_conf_t *mplp)
+{
+    char *tmp_require = bam_flag2str(mplp->rflag_require);
+    char *tmp_filter  = bam_flag2str(mplp->rflag_filter);
+
+    // Display usage information, formatted for the standard 80 columns.
+    // (The unusual string formatting here aids the readability of this
+    // source code in 80 columns, to the extent that's possible.)
+
+    fprintf(fp,
+"\n"
+"Usage: bcftools mpileup [options] in1.bam [in2.bam [...]]\n"
+"\n"
+"Input options:\n"
+"  -6, --illumina1.3+      quality is in the Illumina-1.3+ encoding\n"
+"  -A, --count-orphans     do not discard anomalous read pairs\n"
+"  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
+"  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)\n"
+"  -C, --adjust-MQ INT     adjust mapping quality; recommended:50, disable:0 [0]\n"
+"  -d, --max-depth INT     max per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+    fprintf(fp,
+"  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs\n"
+"  -f, --fasta-ref FILE    faidx indexed reference sequence file\n"
+"      --no-reference      do not require fasta reference file\n"
+"  -G, --read-groups FILE  select or exclude read groups listed in the file\n"
+"  -q, --min-MQ INT        skip alignments with mapQ smaller than INT [%d]\n", mplp->min_mq);
+    fprintf(fp,
+"  -Q, --min-BQ INT        skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp->min_baseQ);
+    fprintf(fp,
+"  -r, --regions REG[,...] comma separated list of regions in which pileup is generated\n"
+"  -R, --regions-file FILE restrict to regions listed in a file\n"
+"      --ignore-RG         ignore RG tags (one BAM = one sample)\n"
+"  --rf, --incl-flags STR|INT  required flags: skip reads with mask bits unset [%s]\n", tmp_require);
+    fprintf(fp,
+"  --ff, --excl-flags STR|INT  filter flags: skip reads with mask bits set\n"
+"                                            [%s]\n", tmp_filter);
+    fprintf(fp,
+"  -s, --samples LIST      comma separated list of samples to include\n"
+"  -S, --samples-file FILE file of samples to include\n"
+"  -t, --targets REG[,...] similar to -r but streams rather than index-jumps\n"
+"  -T, --targets-file FILE similar to -R but streams rather than index-jumps\n"
+"  -x, --ignore-overlaps   disable read-pair overlap detection\n"
+"\n"
+"Output options:\n"
+"  -a, --annotate LIST     optional tags to output; '?' to list []\n"
+"  -g, --gvcf INT[,...]    group non-variant sites into gVCF blocks according\n"
+"                          to minimum per-sample DP\n"
+"      --no-version        do not append version and command line to the header\n"
+"  -o, --output FILE       write output to FILE [standard output]\n"
+"  -O, --output-type TYPE  'b' compressed BCF; 'u' uncompressed BCF;\n"
+"                          'z' compressed VCF; 'v' uncompressed VCF [v]\n"
+"      --threads INT       number of extra output compression threads [0]\n"
+"\n"
+"SNP/INDEL genotype likelihoods options:\n"
+"  -e, --ext-prob INT      Phred-scaled gap extension seq error probability [%d]\n", mplp->extQ);
+    fprintf(fp,
+"  -F, --gap-frac FLOAT    minimum fraction of gapped reads [%g]\n", mplp->min_frac);
+    fprintf(fp,
+"  -h, --tandem-qual INT   coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
+    fprintf(fp,
+"  -I, --skip-indels       do not perform indel calling\n"
+"  -L, --max-idepth INT    maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
+    fprintf(fp,
+"  -m, --min-ireads INT    minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
+    fprintf(fp,
+"  -o, --open-prob INT     Phred-scaled gap open seq error probability [%d]\n", mplp->openQ);
+    fprintf(fp,
+"  -p, --per-sample-mF     apply -m and -F per-sample for increased sensitivity\n"
+"  -P, --platforms STR     comma separated list of platforms for indels [all]\n"
+"\n"
+"Notes: Assuming diploid individuals.\n"
+"\n");
+
+    free(tmp_require);
+    free(tmp_filter);
+}
+
+int bam_mpileup(int argc, char *argv[])
+{
+    int c;
+    const char *file_list = NULL;
+    char **fn = NULL;
+    int nfiles = 0, use_orphan = 0, noref = 0;
+    mplp_conf_t mplp;
+    memset(&mplp, 0, sizeof(mplp_conf_t));
+    mplp.min_baseQ = 13;
+    mplp.capQ_thres = 0;
+    mplp.max_depth = 250; mplp.max_indel_depth = 250;
+    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
+    mplp.min_frac = 0.002; mplp.min_support = 1;
+    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
+    mplp.argc = argc; mplp.argv = argv;
+    mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
+    mplp.output_fname = NULL;
+    mplp.output_type = FT_VCF;
+    mplp.record_cmd_line = 1;
+    mplp.n_threads = 0;
+    mplp.bsmpl = bam_smpl_init();
+
+    static const struct option lopts[] =
+    {
+        {"rf", required_argument, NULL, 1},   // require flag
+        {"ff", required_argument, NULL, 2},   // filter flag
+        {"incl-flags", required_argument, NULL, 1},
+        {"excl-flags", required_argument, NULL, 2},
+        {"output", required_argument, NULL, 3},
+        {"open-prob", required_argument, NULL, 4},
+        {"ignore-RG", no_argument, NULL, 5},
+        {"ignore-rg", no_argument, NULL, 5},
+        {"gvcf", required_argument, NULL, 'g'},
+        {"non-reference", no_argument, NULL, 7},
+        {"no-version", no_argument, NULL, 8},
+        {"threads",required_argument,NULL,9},
+        {"illumina1.3+", no_argument, NULL, '6'},
+        {"count-orphans", no_argument, NULL, 'A'},
+        {"bam-list", required_argument, NULL, 'b'},
+        {"no-BAQ", no_argument, NULL, 'B'},
+        {"no-baq", no_argument, NULL, 'B'},
+        {"adjust-MQ", required_argument, NULL, 'C'},
+        {"adjust-mq", required_argument, NULL, 'C'},
+        {"max-depth", required_argument, NULL, 'd'},
+        {"redo-BAQ", no_argument, NULL, 'E'},
+        {"redo-baq", no_argument, NULL, 'E'},
+        {"fasta-ref", required_argument, NULL, 'f'},
+        {"read-groups", required_argument, NULL, 'G'},
+        {"region", required_argument, NULL, 'r'},
+        {"regions", required_argument, NULL, 'r'},
+        {"regions-file", required_argument, NULL, 'R'},
+        {"targets", required_argument, NULL, 't'},
+        {"targets-file", required_argument, NULL, 'T'},
+        {"min-MQ", required_argument, NULL, 'q'},
+        {"min-mq", required_argument, NULL, 'q'},
+        {"min-BQ", required_argument, NULL, 'Q'},
+        {"min-bq", required_argument, NULL, 'Q'},
+        {"ignore-overlaps", no_argument, NULL, 'x'},
+        {"output-type", required_argument, NULL, 'O'},
+        {"samples", required_argument, NULL, 's'},
+        {"samples-file", required_argument, NULL, 'S'},
+        {"annotate", required_argument, NULL, 'a'},
+        {"ext-prob", required_argument, NULL, 'e'},
+        {"gap-frac", required_argument, NULL, 'F'},
+        {"tandem-qual", required_argument, NULL, 'h'},
+        {"skip-indels", no_argument, NULL, 'I'},
+        {"max-idepth", required_argument, NULL, 'L'},
+        {"min-ireads ", required_argument, NULL, 'm'},
+        {"per-sample-mF", no_argument, NULL, 'p'},
+        {"per-sample-mf", no_argument, NULL, 'p'},
+        {"platforms", required_argument, NULL, 'P'},
+        {NULL, 0, NULL, 0}
+    };
+    while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) {
+        switch (c) {
+        case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
+        case  1 :
+            mplp.rflag_require = bam_str2flag(optarg);
+            if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; }
+            break;
+        case  2 :
+            mplp.rflag_filter = bam_str2flag(optarg);
+            if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; }
+            break;
+        case  3 : mplp.output_fname = optarg; break;
+        case  4 : mplp.openQ = atoi(optarg); break;
+        case  5 : bam_smpl_ignore_readgroups(mplp.bsmpl); break;
+        case 'g':
+            mplp.gvcf = gvcf_init(optarg);
+            if ( !mplp.gvcf ) error("Could not parse: --gvcf %s\n", optarg);
+            break;
+        case 'f':
+            mplp.fai = fai_load(optarg);
+            if (mplp.fai == NULL) return 1;
+            mplp.fai_fname = optarg;
+            break;
+        case  7 : noref = 1; break;
+        case  8 : mplp.record_cmd_line = 0; break;
+        case  9 : mplp.n_threads = strtol(optarg, 0, 0); break;
+        case 'd': mplp.max_depth = atoi(optarg); break;
+        case 'r': mplp.reg_fname = strdup(optarg); break;
+        case 'R': mplp.reg_fname = strdup(optarg); mplp.reg_is_file = 1; break;
+        case 't':
+                  // In the original version the whole BAM was streamed which is inefficient
+                  //  with few BED intervals and big BAMs. Todo: devise a heuristic to determine
+                  //  best strategy, that is streaming or jumping.
+                  if ( optarg[0]=='^' ) optarg++;
+                  else mplp.bed_logic = 1;
+                  mplp.bed = regidx_init(NULL,regidx_parse_reg,NULL,0,NULL);
+                  mplp.bed_itr = regitr_init(mplp.bed);
+                  if ( regidx_insert_list(mplp.bed,optarg,',') !=0 )
+                  {
+                      fprintf(stderr,"Could not parse the targets: %s\n", optarg);
+                      exit(EXIT_FAILURE);
+                  }
+                  break;
+        case 'T':
+                  if ( optarg[0]=='^' ) optarg++;
+                  else mplp.bed_logic = 1;
+                  mplp.bed = regidx_init(optarg,NULL,NULL,0,NULL);
+                  if (!mplp.bed) { fprintf(stderr, "bcftools mpileup: Could not read file \"%s\"", optarg); return 1; }
+                  break;
+        case 'P': mplp.pl_list = strdup(optarg); break;
+        case 'p': mplp.flag |= MPLP_PER_SAMPLE; break;
+        case 'B': mplp.flag &= ~MPLP_REALN; break;
+        case 'I': mplp.flag |= MPLP_NO_INDEL; break;
+        case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
+        case '6': mplp.flag |= MPLP_ILLUMINA13; break;
+        case 's': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,0)<0 ) error("Could not read samples: %s\n",optarg); break;
+        case 'S': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,1)<0 ) error("Could not read samples: %s\n",optarg); break;
+        case 'O': 
+            switch (optarg[0]) {
+                case 'b': mplp.output_type = FT_BCF_GZ; break;
+                case 'u': mplp.output_type = FT_BCF; break;
+                case 'z': mplp.output_type = FT_VCF_GZ; break;
+                case 'v': mplp.output_type = FT_VCF; break;
+                default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n", optarg); 
+            }
+            break;
+        case 'C': mplp.capQ_thres = atoi(optarg); break;
+        case 'q': mplp.min_mq = atoi(optarg); break;
+        case 'Q': mplp.min_baseQ = atoi(optarg); break;
+        case 'b': file_list = optarg; break;
+        case 'o': {
+                char *end;
+                long value = strtol(optarg, &end, 10);
+                // Distinguish between -o INT and -o FILE (a bit of a hack!)
+                if (*end == '\0') mplp.openQ = value;
+                else mplp.output_fname = optarg;
+            }
+            break;
+        case 'e': mplp.extQ = atoi(optarg); break;
+        case 'h': mplp.tandemQ = atoi(optarg); break;
+        case 'A': use_orphan = 1; break;
+        case 'F': mplp.min_frac = atof(optarg); break;
+        case 'm': mplp.min_support = atoi(optarg); break;
+        case 'L': mplp.max_indel_depth = atoi(optarg); break;
+        case 'G': bam_smpl_add_readgroups(mplp.bsmpl, optarg, 1); break;
+        case 'a':
+            if (optarg[0]=='?') {
+                list_annotations(stderr);
+                return 1;
+            }
+            mplp.fmt_flag |= parse_format_flag(optarg);
+        break;
+        default:
+            fprintf(stderr,"Invalid option: '%c'\n", c);
+            return 1;
+        }
+    }
+
+    if ( mplp.gvcf && !(mplp.fmt_flag&B2B_FMT_DP) )
+    {
+        fprintf(stderr,"[warning] The -t DP option is required with --gvcf, switching on.\n");
+        mplp.fmt_flag |= B2B_FMT_DP;
+    }
+    if ( mplp.flag&(MPLP_BCF|MPLP_VCF|MPLP_NO_COMP) )
+    {
+        if ( mplp.flag&MPLP_VCF )
+        {
+            if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_VCF;
+            else mplp.output_type = FT_VCF_GZ;
+        }
+        else if ( mplp.flag&MPLP_BCF )
+        {
+            if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_BCF;
+            else mplp.output_type = FT_BCF_GZ;
+        }
+    }
+    if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
+    {
+        fprintf(stderr,"Error: The -B option cannot be combined with -E\n");
+        return 1;
+    }
+    if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
+    if (argc == 1)
+    {
+        print_usage(stderr, &mplp);
+        return 1;
+    }
+    if (!mplp.fai && !noref) {
+        fprintf(stderr,"Error: mpileup requires the --fasta-ref option by default; use --no-reference to run without a fasta reference\n");
+        return 1;
+    }
+    int ret,i;
+    if (file_list) 
+    {
+        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
+        mplp.files  = fn;
+        mplp.nfiles = nfiles;
+    }
+    else
+    {
+        mplp.nfiles = argc - optind;
+        mplp.files  = (char**) malloc(mplp.nfiles*sizeof(char*));
+        for (i=0; i<mplp.nfiles; i++) mplp.files[i] = strdup(argv[optind+i]);
+    }
+    ret = mpileup(&mplp);
+
+    for (i=0; i<mplp.nfiles; i++) free(mplp.files[i]);
+    free(mplp.files);
+    free(mplp.reg_fname); free(mplp.pl_list);
+    if (mplp.fai) fai_destroy(mplp.fai);
+    if (mplp.bed)
+    {
+        regidx_destroy(mplp.bed);
+        regitr_destroy(mplp.bed_itr);
+    }
+    if (mplp.reg) regidx_destroy(mplp.reg);
+    bam_smpl_destroy(mplp.bsmpl);
+    return ret;
+}
diff --git a/bcftools/mpileup.c.pysam.c b/bcftools/mpileup.c.pysam.c
new file mode 100644
index 0000000..6ef6838
--- /dev/null
+++ b/bcftools/mpileup.c.pysam.c
@@ -0,0 +1,1112 @@
+#include "pysam.h"
+
+/*  mpileup.c -- mpileup subcommand. Previously bam_plcmd.c from samtools
+
+    Copyright (C) 2008-2017 Genome Research Ltd.
+    Portions copyright (C) 2009-2012 Broad Institute.
+
+    Author: Heng Li <lh3 at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <string.h>
+#include <strings.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <getopt.h>
+#include <htslib/sam.h>
+#include <htslib/faidx.h>
+#include <htslib/kstring.h>
+#include <htslib/khash_str2int.h>
+#include <assert.h>
+#include "regidx.h"
+#include "bcftools.h"
+#include "bam2bcf.h"
+#include "bam_sample.h"
+#include "gvcf.h"
+
+#define MPLP_BCF        1
+#define MPLP_VCF        (1<<1)
+#define MPLP_NO_COMP    (1<<2)
+#define MPLP_NO_ORPHAN  (1<<3)
+#define MPLP_REALN      (1<<4)
+#define MPLP_NO_INDEL   (1<<5)
+#define MPLP_REDO_BAQ   (1<<6)
+#define MPLP_ILLUMINA13 (1<<7)
+#define MPLP_IGNORE_RG  (1<<8)
+#define MPLP_PRINT_POS  (1<<9)
+#define MPLP_PRINT_MAPQ (1<<10)
+#define MPLP_PER_SAMPLE (1<<11)
+#define MPLP_SMART_OVERLAPS (1<<12)
+
+typedef struct _mplp_aux_t mplp_aux_t;
+typedef struct _mplp_pileup_t mplp_pileup_t;
+
+// Data shared by all bam files
+typedef struct {
+    int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag;
+    int rflag_require, rflag_filter, output_type;
+    int openQ, extQ, tandemQ, min_support; // for indels
+    double min_frac; // for indels
+    char *reg_fname, *pl_list, *fai_fname, *output_fname;
+    int reg_is_file, record_cmd_line, n_threads;
+    faidx_t *fai;
+    regidx_t *bed, *reg;    // bed: skipping regions, reg: index-jump to regions
+    regitr_t *bed_itr, *reg_itr;
+    int bed_logic;          // 1: include region, 0: exclude region
+    gvcf_t *gvcf;
+
+    // auxiliary structures for calling
+    bcf_callaux_t *bca;
+    bcf_callret1_t *bcr;
+    bcf_call_t bc;
+    bam_mplp_t iter;
+    mplp_aux_t **mplp_data;
+    int nfiles;
+    char **files;
+    mplp_pileup_t *gplp;
+    int *n_plp;
+    const bam_pileup1_t **plp;
+    bam_smpl_t *bsmpl;
+    kstring_t buf;
+    bcf1_t *bcf_rec;
+    htsFile *bcf_fp;
+    bcf_hdr_t *bcf_hdr;
+    int argc;
+    char **argv;
+} mplp_conf_t;
+
+typedef struct {
+    char *ref[2];
+    int ref_id[2];
+    int ref_len[2];
+} mplp_ref_t;
+
+#define MPLP_REF_INIT {{NULL,NULL},{-1,-1},{0,0}}
+
+// Data specific to each bam file
+struct _mplp_aux_t {
+    samFile *fp;
+    hts_itr_t *iter;
+    bam_hdr_t *h;
+    mplp_ref_t *ref;
+    const mplp_conf_t *conf;
+    int bam_id;
+    hts_idx_t *idx;     // maintained only with more than one -r regions
+};
+
+// Data passed to htslib/mpileup
+struct _mplp_pileup_t {
+    int n;
+    int *n_plp, *m_plp;
+    bam_pileup1_t **plp;
+};
+
+static int mplp_get_ref(mplp_aux_t *ma, int tid,  char **ref, int *ref_len) {
+    mplp_ref_t *r = ma->ref;
+
+    //printf("get ref %d {%d/%p, %d/%p}\n", tid, r->ref_id[0], r->ref[0], r->ref_id[1], r->ref[1]);
+
+    if (!r || !ma->conf->fai) {
+        *ref = NULL;
+        return 0;
+    }
+
+    // Do we need to reference count this so multiple mplp_aux_t can
+    // track which references are in use?
+    // For now we just cache the last two. Sufficient?
+    if (tid == r->ref_id[0]) {
+        *ref = r->ref[0];
+        *ref_len = r->ref_len[0];
+        return 1;
+    }
+    if (tid == r->ref_id[1]) {
+        // Last, swap over
+        int tmp;
+        tmp = r->ref_id[0];  r->ref_id[0]  = r->ref_id[1];  r->ref_id[1]  = tmp;
+        tmp = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp;
+
+        char *tc;
+        tc = r->ref[0]; r->ref[0] = r->ref[1]; r->ref[1] = tc;
+        *ref = r->ref[0];
+        *ref_len = r->ref_len[0];
+        return 1;
+    }
+
+    // New, so migrate to old and load new
+    free(r->ref[1]);
+    r->ref[1]     = r->ref[0];
+    r->ref_id[1]  = r->ref_id[0];
+    r->ref_len[1] = r->ref_len[0];
+
+    r->ref_id[0] = tid;
+    r->ref[0] = faidx_fetch_seq(ma->conf->fai,
+                                ma->h->target_name[r->ref_id[0]],
+                                0,
+                                INT_MAX,
+                                &r->ref_len[0]);
+
+    if (!r->ref[0]) {
+        r->ref[0] = NULL;
+        r->ref_id[0] = -1;
+        r->ref_len[0] = 0;
+        *ref = NULL;
+        return 0;
+    }
+
+    *ref = r->ref[0];
+    *ref_len = r->ref_len[0];
+    return 1;
+}
+
+static int mplp_func(void *data, bam1_t *b)
+{
+    char *ref;
+    mplp_aux_t *ma = (mplp_aux_t*)data;
+    int ret, ref_len;
+    while (1)
+    {
+        int has_ref;
+        ret = ma->iter? sam_itr_next(ma->fp, ma->iter, b) : sam_read1(ma->fp, ma->h, b);
+        if (ret < 0) break;
+        // The 'B' cigar operation is not part of the specification, considering as obsolete.
+        //  bam_remove_B(b);
+        if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP)) continue; // exclude unmapped reads
+        if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) continue;
+        if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) continue;
+        if (ma->conf->bed)
+        {
+            // test overlap
+            regitr_t *itr = ma->conf->bed_itr;
+            int beg = b->core.pos, end = bam_endpos(b)-1;
+            int overlap = regidx_overlap(ma->conf->bed, ma->h->target_name[b->core.tid],beg,end, itr);
+            if ( !ma->conf->bed_logic && !overlap )
+            {
+                // exclude only reads which are fully contained in the region
+                while ( regitr_overlap(itr) )
+                {
+                    if ( beg < itr->beg ) { overlap = 1; break; }
+                    if ( end > itr->end ) { overlap = 1; break; }
+                }
+            }
+            if ( !overlap ) continue;
+        }
+        if ( bam_smpl_get_sample_id(ma->conf->bsmpl,ma->bam_id,b)<0 ) continue;
+        if (ma->conf->flag & MPLP_ILLUMINA13) {
+            int i;
+            uint8_t *qual = bam_get_qual(b);
+            for (i = 0; i < b->core.l_qseq; ++i)
+                qual[i] = qual[i] > 31? qual[i] - 31 : 0;
+        }
+
+        if (ma->conf->fai && b->core.tid >= 0) {
+            has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len);
+            if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence
+                fprintf(pysam_stderr,"[%s] Skipping because %d is outside of %d [ref:%d]\n",
+                        __func__, b->core.pos, ref_len, b->core.tid);
+                continue;
+            }
+        } else {
+            has_ref = 0;
+        }
+
+        if (has_ref && (ma->conf->flag&MPLP_REALN)) sam_prob_realn(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
+        if (has_ref && ma->conf->capQ_thres > 10) {
+            int q = sam_cap_mapq(b, ref, ref_len, ma->conf->capQ_thres);
+            if (q < 0) continue;    // skip
+            else if (b->core.qual > q) b->core.qual = q;
+        }
+        if (b->core.qual < ma->conf->min_mq) continue;
+        else if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&BAM_FPAIRED) && !(b->core.flag&BAM_FPROPER_PAIR)) continue;
+
+        return ret;
+    };
+    return ret;
+}
+
+// Called once per new bam added to the pileup.
+// We cache sample information here so we don't have to keep recomputing this
+// on each and every pileup column.
+//
+// Cd is an arbitrary block of data we can write into, which ends up in
+// the pileup structures.  We stash the sample ID there.
+static int pileup_constructor(void *data, const bam1_t *b, bam_pileup_cd *cd) {
+    mplp_aux_t *ma = (mplp_aux_t *)data;
+    cd->i = bam_smpl_get_sample_id(ma->conf->bsmpl, ma->bam_id, (bam1_t *)b);
+    return 0;
+}
+
+static void group_smpl(mplp_pileup_t *m, bam_smpl_t *bsmpl, int n, int *n_plp, const bam_pileup1_t **plp)
+{
+    int i, j;
+    memset(m->n_plp, 0, m->n * sizeof(int));
+    for (i = 0; i < n; ++i) // iterate over all bams
+    {
+        for (j = 0; j < n_plp[i]; ++j)  // iterate over all reads available at this position
+        {
+            const bam_pileup1_t *p = plp[i] + j;
+            int id = p->cd.i;
+            if (m->n_plp[id] == m->m_plp[id]) 
+            {
+                m->m_plp[id] = m->m_plp[id]? m->m_plp[id]<<1 : 8;
+                m->plp[id] = (bam_pileup1_t*) realloc(m->plp[id], sizeof(bam_pileup1_t) * m->m_plp[id]);
+            }
+            m->plp[id][m->n_plp[id]++] = *p;
+        }
+    }
+}
+
+static void flush_bcf_records(mplp_conf_t *conf, htsFile *fp, bcf_hdr_t *hdr, bcf1_t *rec)
+{
+    if ( !conf->gvcf )
+    {
+        if ( rec ) bcf_write1(fp, hdr, rec);
+        return;
+    }
+
+    if ( !rec )
+    {
+        gvcf_write(conf->gvcf, fp, hdr, NULL, 0);
+        return;
+    }
+
+    int is_ref = 0;
+    if ( rec->n_allele==1 ) is_ref = 1;
+    else if ( rec->n_allele==2 )
+    {
+        // second allele is mpileup's X, not a variant
+        if ( rec->d.allele[1][0]=='<' && rec->d.allele[1][1]=='*' && rec->d.allele[1][2]=='>' ) is_ref = 1;
+    }
+    rec = gvcf_write(conf->gvcf, fp, hdr, rec, is_ref);
+    if ( rec ) bcf_write1(fp,hdr,rec);
+}
+
+static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
+{
+    bam_hdr_t *hdr = conf->mplp_data[0]->h; // header of first file in input list
+
+    int ret, i, tid, pos, ref_len;
+    char *ref;
+
+    while ( (ret=bam_mplp_auto(conf->iter, &tid, &pos, conf->n_plp, conf->plp)) > 0) 
+    {
+        if ( end && (pos<beg || pos>end) ) continue;
+        if ( conf->bed && tid >= 0 )
+        {
+            int overlap = regidx_overlap(conf->bed, hdr->target_name[tid], pos, pos, NULL);
+            if ( !conf->bed_logic ) overlap = overlap ? 0 : 1;
+            if ( !overlap ) continue;
+        }
+        mplp_get_ref(conf->mplp_data[0], tid, &ref, &ref_len);
+
+        int total_depth, _ref0, ref16;
+        for (i = total_depth = 0; i < conf->nfiles; ++i) total_depth += conf->n_plp[i];
+        group_smpl(conf->gplp, conf->bsmpl, conf->nfiles, conf->n_plp, conf->plp);
+        _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
+        ref16 = seq_nt16_table[_ref0];
+        bcf_callaux_clean(conf->bca, &conf->bc);
+        for (i = 0; i < conf->gplp->n; ++i)
+            bcf_call_glfgen(conf->gplp->n_plp[i], conf->gplp->plp[i], ref16, conf->bca, conf->bcr + i);
+        conf->bc.tid = tid; conf->bc.pos = pos;
+        bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, ref16, &conf->bc);
+        bcf_clear1(conf->bcf_rec);
+        bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, 0, 0);
+        flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, conf->bcf_rec);
+
+        // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
+        // check me: rghash in bcf_call_gap_prep() should have no effect, reads mplp_func already excludes them
+        if (!(conf->flag&MPLP_NO_INDEL) && total_depth < conf->max_indel_depth 
+            && bcf_call_gap_prep(conf->gplp->n, conf->gplp->n_plp, conf->gplp->plp, pos, conf->bca, ref) >= 0)
+        {
+            bcf_callaux_clean(conf->bca, &conf->bc);
+            for (i = 0; i < conf->gplp->n; ++i)
+                bcf_call_glfgen(conf->gplp->n_plp[i], conf->gplp->plp[i], -1, conf->bca, conf->bcr + i);
+            if (bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, -1, &conf->bc) >= 0) 
+            {
+                bcf_clear1(conf->bcf_rec);
+                bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, conf->bca, ref);
+                flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, conf->bcf_rec);
+            }
+        }
+    }
+    return 0;
+}
+
+static int mpileup(mplp_conf_t *conf)
+{
+    if (conf->nfiles == 0) {
+        fprintf(pysam_stderr,"[%s] no input file/data given\n", __func__);
+        exit(EXIT_FAILURE);
+    }
+
+    mplp_ref_t mp_ref = MPLP_REF_INIT;
+    conf->gplp = (mplp_pileup_t *) calloc(1,sizeof(mplp_pileup_t));
+    conf->mplp_data = (mplp_aux_t**) calloc(conf->nfiles, sizeof(mplp_aux_t*));
+    conf->plp = (const bam_pileup1_t**) calloc(conf->nfiles, sizeof(bam_pileup1_t*));
+    conf->n_plp = (int*) calloc(conf->nfiles, sizeof(int));
+
+    // Allow to run mpileup on multiple regions in one go. This comes at cost: the bai index
+    // must be kept in the memory for the whole time which can be a problem with many bams.
+    // Therefore if none or only one region is requested, we initialize the bam iterator as
+    // before and free the index. Only when multiple regions are queried, we keep the index.
+    int nregs = 0;
+    if ( conf->reg_fname )
+    {
+        if ( conf->reg_is_file )
+        {
+            conf->reg = regidx_init(conf->reg_fname,NULL,NULL,0,NULL);
+            if ( !conf->reg ) {
+                fprintf(pysam_stderr,"Could not parse the regions: %s\n", conf->reg_fname);
+                exit(EXIT_FAILURE);
+            }
+        }
+        else
+        {
+            conf->reg = regidx_init(NULL,regidx_parse_reg,NULL,sizeof(char*),NULL);
+            if ( regidx_insert_list(conf->reg,conf->reg_fname,',') !=0 ) {
+                fprintf(pysam_stderr,"Could not parse the regions: %s\n", conf->reg_fname);
+                exit(EXIT_FAILURE);
+            }
+        }
+        nregs = regidx_nregs(conf->reg);
+        conf->reg_itr = regitr_init(conf->reg);
+        regitr_loop(conf->reg_itr);   // region iterator now positioned at the first region
+    }
+
+    // read the header of each file in the list and initialize data
+    // beware: mpileup has always assumed that tid's are consistent in the headers, add sanity check at least!
+    bam_hdr_t *hdr = NULL;      // header of first file in input list
+    int i;
+    for (i = 0; i < conf->nfiles; ++i) {
+        bam_hdr_t *h_tmp;
+        conf->mplp_data[i] = (mplp_aux_t*) calloc(1, sizeof(mplp_aux_t));
+        conf->mplp_data[i]->fp = sam_open(conf->files[i], "rb");
+        if ( !conf->mplp_data[i]->fp )
+        {
+            fprintf(pysam_stderr, "[%s] failed to open %s: %s\n", __func__, conf->files[i], strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+        if (hts_set_opt(conf->mplp_data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
+            fprintf(pysam_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+            exit(EXIT_FAILURE);
+        }
+        if (conf->fai_fname && hts_set_fai_filename(conf->mplp_data[i]->fp, conf->fai_fname) != 0) {
+            fprintf(pysam_stderr, "[%s] failed to process %s: %s\n",
+                    __func__, conf->fai_fname, strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+        conf->mplp_data[i]->conf = conf;
+        conf->mplp_data[i]->ref = &mp_ref;
+        h_tmp = sam_hdr_read(conf->mplp_data[i]->fp);
+        if ( !h_tmp ) {
+            fprintf(pysam_stderr,"[%s] fail to read the header of %s\n", __func__, conf->files[i]);
+            exit(EXIT_FAILURE);
+        }
+        conf->mplp_data[i]->h = i ? hdr : h_tmp; // for j==0, "h" has not been set yet
+        conf->mplp_data[i]->bam_id = bam_smpl_add_bam(conf->bsmpl,h_tmp->text,conf->files[i]);
+        if ( conf->mplp_data[i]->bam_id<0 )
+        {
+            // no usable readgroups in this bam, it can be skipped
+            sam_close(conf->mplp_data[i]->fp);
+            free(conf->mplp_data[i]);
+            bam_hdr_destroy(h_tmp);
+            free(conf->files[i]);
+            if ( i+1<conf->nfiles ) memmove(&conf->files[i],&conf->files[i+1],sizeof(*conf->files)*(conf->nfiles-i-1));
+            conf->nfiles--;
+            i--;
+            continue;
+        }
+        if (conf->reg) {
+            hts_idx_t *idx = sam_index_load(conf->mplp_data[i]->fp, conf->files[i]);
+            if (idx == NULL) {
+                fprintf(pysam_stderr, "[%s] fail to load index for %s\n", __func__, conf->files[i]);
+                exit(EXIT_FAILURE);
+            }
+            conf->buf.l = 0;
+            ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg+1,conf->reg_itr->end+1);
+            conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->buf.s);
+            if ( !conf->mplp_data[i]->iter ) 
+            {
+                conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
+                if ( conf->mplp_data[i]->iter ) {
+                    fprintf(pysam_stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
+                    exit(EXIT_FAILURE);
+                }
+                fprintf(pysam_stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
+                exit(EXIT_FAILURE);
+            }
+            if ( nregs==1 ) // no need to keep the index in memory
+               hts_idx_destroy(idx);
+            else
+                conf->mplp_data[i]->idx = idx;
+        }
+
+        if ( !hdr ) hdr = h_tmp; /* save the header of first file in list */
+        else {
+            // FIXME: check consistency between h and h_tmp
+            bam_hdr_destroy(h_tmp);
+
+            // we store only the first file's header; it's (alleged to be)
+            // compatible with the i-th file's target_name lookup needs
+            conf->mplp_data[i]->h = hdr;
+        }
+    }
+    // allocate data storage proportionate to number of samples being studied sm->n
+    bam_smpl_get_samples(conf->bsmpl, &conf->gplp->n);
+    conf->gplp->n_plp = (int*) calloc(conf->gplp->n, sizeof(int));
+    conf->gplp->m_plp = (int*) calloc(conf->gplp->n, sizeof(int));
+    conf->gplp->plp = (bam_pileup1_t**) calloc(conf->gplp->n, sizeof(bam_pileup1_t*));  
+
+    fprintf(pysam_stderr, "[%s] %d samples in %d input files\n", __func__, conf->gplp->n, conf->nfiles);
+    // write the VCF header
+    conf->bcf_fp = hts_open(conf->output_fname?conf->output_fname:"-", hts_bcf_wmode(conf->output_type));
+    if (conf->bcf_fp == NULL) {
+        fprintf(pysam_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+    if ( conf->n_threads ) hts_set_threads(conf->bcf_fp, conf->n_threads);
+
+    // BCF header creation
+    conf->bcf_hdr = bcf_hdr_init("w");
+    conf->buf.l = 0;
+
+    if (conf->record_cmd_line)
+    {
+        ksprintf(&conf->buf, "##bcftoolsVersion=%s+htslib-%s\n",bcftools_version(),hts_version());
+        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
+
+        conf->buf.l = 0;
+        ksprintf(&conf->buf, "##bcftoolsCommand=mpileup");
+        for (i=1; i<conf->argc; i++) ksprintf(&conf->buf, " %s", conf->argv[i]);
+        kputc('\n', &conf->buf);
+        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
+    }
+
+    if (conf->fai_fname)
+    {
+        conf->buf.l = 0;
+        ksprintf(&conf->buf, "##reference=file://%s\n", conf->fai_fname);
+        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
+    }
+
+    // Translate BAM @SQ tags to BCF ##contig tags
+    // todo: use/write new BAM header manipulation routines, fill also UR, M5
+    for (i=0; i<hdr->n_targets; i++)
+    {
+        conf->buf.l = 0;
+        ksprintf(&conf->buf, "##contig=<ID=%s,length=%d>", hdr->target_name[i], hdr->target_len[i]);
+        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
+    }
+    conf->buf.l = 0;
+
+    bcf_hdr_append(conf->bcf_hdr,"##ALT=<ID=*,Description=\"Represents allele(s) other than observed.\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of reads supporting an indel\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of reads supporting an indel\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
+#if CDF_MWU_TESTS
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">");
+#endif
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">");
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">");
+    bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">");
+    if ( conf->fmt_flag&B2B_FMT_DP )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">");
+    if ( conf->fmt_flag&B2B_FMT_DV )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">");
+    if ( conf->fmt_flag&B2B_FMT_DPR )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
+    if ( conf->fmt_flag&B2B_INFO_DPR )
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
+    if ( conf->fmt_flag&B2B_FMT_DP4 )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">");
+    if ( conf->fmt_flag&B2B_FMT_SP )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">");
+    if ( conf->fmt_flag&B2B_FMT_AD )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths\">");
+    if ( conf->fmt_flag&B2B_FMT_ADF )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand\">");
+    if ( conf->fmt_flag&B2B_FMT_ADR )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand\">");
+    if ( conf->fmt_flag&B2B_INFO_AD )
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths\">");
+    if ( conf->fmt_flag&B2B_INFO_ADF )
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=ADF,Number=R,Type=Integer,Description=\"Total allelic depths on the forward strand\">");
+    if ( conf->fmt_flag&B2B_INFO_ADR )
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=ADR,Number=R,Type=Integer,Description=\"Total allelic depths on the reverse strand\">");
+    if ( conf->gvcf )
+        gvcf_update_header(conf->gvcf, conf->bcf_hdr);
+
+    int nsmpl;
+    const char **smpl = bam_smpl_get_samples(conf->bsmpl, &nsmpl);
+    for (i=0; i<nsmpl; i++)
+        bcf_hdr_add_sample(conf->bcf_hdr, smpl[i]);
+    bcf_hdr_write(conf->bcf_fp, conf->bcf_hdr);
+
+    conf->bca = bcf_call_init(-1., conf->min_baseQ);
+    conf->bcr = (bcf_callret1_t*) calloc(nsmpl, sizeof(bcf_callret1_t));
+    conf->bca->openQ = conf->openQ, conf->bca->extQ = conf->extQ, conf->bca->tandemQ = conf->tandemQ;
+    conf->bca->min_frac = conf->min_frac;
+    conf->bca->min_support = conf->min_support;
+    conf->bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;
+
+    conf->bc.bcf_hdr = conf->bcf_hdr;
+    conf->bc.n  = nsmpl;
+    conf->bc.PL = (int32_t*) malloc(15 * nsmpl * sizeof(*conf->bc.PL));
+    if (conf->fmt_flag)
+    {
+        assert( sizeof(float)==sizeof(int32_t) );
+        conf->bc.DP4 = (int32_t*) malloc(nsmpl * sizeof(int32_t) * 4);
+        conf->bc.fmt_arr = (uint8_t*) malloc(nsmpl * sizeof(float)); // all fmt_flag fields, float and int32
+        if ( conf->fmt_flag&(B2B_INFO_DPR|B2B_FMT_DPR|B2B_INFO_AD|B2B_INFO_ADF|B2B_INFO_ADR|B2B_FMT_AD|B2B_FMT_ADF|B2B_FMT_ADR) )
+        {
+            // first B2B_MAX_ALLELES fields for total numbers, the rest per-sample
+            conf->bc.ADR = (int32_t*) malloc((nsmpl+1)*B2B_MAX_ALLELES*sizeof(int32_t));
+            conf->bc.ADF = (int32_t*) malloc((nsmpl+1)*B2B_MAX_ALLELES*sizeof(int32_t));
+            for (i=0; i<nsmpl; i++)
+            {
+                conf->bcr[i].ADR = conf->bc.ADR + (i+1)*B2B_MAX_ALLELES;
+                conf->bcr[i].ADF = conf->bc.ADF + (i+1)*B2B_MAX_ALLELES;
+            }
+        }
+    }
+
+    // init mpileup
+    conf->iter = bam_mplp_init(conf->nfiles, mplp_func, (void**)conf->mplp_data);
+    if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(conf->iter);
+    if ( (double)conf->max_depth * conf->nfiles > 1<<20)
+        fprintf(pysam_stderr, "Warning: Potential memory hog, up to %.0fM reads in the pileup!\n", (double)conf->max_depth*conf->nfiles);
+    if ( (double)conf->max_depth * conf->nfiles / nsmpl < 250 )
+        fprintf(pysam_stderr, "Note: The maximum per-sample depth with -d %d is %.1fx\n", conf->max_depth,(double)conf->max_depth * conf->nfiles / nsmpl);
+    bam_mplp_set_maxcnt(conf->iter, conf->max_depth);
+    conf->max_indel_depth = conf->max_indel_depth * nsmpl;
+    conf->bcf_rec = bcf_init1();
+    bam_mplp_constructor(conf->iter, pileup_constructor);
+
+    // Run mpileup for multiple regions
+    if ( nregs )
+    {
+        int ireg = 0;
+        do 
+        {
+            // first region is already positioned
+            if ( ireg++ > 0 )
+            {
+                conf->buf.l = 0;
+                ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg,conf->reg_itr->end);
+
+                for (i=0; i<conf->nfiles; i++) 
+                {
+                    hts_itr_destroy(conf->mplp_data[i]->iter);
+                    conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->buf.s);
+                    if ( !conf->mplp_data[i]->iter ) 
+                    {
+                        conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
+                        if ( conf->mplp_data[i]->iter ) {
+                            fprintf(pysam_stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
+                            exit(EXIT_FAILURE);
+                        }
+                        fprintf(pysam_stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
+                        exit(EXIT_FAILURE);
+                    }
+                    bam_mplp_reset(conf->iter);
+                }
+            }
+            mpileup_reg(conf,conf->reg_itr->beg,conf->reg_itr->end);
+        }
+        while ( regitr_loop(conf->reg_itr) );
+    }
+    else
+        mpileup_reg(conf,0,0);
+
+    flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, NULL);
+
+    // clean up
+    free(conf->bc.tmp.s);
+    bcf_destroy1(conf->bcf_rec);
+    if (conf->bcf_fp)
+    {
+        hts_close(conf->bcf_fp);
+        bcf_hdr_destroy(conf->bcf_hdr);
+        bcf_call_destroy(conf->bca);
+        free(conf->bc.PL);
+        free(conf->bc.DP4);
+        free(conf->bc.ADR);
+        free(conf->bc.ADF);
+        free(conf->bc.fmt_arr);
+        free(conf->bcr);
+    }
+    if ( conf->gvcf ) gvcf_destroy(conf->gvcf);
+    free(conf->buf.s);
+    for (i = 0; i < conf->gplp->n; ++i) free(conf->gplp->plp[i]);
+    free(conf->gplp->plp); free(conf->gplp->n_plp); free(conf->gplp->m_plp); free(conf->gplp);
+    bam_mplp_destroy(conf->iter);
+    bam_hdr_destroy(hdr);
+    for (i = 0; i < conf->nfiles; ++i) {
+        if ( nregs>1 ) hts_idx_destroy(conf->mplp_data[i]->idx);
+        sam_close(conf->mplp_data[i]->fp);
+        if ( conf->mplp_data[i]->iter) hts_itr_destroy(conf->mplp_data[i]->iter);
+        free(conf->mplp_data[i]);
+    }
+    if ( conf->reg_itr ) regitr_destroy(conf->reg_itr);
+    free(conf->mplp_data); free(conf->plp); free(conf->n_plp);
+    free(mp_ref.ref[0]);
+    free(mp_ref.ref[1]);
+    return 0;
+}
+
+static int is_url(const char *s)
+{
+    static const char uri_scheme_chars[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-";
+    return s[strspn(s, uri_scheme_chars)] == ':';
+}
+
+#define MAX_PATH_LEN 1024
+int read_file_list(const char *file_list,int *n,char **argv[])
+{
+    char buf[MAX_PATH_LEN];
+    int len, nfiles = 0;
+    char **files = NULL;
+    struct stat sb;
+
+    *n = 0;
+    *argv = NULL;
+
+    FILE *fh = fopen(file_list,"r");
+    if ( !fh )
+    {
+        fprintf(pysam_stderr,"%s: %s\n", file_list,strerror(errno));
+        return 1;
+    }
+
+    files = (char**) calloc(nfiles,sizeof(char*));
+    nfiles = 0;
+    while ( fgets(buf,MAX_PATH_LEN,fh) )
+    {
+        // allow empty lines and trailing spaces
+        len = strlen(buf);
+        while ( len>0 && isspace(buf[len-1]) ) len--;
+        if ( !len ) continue;
+
+        // check sanity of the file list
+        buf[len] = 0;
+        if (! (is_url(buf) || stat(buf, &sb) == 0))
+        {
+            // no such file, check if it is safe to print its name
+            int i, safe_to_print = 1;
+            for (i=0; i<len; i++)
+                if (!isprint(buf[i])) { safe_to_print = 0; break; }
+            if ( safe_to_print )
+                fprintf(pysam_stderr,"The file list \"%s\" appears broken, could not locate: %s\n", file_list,buf);
+            else
+                fprintf(pysam_stderr,"Does the file \"%s\" really contain a list of files and do all exist?\n", file_list);
+            return 1;
+        }
+
+        nfiles++;
+        files = (char**) realloc(files,nfiles*sizeof(char*));
+        files[nfiles-1] = strdup(buf);
+    }
+    fclose(fh);
+    if ( !nfiles )
+    {
+        fprintf(pysam_stderr,"No files read from %s\n", file_list);
+        return 1;
+    }
+    *argv = files;
+    *n    = nfiles;
+    return 0;
+}
+#undef MAX_PATH_LEN
+
+int parse_format_flag(const char *str)
+{
+    int i, flag = 0, n_tags;
+    char **tags = hts_readlist(str, 0, &n_tags);
+    for(i=0; i<n_tags; i++)
+    {
+        if ( !strcasecmp(tags[i],"DP") || !strcasecmp(tags[i],"FORMAT/DP") || !strcasecmp(tags[i],"FMT/DP") ) flag |= B2B_FMT_DP;
+        else if ( !strcasecmp(tags[i],"DV") || !strcasecmp(tags[i],"FORMAT/DV") || !strcasecmp(tags[i],"FMT/DV") ) { flag |= B2B_FMT_DV; fprintf(pysam_stderr, "[warning] tag DV functional, but deprecated. Please switch to `AD` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"SP") || !strcasecmp(tags[i],"FORMAT/SP") || !strcasecmp(tags[i],"FMT/SP") ) flag |= B2B_FMT_SP;
+        else if ( !strcasecmp(tags[i],"DP4") || !strcasecmp(tags[i],"FORMAT/DP4") || !strcasecmp(tags[i],"FMT/DP4") ) { flag |= B2B_FMT_DP4; fprintf(pysam_stderr, "[warning] tag DP4 functional, but deprecated. Please switch to `ADF` and `ADR` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"DPR") || !strcasecmp(tags[i],"FORMAT/DPR") || !strcasecmp(tags[i],"FMT/DPR") ) { flag |= B2B_FMT_DPR; fprintf(pysam_stderr, "[warning] tag DPR functional, but deprecated. Please switch to `AD` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"INFO/DPR") ) { flag |= B2B_INFO_DPR; fprintf(pysam_stderr, "[warning] tag INFO/DPR functional, but deprecated. Please switch to `INFO/AD` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"AD") || !strcasecmp(tags[i],"FORMAT/AD") || !strcasecmp(tags[i],"FMT/AD") ) flag |= B2B_FMT_AD;
+        else if ( !strcasecmp(tags[i],"ADF") || !strcasecmp(tags[i],"FORMAT/ADF") || !strcasecmp(tags[i],"FMT/ADF") ) flag |= B2B_FMT_ADF;
+        else if ( !strcasecmp(tags[i],"ADR") || !strcasecmp(tags[i],"FORMAT/ADR") || !strcasecmp(tags[i],"FMT/ADR") ) flag |= B2B_FMT_ADR;
+        else if ( !strcasecmp(tags[i],"INFO/AD") ) flag |= B2B_INFO_AD;
+        else if ( !strcasecmp(tags[i],"INFO/ADF") ) flag |= B2B_INFO_ADF;
+        else if ( !strcasecmp(tags[i],"INFO/ADR") ) flag |= B2B_INFO_ADR;
+        else
+        {
+            fprintf(pysam_stderr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
+            exit(EXIT_FAILURE);
+        }
+        free(tags[i]);
+    }
+    if (n_tags) free(tags);
+    return flag;
+}
+
+static void list_annotations(FILE *fp)
+{
+    fprintf(fp,
+"\n"
+"FORMAT annotation tags available (\"FORMAT/\" prefix is optional):\n"
+"\n"
+"  FORMAT/AD  .. Allelic depth (Number=R,Type=Integer)\n"
+"  FORMAT/ADF .. Allelic depths on the forward strand (Number=R,Type=Integer)\n"
+"  FORMAT/ADR .. Allelic depths on the reverse strand (Number=R,Type=Integer)\n"
+"  FORMAT/DP  .. Number of high-quality bases (Number=1,Type=Integer)\n"
+"  FORMAT/SP  .. Phred-scaled strand bias P-value (Number=1,Type=Integer)\n"
+"\n"
+"INFO annotation tags available:\n"
+"\n"
+"  INFO/AD  .. Total allelic depth (Number=R,Type=Integer)\n"
+"  INFO/ADF .. Total allelic depths on the forward strand (Number=R,Type=Integer)\n"
+"  INFO/ADR .. Total allelic depths on the reverse strand (Number=R,Type=Integer)\n"
+"\n");
+}
+
+static void print_usage(FILE *fp, const mplp_conf_t *mplp)
+{
+    char *tmp_require = bam_flag2str(mplp->rflag_require);
+    char *tmp_filter  = bam_flag2str(mplp->rflag_filter);
+
+    // Display usage information, formatted for the standard 80 columns.
+    // (The unusual string formatting here aids the readability of this
+    // source code in 80 columns, to the extent that's possible.)
+
+    fprintf(fp,
+"\n"
+"Usage: bcftools mpileup [options] in1.bam [in2.bam [...]]\n"
+"\n"
+"Input options:\n"
+"  -6, --illumina1.3+      quality is in the Illumina-1.3+ encoding\n"
+"  -A, --count-orphans     do not discard anomalous read pairs\n"
+"  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
+"  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)\n"
+"  -C, --adjust-MQ INT     adjust mapping quality; recommended:50, disable:0 [0]\n"
+"  -d, --max-depth INT     max per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+    fprintf(fp,
+"  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs\n"
+"  -f, --fasta-ref FILE    faidx indexed reference sequence file\n"
+"      --no-reference      do not require fasta reference file\n"
+"  -G, --read-groups FILE  select or exclude read groups listed in the file\n"
+"  -q, --min-MQ INT        skip alignments with mapQ smaller than INT [%d]\n", mplp->min_mq);
+    fprintf(fp,
+"  -Q, --min-BQ INT        skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp->min_baseQ);
+    fprintf(fp,
+"  -r, --regions REG[,...] comma separated list of regions in which pileup is generated\n"
+"  -R, --regions-file FILE restrict to regions listed in a file\n"
+"      --ignore-RG         ignore RG tags (one BAM = one sample)\n"
+"  --rf, --incl-flags STR|INT  required flags: skip reads with mask bits unset [%s]\n", tmp_require);
+    fprintf(fp,
+"  --ff, --excl-flags STR|INT  filter flags: skip reads with mask bits set\n"
+"                                            [%s]\n", tmp_filter);
+    fprintf(fp,
+"  -s, --samples LIST      comma separated list of samples to include\n"
+"  -S, --samples-file FILE file of samples to include\n"
+"  -t, --targets REG[,...] similar to -r but streams rather than index-jumps\n"
+"  -T, --targets-file FILE similar to -R but streams rather than index-jumps\n"
+"  -x, --ignore-overlaps   disable read-pair overlap detection\n"
+"\n"
+"Output options:\n"
+"  -a, --annotate LIST     optional tags to output; '?' to list []\n"
+"  -g, --gvcf INT[,...]    group non-variant sites into gVCF blocks according\n"
+"                          to minimum per-sample DP\n"
+"      --no-version        do not append version and command line to the header\n"
+"  -o, --output FILE       write output to FILE [standard output]\n"
+"  -O, --output-type TYPE  'b' compressed BCF; 'u' uncompressed BCF;\n"
+"                          'z' compressed VCF; 'v' uncompressed VCF [v]\n"
+"      --threads INT       number of extra output compression threads [0]\n"
+"\n"
+"SNP/INDEL genotype likelihoods options:\n"
+"  -e, --ext-prob INT      Phred-scaled gap extension seq error probability [%d]\n", mplp->extQ);
+    fprintf(fp,
+"  -F, --gap-frac FLOAT    minimum fraction of gapped reads [%g]\n", mplp->min_frac);
+    fprintf(fp,
+"  -h, --tandem-qual INT   coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
+    fprintf(fp,
+"  -I, --skip-indels       do not perform indel calling\n"
+"  -L, --max-idepth INT    maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
+    fprintf(fp,
+"  -m, --min-ireads INT    minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
+    fprintf(fp,
+"  -o, --open-prob INT     Phred-scaled gap open seq error probability [%d]\n", mplp->openQ);
+    fprintf(fp,
+"  -p, --per-sample-mF     apply -m and -F per-sample for increased sensitivity\n"
+"  -P, --platforms STR     comma separated list of platforms for indels [all]\n"
+"\n"
+"Notes: Assuming diploid individuals.\n"
+"\n");
+
+    free(tmp_require);
+    free(tmp_filter);
+}
+
+int bam_mpileup(int argc, char *argv[])
+{
+    int c;
+    const char *file_list = NULL;
+    char **fn = NULL;
+    int nfiles = 0, use_orphan = 0, noref = 0;
+    mplp_conf_t mplp;
+    memset(&mplp, 0, sizeof(mplp_conf_t));
+    mplp.min_baseQ = 13;
+    mplp.capQ_thres = 0;
+    mplp.max_depth = 250; mplp.max_indel_depth = 250;
+    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
+    mplp.min_frac = 0.002; mplp.min_support = 1;
+    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
+    mplp.argc = argc; mplp.argv = argv;
+    mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
+    mplp.output_fname = NULL;
+    mplp.output_type = FT_VCF;
+    mplp.record_cmd_line = 1;
+    mplp.n_threads = 0;
+    mplp.bsmpl = bam_smpl_init();
+
+    static const struct option lopts[] =
+    {
+        {"rf", required_argument, NULL, 1},   // require flag
+        {"ff", required_argument, NULL, 2},   // filter flag
+        {"incl-flags", required_argument, NULL, 1},
+        {"excl-flags", required_argument, NULL, 2},
+        {"output", required_argument, NULL, 3},
+        {"open-prob", required_argument, NULL, 4},
+        {"ignore-RG", no_argument, NULL, 5},
+        {"ignore-rg", no_argument, NULL, 5},
+        {"gvcf", required_argument, NULL, 'g'},
+        {"non-reference", no_argument, NULL, 7},
+        {"no-version", no_argument, NULL, 8},
+        {"threads",required_argument,NULL,9},
+        {"illumina1.3+", no_argument, NULL, '6'},
+        {"count-orphans", no_argument, NULL, 'A'},
+        {"bam-list", required_argument, NULL, 'b'},
+        {"no-BAQ", no_argument, NULL, 'B'},
+        {"no-baq", no_argument, NULL, 'B'},
+        {"adjust-MQ", required_argument, NULL, 'C'},
+        {"adjust-mq", required_argument, NULL, 'C'},
+        {"max-depth", required_argument, NULL, 'd'},
+        {"redo-BAQ", no_argument, NULL, 'E'},
+        {"redo-baq", no_argument, NULL, 'E'},
+        {"fasta-ref", required_argument, NULL, 'f'},
+        {"read-groups", required_argument, NULL, 'G'},
+        {"region", required_argument, NULL, 'r'},
+        {"regions", required_argument, NULL, 'r'},
+        {"regions-file", required_argument, NULL, 'R'},
+        {"targets", required_argument, NULL, 't'},
+        {"targets-file", required_argument, NULL, 'T'},
+        {"min-MQ", required_argument, NULL, 'q'},
+        {"min-mq", required_argument, NULL, 'q'},
+        {"min-BQ", required_argument, NULL, 'Q'},
+        {"min-bq", required_argument, NULL, 'Q'},
+        {"ignore-overlaps", no_argument, NULL, 'x'},
+        {"output-type", required_argument, NULL, 'O'},
+        {"samples", required_argument, NULL, 's'},
+        {"samples-file", required_argument, NULL, 'S'},
+        {"annotate", required_argument, NULL, 'a'},
+        {"ext-prob", required_argument, NULL, 'e'},
+        {"gap-frac", required_argument, NULL, 'F'},
+        {"tandem-qual", required_argument, NULL, 'h'},
+        {"skip-indels", no_argument, NULL, 'I'},
+        {"max-idepth", required_argument, NULL, 'L'},
+        {"min-ireads ", required_argument, NULL, 'm'},
+        {"per-sample-mF", no_argument, NULL, 'p'},
+        {"per-sample-mf", no_argument, NULL, 'p'},
+        {"platforms", required_argument, NULL, 'P'},
+        {NULL, 0, NULL, 0}
+    };
+    while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) {
+        switch (c) {
+        case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
+        case  1 :
+            mplp.rflag_require = bam_str2flag(optarg);
+            if ( mplp.rflag_require<0 ) { fprintf(pysam_stderr,"Could not parse --rf %s\n", optarg); return 1; }
+            break;
+        case  2 :
+            mplp.rflag_filter = bam_str2flag(optarg);
+            if ( mplp.rflag_filter<0 ) { fprintf(pysam_stderr,"Could not parse --ff %s\n", optarg); return 1; }
+            break;
+        case  3 : mplp.output_fname = optarg; break;
+        case  4 : mplp.openQ = atoi(optarg); break;
+        case  5 : bam_smpl_ignore_readgroups(mplp.bsmpl); break;
+        case 'g':
+            mplp.gvcf = gvcf_init(optarg);
+            if ( !mplp.gvcf ) error("Could not parse: --gvcf %s\n", optarg);
+            break;
+        case 'f':
+            mplp.fai = fai_load(optarg);
+            if (mplp.fai == NULL) return 1;
+            mplp.fai_fname = optarg;
+            break;
+        case  7 : noref = 1; break;
+        case  8 : mplp.record_cmd_line = 0; break;
+        case  9 : mplp.n_threads = strtol(optarg, 0, 0); break;
+        case 'd': mplp.max_depth = atoi(optarg); break;
+        case 'r': mplp.reg_fname = strdup(optarg); break;
+        case 'R': mplp.reg_fname = strdup(optarg); mplp.reg_is_file = 1; break;
+        case 't':
+                  // In the original version the whole BAM was streamed which is inefficient
+                  //  with few BED intervals and big BAMs. Todo: devise a heuristic to determine
+                  //  best strategy, that is streaming or jumping.
+                  if ( optarg[0]=='^' ) optarg++;
+                  else mplp.bed_logic = 1;
+                  mplp.bed = regidx_init(NULL,regidx_parse_reg,NULL,0,NULL);
+                  mplp.bed_itr = regitr_init(mplp.bed);
+                  if ( regidx_insert_list(mplp.bed,optarg,',') !=0 )
+                  {
+                      fprintf(pysam_stderr,"Could not parse the targets: %s\n", optarg);
+                      exit(EXIT_FAILURE);
+                  }
+                  break;
+        case 'T':
+                  if ( optarg[0]=='^' ) optarg++;
+                  else mplp.bed_logic = 1;
+                  mplp.bed = regidx_init(optarg,NULL,NULL,0,NULL);
+                  if (!mplp.bed) { fprintf(pysam_stderr, "bcftools mpileup: Could not read file \"%s\"", optarg); return 1; }
+                  break;
+        case 'P': mplp.pl_list = strdup(optarg); break;
+        case 'p': mplp.flag |= MPLP_PER_SAMPLE; break;
+        case 'B': mplp.flag &= ~MPLP_REALN; break;
+        case 'I': mplp.flag |= MPLP_NO_INDEL; break;
+        case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
+        case '6': mplp.flag |= MPLP_ILLUMINA13; break;
+        case 's': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,0)<0 ) error("Could not read samples: %s\n",optarg); break;
+        case 'S': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,1)<0 ) error("Could not read samples: %s\n",optarg); break;
+        case 'O': 
+            switch (optarg[0]) {
+                case 'b': mplp.output_type = FT_BCF_GZ; break;
+                case 'u': mplp.output_type = FT_BCF; break;
+                case 'z': mplp.output_type = FT_VCF_GZ; break;
+                case 'v': mplp.output_type = FT_VCF; break;
+                default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n", optarg); 
+            }
+            break;
+        case 'C': mplp.capQ_thres = atoi(optarg); break;
+        case 'q': mplp.min_mq = atoi(optarg); break;
+        case 'Q': mplp.min_baseQ = atoi(optarg); break;
+        case 'b': file_list = optarg; break;
+        case 'o': {
+                char *end;
+                long value = strtol(optarg, &end, 10);
+                // Distinguish between -o INT and -o FILE (a bit of a hack!)
+                if (*end == '\0') mplp.openQ = value;
+                else mplp.output_fname = optarg;
+            }
+            break;
+        case 'e': mplp.extQ = atoi(optarg); break;
+        case 'h': mplp.tandemQ = atoi(optarg); break;
+        case 'A': use_orphan = 1; break;
+        case 'F': mplp.min_frac = atof(optarg); break;
+        case 'm': mplp.min_support = atoi(optarg); break;
+        case 'L': mplp.max_indel_depth = atoi(optarg); break;
+        case 'G': bam_smpl_add_readgroups(mplp.bsmpl, optarg, 1); break;
+        case 'a':
+            if (optarg[0]=='?') {
+                list_annotations(pysam_stderr);
+                return 1;
+            }
+            mplp.fmt_flag |= parse_format_flag(optarg);
+        break;
+        default:
+            fprintf(pysam_stderr,"Invalid option: '%c'\n", c);
+            return 1;
+        }
+    }
+
+    if ( mplp.gvcf && !(mplp.fmt_flag&B2B_FMT_DP) )
+    {
+        fprintf(pysam_stderr,"[warning] The -t DP option is required with --gvcf, switching on.\n");
+        mplp.fmt_flag |= B2B_FMT_DP;
+    }
+    if ( mplp.flag&(MPLP_BCF|MPLP_VCF|MPLP_NO_COMP) )
+    {
+        if ( mplp.flag&MPLP_VCF )
+        {
+            if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_VCF;
+            else mplp.output_type = FT_VCF_GZ;
+        }
+        else if ( mplp.flag&MPLP_BCF )
+        {
+            if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_BCF;
+            else mplp.output_type = FT_BCF_GZ;
+        }
+    }
+    if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
+    {
+        fprintf(pysam_stderr,"Error: The -B option cannot be combined with -E\n");
+        return 1;
+    }
+    if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
+    if (argc == 1)
+    {
+        print_usage(pysam_stderr, &mplp);
+        return 1;
+    }
+    if (!mplp.fai && !noref) {
+        fprintf(pysam_stderr,"Error: mpileup requires the --fasta-ref option by default; use --no-reference to run without a fasta reference\n");
+        return 1;
+    }
+    int ret,i;
+    if (file_list) 
+    {
+        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
+        mplp.files  = fn;
+        mplp.nfiles = nfiles;
+    }
+    else
+    {
+        mplp.nfiles = argc - optind;
+        mplp.files  = (char**) malloc(mplp.nfiles*sizeof(char*));
+        for (i=0; i<mplp.nfiles; i++) mplp.files[i] = strdup(argv[optind+i]);
+    }
+    ret = mpileup(&mplp);
+
+    for (i=0; i<mplp.nfiles; i++) free(mplp.files[i]);
+    free(mplp.files);
+    free(mplp.reg_fname); free(mplp.pl_list);
+    if (mplp.fai) fai_destroy(mplp.fai);
+    if (mplp.bed)
+    {
+        regidx_destroy(mplp.bed);
+        regitr_destroy(mplp.bed_itr);
+    }
+    if (mplp.reg) regidx_destroy(mplp.reg);
+    bam_smpl_destroy(mplp.bsmpl);
+    return ret;
+}
diff --git a/bcftools/mw.h b/bcftools/mw.h
new file mode 100644
index 0000000..3e68cbf
--- /dev/null
+++ b/bcftools/mw.h
@@ -0,0 +1,1944 @@
+/* mw.h -- a table of precomputed Mann Whitney coefficients (for bam2bcf.c)
+
+   The MIT License
+
+   Copyright (C) 2016 Genome Research Ltd.
+
+   Author: James Bonfield <jkb at sanger.ac.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+*/
+
+// Code to build this table is below
+#ifdef BUILD_MW
+#include <stdio.h>
+
+double mann_whitney_1947(int n, int m, int U)
+{
+    if (U<0) return 0;
+    if (n==0||m==0) return U==0 ? 1 : 0;
+    return (double)n/(n+m)*mann_whitney_1947(n-1,m,U-m) + (double)m/(n+m)*mann_whitney_1947(n,m-1,U);
+}
+
+int main(void) {
+    int i, j, k;
+    printf("static double mw[6][6][50] = // [2-7][2-7][0-49]\n{\n");
+    for (i = 2; i < 8; i++) {
+	printf("    {\n");
+	for (j = 2; j < 8; j++) {
+	    printf("        {\n");
+	    for (k = 0; k < 50; k++) {
+		printf("            %.17f,\n", mann_whitney_1947(i,j,k));
+	    }
+	    printf("        },\n");
+	}
+	printf("    },\n");
+    }
+    printf("};\n");
+    return 0;
+}
+#endif
+
+static double mw[6][6][50] = // [2-7][2-7][0-49]
+{
+    {
+        {
+            0.16666666666666666,
+            0.16666666666666666,
+            0.33333333333333331,
+            0.16666666666666666,
+            0.16666666666666666,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.09999999999999999,
+            0.09999999999999999,
+            0.19999999999999998,
+            0.20000000000000001,
+            0.20000000000000001,
+            0.10000000000000001,
+            0.10000000000000001,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.06666666666666665,
+            0.06666666666666665,
+            0.13333333333333330,
+            0.13333333333333333,
+            0.20000000000000001,
+            0.13333333333333333,
+            0.13333333333333333,
+            0.06666666666666667,
+            0.06666666666666667,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.04761904761904761,
+            0.04761904761904761,
+            0.09523809523809522,
+            0.09523809523809523,
+            0.14285714285714288,
+            0.14285714285714285,
+            0.14285714285714285,
+            0.09523809523809523,
+            0.09523809523809523,
+            0.04761904761904762,
+            0.04761904761904762,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.03571428571428571,
+            0.03571428571428571,
+            0.07142857142857141,
+            0.07142857142857142,
+            0.10714285714285715,
+            0.10714285714285714,
+            0.14285714285714285,
+            0.10714285714285715,
+            0.10714285714285715,
+            0.07142857142857144,
+            0.07142857142857142,
+            0.03571428571428571,
+            0.03571428571428571,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.02777777777777777,
+            0.02777777777777777,
+            0.05555555555555555,
+            0.05555555555555555,
+            0.08333333333333334,
+            0.08333333333333333,
+            0.11111111111111110,
+            0.11111111111111113,
+            0.11111111111111113,
+            0.08333333333333334,
+            0.08333333333333334,
+            0.05555555555555556,
+            0.05555555555555555,
+            0.02777777777777778,
+            0.02777777777777778,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+    },
+    {
+        {
+            0.10000000000000001,
+            0.10000000000000001,
+            0.20000000000000001,
+            0.20000000000000001,
+            0.19999999999999998,
+            0.09999999999999999,
+            0.09999999999999999,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.05000000000000000,
+            0.05000000000000000,
+            0.10000000000000001,
+            0.14999999999999999,
+            0.14999999999999999,
+            0.14999999999999999,
+            0.14999999999999999,
+            0.10000000000000001,
+            0.05000000000000000,
+            0.05000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.02857142857142857,
+            0.02857142857142857,
+            0.05714285714285714,
+            0.08571428571428570,
+            0.11428571428571427,
+            0.11428571428571427,
+            0.14285714285714282,
+            0.11428571428571428,
+            0.11428571428571428,
+            0.08571428571428572,
+            0.05714285714285714,
+            0.02857142857142857,
+            0.02857142857142857,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.01785714285714286,
+            0.01785714285714286,
+            0.03571428571428571,
+            0.05357142857142856,
+            0.07142857142857142,
+            0.08928571428571427,
+            0.10714285714285711,
+            0.10714285714285712,
+            0.10714285714285714,
+            0.10714285714285715,
+            0.08928571428571427,
+            0.07142857142857142,
+            0.05357142857142857,
+            0.03571428571428571,
+            0.01785714285714286,
+            0.01785714285714286,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.01190476190476190,
+            0.01190476190476190,
+            0.02380952380952381,
+            0.03571428571428571,
+            0.04761904761904762,
+            0.05952380952380951,
+            0.08333333333333330,
+            0.08333333333333331,
+            0.09523809523809523,
+            0.09523809523809523,
+            0.09523809523809523,
+            0.08333333333333333,
+            0.08333333333333333,
+            0.05952380952380952,
+            0.04761904761904762,
+            0.03571428571428571,
+            0.02380952380952381,
+            0.01190476190476190,
+            0.01190476190476190,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00833333333333333,
+            0.00833333333333333,
+            0.01666666666666666,
+            0.02499999999999999,
+            0.03333333333333333,
+            0.04166666666666666,
+            0.05833333333333331,
+            0.06666666666666665,
+            0.07499999999999998,
+            0.08333333333333331,
+            0.08333333333333331,
+            0.08333333333333333,
+            0.08333333333333333,
+            0.07500000000000000,
+            0.06666666666666667,
+            0.05833333333333333,
+            0.04166666666666666,
+            0.03333333333333333,
+            0.02500000000000000,
+            0.01666666666666667,
+            0.00833333333333333,
+            0.00833333333333333,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+    },
+    {
+        {
+            0.06666666666666667,
+            0.06666666666666667,
+            0.13333333333333333,
+            0.13333333333333333,
+            0.20000000000000001,
+            0.13333333333333333,
+            0.13333333333333330,
+            0.06666666666666665,
+            0.06666666666666665,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.02857142857142857,
+            0.02857142857142857,
+            0.05714285714285714,
+            0.08571428571428572,
+            0.11428571428571428,
+            0.11428571428571428,
+            0.14285714285714282,
+            0.11428571428571427,
+            0.11428571428571427,
+            0.08571428571428570,
+            0.05714285714285714,
+            0.02857142857142857,
+            0.02857142857142857,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.01428571428571429,
+            0.01428571428571429,
+            0.02857142857142857,
+            0.04285714285714286,
+            0.07142857142857142,
+            0.07142857142857142,
+            0.09999999999999998,
+            0.09999999999999998,
+            0.11428571428571427,
+            0.09999999999999998,
+            0.09999999999999998,
+            0.07142857142857142,
+            0.07142857142857142,
+            0.04285714285714286,
+            0.02857142857142857,
+            0.01428571428571429,
+            0.01428571428571429,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00793650793650794,
+            0.00793650793650794,
+            0.01587301587301587,
+            0.02380952380952381,
+            0.03968253968253968,
+            0.04761904761904762,
+            0.06349206349206349,
+            0.07142857142857142,
+            0.08730158730158730,
+            0.08730158730158730,
+            0.09523809523809522,
+            0.08730158730158728,
+            0.08730158730158730,
+            0.07142857142857142,
+            0.06349206349206349,
+            0.04761904761904761,
+            0.03968253968253968,
+            0.02380952380952381,
+            0.01587301587301587,
+            0.00793650793650794,
+            0.00793650793650794,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00476190476190476,
+            0.00476190476190476,
+            0.00952380952380952,
+            0.01428571428571429,
+            0.02380952380952381,
+            0.02857142857142857,
+            0.04285714285714286,
+            0.04761904761904762,
+            0.06190476190476190,
+            0.06666666666666665,
+            0.07619047619047617,
+            0.07619047619047617,
+            0.08571428571428569,
+            0.07619047619047617,
+            0.07619047619047620,
+            0.06666666666666667,
+            0.06190476190476191,
+            0.04761904761904762,
+            0.04285714285714286,
+            0.02857142857142857,
+            0.02380952380952381,
+            0.01428571428571429,
+            0.00952380952380952,
+            0.00476190476190476,
+            0.00476190476190476,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00303030303030303,
+            0.00303030303030303,
+            0.00606060606060606,
+            0.00909090909090909,
+            0.01515151515151515,
+            0.01818181818181818,
+            0.02727272727272727,
+            0.03333333333333333,
+            0.04242424242424242,
+            0.04848484848484847,
+            0.05757575757575756,
+            0.06060606060606059,
+            0.06969696969696967,
+            0.06969696969696967,
+            0.07272727272727272,
+            0.06969696969696969,
+            0.06969696969696970,
+            0.06060606060606059,
+            0.05757575757575757,
+            0.04848484848484848,
+            0.04242424242424242,
+            0.03333333333333333,
+            0.02727272727272727,
+            0.01818181818181818,
+            0.01515151515151515,
+            0.00909090909090909,
+            0.00606060606060606,
+            0.00303030303030303,
+            0.00303030303030303,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+    },
+    {
+        {
+            0.04761904761904762,
+            0.04761904761904762,
+            0.09523809523809523,
+            0.09523809523809523,
+            0.14285714285714285,
+            0.14285714285714285,
+            0.14285714285714288,
+            0.09523809523809523,
+            0.09523809523809522,
+            0.04761904761904761,
+            0.04761904761904761,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.01785714285714286,
+            0.01785714285714286,
+            0.03571428571428571,
+            0.05357142857142857,
+            0.07142857142857142,
+            0.08928571428571427,
+            0.10714285714285715,
+            0.10714285714285714,
+            0.10714285714285712,
+            0.10714285714285711,
+            0.08928571428571427,
+            0.07142857142857142,
+            0.05357142857142856,
+            0.03571428571428571,
+            0.01785714285714286,
+            0.01785714285714286,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00793650793650794,
+            0.00793650793650794,
+            0.01587301587301587,
+            0.02380952380952381,
+            0.03968253968253968,
+            0.04761904761904761,
+            0.06349206349206349,
+            0.07142857142857142,
+            0.08730158730158730,
+            0.08730158730158728,
+            0.09523809523809522,
+            0.08730158730158730,
+            0.08730158730158730,
+            0.07142857142857142,
+            0.06349206349206349,
+            0.04761904761904762,
+            0.03968253968253968,
+            0.02380952380952381,
+            0.01587301587301587,
+            0.00793650793650794,
+            0.00793650793650794,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00396825396825397,
+            0.00396825396825397,
+            0.00793650793650794,
+            0.01190476190476190,
+            0.01984126984126984,
+            0.02777777777777777,
+            0.03571428571428571,
+            0.04365079365079365,
+            0.05555555555555555,
+            0.06349206349206349,
+            0.07142857142857142,
+            0.07539682539682539,
+            0.07936507936507936,
+            0.07936507936507936,
+            0.07539682539682539,
+            0.07142857142857142,
+            0.06349206349206349,
+            0.05555555555555555,
+            0.04365079365079365,
+            0.03571428571428571,
+            0.02777777777777777,
+            0.01984126984126984,
+            0.01190476190476190,
+            0.00793650793650794,
+            0.00396825396825397,
+            0.00396825396825397,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00216450216450216,
+            0.00216450216450216,
+            0.00432900432900433,
+            0.00649350649350649,
+            0.01082251082251082,
+            0.01515151515151515,
+            0.02164502164502164,
+            0.02597402597402597,
+            0.03463203463203463,
+            0.04112554112554112,
+            0.04978354978354978,
+            0.05411255411255411,
+            0.06277056277056275,
+            0.06493506493506493,
+            0.06926406926406925,
+            0.06926406926406925,
+            0.06926406926406925,
+            0.06493506493506492,
+            0.06277056277056275,
+            0.05411255411255410,
+            0.04978354978354978,
+            0.04112554112554112,
+            0.03463203463203463,
+            0.02597402597402597,
+            0.02164502164502164,
+            0.01515151515151515,
+            0.01082251082251082,
+            0.00649350649350649,
+            0.00432900432900433,
+            0.00216450216450216,
+            0.00216450216450216,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00126262626262626,
+            0.00126262626262626,
+            0.00252525252525253,
+            0.00378787878787879,
+            0.00631313131313131,
+            0.00883838383838384,
+            0.01262626262626262,
+            0.01641414141414141,
+            0.02146464646464646,
+            0.02651515151515151,
+            0.03282828282828283,
+            0.03787878787878787,
+            0.04419191919191919,
+            0.04924242424242424,
+            0.05429292929292929,
+            0.05808080808080808,
+            0.06060606060606059,
+            0.06186868686868686,
+            0.06186868686868686,
+            0.06060606060606059,
+            0.05808080808080807,
+            0.05429292929292930,
+            0.04924242424242424,
+            0.04419191919191920,
+            0.03787878787878787,
+            0.03282828282828282,
+            0.02651515151515152,
+            0.02146464646464646,
+            0.01641414141414142,
+            0.01262626262626263,
+            0.00883838383838384,
+            0.00631313131313131,
+            0.00378787878787879,
+            0.00252525252525253,
+            0.00126262626262626,
+            0.00126262626262626,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+    },
+    {
+        {
+            0.03571428571428571,
+            0.03571428571428571,
+            0.07142857142857142,
+            0.07142857142857144,
+            0.10714285714285715,
+            0.10714285714285715,
+            0.14285714285714285,
+            0.10714285714285714,
+            0.10714285714285715,
+            0.07142857142857142,
+            0.07142857142857141,
+            0.03571428571428571,
+            0.03571428571428571,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.01190476190476190,
+            0.01190476190476190,
+            0.02380952380952381,
+            0.03571428571428571,
+            0.04761904761904762,
+            0.05952380952380952,
+            0.08333333333333333,
+            0.08333333333333333,
+            0.09523809523809523,
+            0.09523809523809523,
+            0.09523809523809523,
+            0.08333333333333331,
+            0.08333333333333330,
+            0.05952380952380951,
+            0.04761904761904762,
+            0.03571428571428571,
+            0.02380952380952381,
+            0.01190476190476190,
+            0.01190476190476190,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00476190476190476,
+            0.00476190476190476,
+            0.00952380952380952,
+            0.01428571428571429,
+            0.02380952380952381,
+            0.02857142857142857,
+            0.04285714285714286,
+            0.04761904761904762,
+            0.06190476190476191,
+            0.06666666666666667,
+            0.07619047619047620,
+            0.07619047619047617,
+            0.08571428571428569,
+            0.07619047619047617,
+            0.07619047619047617,
+            0.06666666666666665,
+            0.06190476190476190,
+            0.04761904761904762,
+            0.04285714285714286,
+            0.02857142857142857,
+            0.02380952380952381,
+            0.01428571428571429,
+            0.00952380952380952,
+            0.00476190476190476,
+            0.00476190476190476,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00216450216450216,
+            0.00216450216450216,
+            0.00432900432900433,
+            0.00649350649350649,
+            0.01082251082251082,
+            0.01515151515151515,
+            0.02164502164502164,
+            0.02597402597402597,
+            0.03463203463203463,
+            0.04112554112554112,
+            0.04978354978354978,
+            0.05411255411255410,
+            0.06277056277056275,
+            0.06493506493506492,
+            0.06926406926406925,
+            0.06926406926406925,
+            0.06926406926406925,
+            0.06493506493506493,
+            0.06277056277056275,
+            0.05411255411255411,
+            0.04978354978354978,
+            0.04112554112554112,
+            0.03463203463203463,
+            0.02597402597402597,
+            0.02164502164502164,
+            0.01515151515151515,
+            0.01082251082251082,
+            0.00649350649350649,
+            0.00432900432900433,
+            0.00216450216450216,
+            0.00216450216450216,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00108225108225108,
+            0.00108225108225108,
+            0.00216450216450216,
+            0.00324675324675325,
+            0.00541125541125541,
+            0.00757575757575758,
+            0.01190476190476190,
+            0.01406926406926407,
+            0.01948051948051948,
+            0.02380952380952381,
+            0.03030303030303030,
+            0.03463203463203463,
+            0.04220779220779219,
+            0.04545454545454544,
+            0.05194805194805194,
+            0.05519480519480519,
+            0.05952380952380951,
+            0.05952380952380952,
+            0.06277056277056275,
+            0.05952380952380952,
+            0.05952380952380951,
+            0.05519480519480519,
+            0.05194805194805194,
+            0.04545454545454544,
+            0.04220779220779219,
+            0.03463203463203463,
+            0.03030303030303030,
+            0.02380952380952381,
+            0.01948051948051948,
+            0.01406926406926407,
+            0.01190476190476190,
+            0.00757575757575758,
+            0.00541125541125541,
+            0.00324675324675325,
+            0.00216450216450216,
+            0.00108225108225108,
+            0.00108225108225108,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00058275058275058,
+            0.00058275058275058,
+            0.00116550116550117,
+            0.00174825174825175,
+            0.00291375291375291,
+            0.00407925407925408,
+            0.00641025641025641,
+            0.00815850815850816,
+            0.01107226107226107,
+            0.01398601398601398,
+            0.01806526806526806,
+            0.02156177156177156,
+            0.02680652680652679,
+            0.03030303030303030,
+            0.03554778554778554,
+            0.03962703962703962,
+            0.04428904428904428,
+            0.04720279720279720,
+            0.05128205128205127,
+            0.05244755244755244,
+            0.05477855477855477,
+            0.05477855477855477,
+            0.05477855477855477,
+            0.05244755244755243,
+            0.05128205128205127,
+            0.04720279720279720,
+            0.04428904428904428,
+            0.03962703962703962,
+            0.03554778554778555,
+            0.03030303030303030,
+            0.02680652680652681,
+            0.02156177156177156,
+            0.01806526806526806,
+            0.01398601398601399,
+            0.01107226107226107,
+            0.00815850815850816,
+            0.00641025641025641,
+            0.00407925407925408,
+            0.00291375291375291,
+            0.00174825174825175,
+            0.00116550116550117,
+            0.00058275058275058,
+            0.00058275058275058,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+    },
+    {
+        {
+            0.02777777777777778,
+            0.02777777777777778,
+            0.05555555555555555,
+            0.05555555555555556,
+            0.08333333333333334,
+            0.08333333333333334,
+            0.11111111111111113,
+            0.11111111111111113,
+            0.11111111111111110,
+            0.08333333333333333,
+            0.08333333333333334,
+            0.05555555555555555,
+            0.05555555555555555,
+            0.02777777777777777,
+            0.02777777777777777,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00833333333333333,
+            0.00833333333333333,
+            0.01666666666666667,
+            0.02500000000000000,
+            0.03333333333333333,
+            0.04166666666666666,
+            0.05833333333333333,
+            0.06666666666666667,
+            0.07500000000000000,
+            0.08333333333333333,
+            0.08333333333333333,
+            0.08333333333333331,
+            0.08333333333333331,
+            0.07499999999999998,
+            0.06666666666666665,
+            0.05833333333333331,
+            0.04166666666666666,
+            0.03333333333333333,
+            0.02499999999999999,
+            0.01666666666666666,
+            0.00833333333333333,
+            0.00833333333333333,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00303030303030303,
+            0.00303030303030303,
+            0.00606060606060606,
+            0.00909090909090909,
+            0.01515151515151515,
+            0.01818181818181818,
+            0.02727272727272727,
+            0.03333333333333333,
+            0.04242424242424242,
+            0.04848484848484848,
+            0.05757575757575757,
+            0.06060606060606059,
+            0.06969696969696970,
+            0.06969696969696969,
+            0.07272727272727272,
+            0.06969696969696967,
+            0.06969696969696967,
+            0.06060606060606059,
+            0.05757575757575756,
+            0.04848484848484847,
+            0.04242424242424242,
+            0.03333333333333333,
+            0.02727272727272727,
+            0.01818181818181818,
+            0.01515151515151515,
+            0.00909090909090909,
+            0.00606060606060606,
+            0.00303030303030303,
+            0.00303030303030303,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00126262626262626,
+            0.00126262626262626,
+            0.00252525252525253,
+            0.00378787878787879,
+            0.00631313131313131,
+            0.00883838383838384,
+            0.01262626262626263,
+            0.01641414141414142,
+            0.02146464646464646,
+            0.02651515151515152,
+            0.03282828282828282,
+            0.03787878787878787,
+            0.04419191919191920,
+            0.04924242424242424,
+            0.05429292929292930,
+            0.05808080808080807,
+            0.06060606060606059,
+            0.06186868686868686,
+            0.06186868686868686,
+            0.06060606060606059,
+            0.05808080808080808,
+            0.05429292929292929,
+            0.04924242424242424,
+            0.04419191919191919,
+            0.03787878787878787,
+            0.03282828282828283,
+            0.02651515151515151,
+            0.02146464646464646,
+            0.01641414141414141,
+            0.01262626262626262,
+            0.00883838383838384,
+            0.00631313131313131,
+            0.00378787878787879,
+            0.00252525252525253,
+            0.00126262626262626,
+            0.00126262626262626,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00058275058275058,
+            0.00058275058275058,
+            0.00116550116550117,
+            0.00174825174825175,
+            0.00291375291375291,
+            0.00407925407925408,
+            0.00641025641025641,
+            0.00815850815850816,
+            0.01107226107226107,
+            0.01398601398601399,
+            0.01806526806526806,
+            0.02156177156177156,
+            0.02680652680652681,
+            0.03030303030303030,
+            0.03554778554778555,
+            0.03962703962703962,
+            0.04428904428904428,
+            0.04720279720279720,
+            0.05128205128205127,
+            0.05244755244755243,
+            0.05477855477855477,
+            0.05477855477855477,
+            0.05477855477855477,
+            0.05244755244755244,
+            0.05128205128205127,
+            0.04720279720279720,
+            0.04428904428904428,
+            0.03962703962703962,
+            0.03554778554778554,
+            0.03030303030303030,
+            0.02680652680652679,
+            0.02156177156177156,
+            0.01806526806526806,
+            0.01398601398601398,
+            0.01107226107226107,
+            0.00815850815850816,
+            0.00641025641025641,
+            0.00407925407925408,
+            0.00291375291375291,
+            0.00174825174825175,
+            0.00116550116550117,
+            0.00058275058275058,
+            0.00058275058275058,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+            0.00000000000000000,
+        },
+        {
+            0.00029137529137529,
+            0.00029137529137529,
+            0.00058275058275058,
+            0.00087412587412587,
+            0.00145687645687646,
+            0.00203962703962704,
+            0.00320512820512821,
+            0.00437062937062937,
+            0.00582750582750583,
+            0.00757575757575758,
+            0.00990675990675991,
+            0.01223776223776224,
+            0.01544289044289044,
+            0.01835664335664336,
+            0.02185314685314686,
+            0.02534965034965035,
+            0.02913752913752913,
+            0.03263403263403263,
+            0.03642191142191141,
+            0.03962703962703962,
+            0.04254079254079253,
+            0.04516317016317015,
+            0.04720279720279719,
+            0.04836829836829836,
+            0.04924242424242423,
+            0.04924242424242423,
+            0.04836829836829836,
+            0.04720279720279719,
+            0.04516317016317015,
+            0.04254079254079253,
+            0.03962703962703962,
+            0.03642191142191141,
+            0.03263403263403263,
+            0.02913752913752913,
+            0.02534965034965035,
+            0.02185314685314686,
+            0.01835664335664336,
+            0.01544289044289044,
+            0.01223776223776224,
+            0.00990675990675991,
+            0.00757575757575758,
+            0.00582750582750583,
+            0.00437062937062937,
+            0.00320512820512821,
+            0.00203962703962704,
+            0.00145687645687646,
+            0.00087412587412587,
+            0.00058275058275058,
+            0.00029137529137529,
+            0.00029137529137529,
+        },
+    },
+};
diff --git a/bcftools/ploidy.c b/bcftools/ploidy.c
index 719e175..550ba87 100644
--- a/bcftools/ploidy.c
+++ b/bcftools/ploidy.c
@@ -1,4 +1,4 @@
-/*
+/* 
     Copyright (C) 2014-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -22,7 +22,6 @@
     THE SOFTWARE.
 */
 
-#include <htslib/regidx.h>
 #include <htslib/khash_str2int.h>
 #include <htslib/kseq.h>
 #include <htslib/hts.h>
@@ -35,6 +34,7 @@ struct _ploidy_t
     int dflt, min, max; // ploidy: default, min and max (only explicitly listed)
     int *sex2dflt;
     regidx_t *idx;
+    regitr_t *itr;
     void *sex2id;
     char **id2sex;
     kstring_t tmp_str;
@@ -52,7 +52,7 @@ regidx_t *ploidy_regions(ploidy_t *ploidy)
     return ploidy->idx;
 }
 
-int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, void *payload, void *usr)
+int ploidy_parse(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr)
 {
     int i, ret;
     ploidy_t *ploidy = (ploidy_t*) usr;
@@ -68,7 +68,7 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
     else
     {
         // Fill CHR,FROM,TO
-        ret = regidx_parse_tab(line,chr_beg,chr_end,reg,NULL,NULL);
+        ret = regidx_parse_tab(line,chr_beg,chr_end,beg,end,NULL,NULL);
         if ( ret!=0 ) return ret;
     }
 
@@ -144,6 +144,7 @@ ploidy_t *ploidy_init(const char *fname, int dflt)
         ploidy_destroy(pld);
         return NULL;
     }
+    pld->itr = regitr_init(pld->idx);
     _set_defaults(pld,dflt);
     return pld;
 }
@@ -156,6 +157,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
     pld->min = pld->max = -1;
     pld->sex2id = khash_str2int_init();
     pld->idx = regidx_init(NULL,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
+    pld->itr = regitr_init(pld->idx);
 
     kstring_t tmp = {0,0,0};
     const char *ss = str;
@@ -170,7 +172,6 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
         while ( *se && isspace(*se) ) se++;
         ss = se;
     }
-    regidx_insert(pld->idx,NULL);
     free(tmp.s);
 
     _set_defaults(pld,dflt);
@@ -180,6 +181,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
 void ploidy_destroy(ploidy_t *ploidy)
 {
     if ( ploidy->sex2id ) khash_str2int_destroy_free(ploidy->sex2id);
+    if ( ploidy->itr ) regitr_destroy(ploidy->itr);
     if ( ploidy->idx ) regidx_destroy(ploidy->idx);
     free(ploidy->id2sex);
     free(ploidy->tmp_str.s);
@@ -189,8 +191,7 @@ void ploidy_destroy(ploidy_t *ploidy)
 
 int ploidy_query(ploidy_t *ploidy, char *seq, int pos, int *sex2ploidy, int *min, int *max)
 {
-    regitr_t itr;
-    int i, ret = regidx_overlap(ploidy->idx, seq,pos,pos, &itr);
+    int i, ret = regidx_overlap(ploidy->idx, seq,pos,pos, ploidy->itr);
 
     if ( !sex2ploidy && !min && !max ) return ret;
 
@@ -207,17 +208,16 @@ int ploidy_query(ploidy_t *ploidy, char *seq, int pos, int *sex2ploidy, int *min
     int _min = INT_MAX, _max = -1;
     if ( sex2ploidy ) for (i=0; i<ploidy->nsex; i++) sex2ploidy[i] = ploidy->dflt;
 
-    while ( REGITR_OVERLAP(itr,pos,pos) )
+    while ( regitr_overlap(ploidy->itr) )
     {
-        int sex = REGITR_PAYLOAD(itr,sex_ploidy_t).sex;
-        int pld = REGITR_PAYLOAD(itr,sex_ploidy_t).ploidy;
+        int sex = regitr_payload(ploidy->itr,sex_ploidy_t).sex;
+        int pld = regitr_payload(ploidy->itr,sex_ploidy_t).ploidy;
         if ( pld!=ploidy->dflt ) 
         {
             if ( sex2ploidy ) sex2ploidy[ sex ] = pld;
             if ( _min > pld ) _min = pld;
             if ( _max < pld ) _max = pld;
         }
-        itr.i++;
     }
     if ( _max==-1 ) _max = _min = ploidy->dflt;
     if ( max ) *max = _max;
diff --git a/bcftools/ploidy.c.pysam.c b/bcftools/ploidy.c.pysam.c
index d0468b9..2eb9bd8 100644
--- a/bcftools/ploidy.c.pysam.c
+++ b/bcftools/ploidy.c.pysam.c
@@ -1,6 +1,6 @@
 #include "pysam.h"
 
-/*
+/* 
     Copyright (C) 2014-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -24,7 +24,6 @@
     THE SOFTWARE.
 */
 
-#include <htslib/regidx.h>
 #include <htslib/khash_str2int.h>
 #include <htslib/kseq.h>
 #include <htslib/hts.h>
@@ -37,6 +36,7 @@ struct _ploidy_t
     int dflt, min, max; // ploidy: default, min and max (only explicitly listed)
     int *sex2dflt;
     regidx_t *idx;
+    regitr_t *itr;
     void *sex2id;
     char **id2sex;
     kstring_t tmp_str;
@@ -54,7 +54,7 @@ regidx_t *ploidy_regions(ploidy_t *ploidy)
     return ploidy->idx;
 }
 
-int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, void *payload, void *usr)
+int ploidy_parse(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr)
 {
     int i, ret;
     ploidy_t *ploidy = (ploidy_t*) usr;
@@ -70,7 +70,7 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
     else
     {
         // Fill CHR,FROM,TO
-        ret = regidx_parse_tab(line,chr_beg,chr_end,reg,NULL,NULL);
+        ret = regidx_parse_tab(line,chr_beg,chr_end,beg,end,NULL,NULL);
         if ( ret!=0 ) return ret;
     }
 
@@ -146,6 +146,7 @@ ploidy_t *ploidy_init(const char *fname, int dflt)
         ploidy_destroy(pld);
         return NULL;
     }
+    pld->itr = regitr_init(pld->idx);
     _set_defaults(pld,dflt);
     return pld;
 }
@@ -158,6 +159,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
     pld->min = pld->max = -1;
     pld->sex2id = khash_str2int_init();
     pld->idx = regidx_init(NULL,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
+    pld->itr = regitr_init(pld->idx);
 
     kstring_t tmp = {0,0,0};
     const char *ss = str;
@@ -172,7 +174,6 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
         while ( *se && isspace(*se) ) se++;
         ss = se;
     }
-    regidx_insert(pld->idx,NULL);
     free(tmp.s);
 
     _set_defaults(pld,dflt);
@@ -182,6 +183,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
 void ploidy_destroy(ploidy_t *ploidy)
 {
     if ( ploidy->sex2id ) khash_str2int_destroy_free(ploidy->sex2id);
+    if ( ploidy->itr ) regitr_destroy(ploidy->itr);
     if ( ploidy->idx ) regidx_destroy(ploidy->idx);
     free(ploidy->id2sex);
     free(ploidy->tmp_str.s);
@@ -191,8 +193,7 @@ void ploidy_destroy(ploidy_t *ploidy)
 
 int ploidy_query(ploidy_t *ploidy, char *seq, int pos, int *sex2ploidy, int *min, int *max)
 {
-    regitr_t itr;
-    int i, ret = regidx_overlap(ploidy->idx, seq,pos,pos, &itr);
+    int i, ret = regidx_overlap(ploidy->idx, seq,pos,pos, ploidy->itr);
 
     if ( !sex2ploidy && !min && !max ) return ret;
 
@@ -209,17 +210,16 @@ int ploidy_query(ploidy_t *ploidy, char *seq, int pos, int *sex2ploidy, int *min
     int _min = INT_MAX, _max = -1;
     if ( sex2ploidy ) for (i=0; i<ploidy->nsex; i++) sex2ploidy[i] = ploidy->dflt;
 
-    while ( REGITR_OVERLAP(itr,pos,pos) )
+    while ( regitr_overlap(ploidy->itr) )
     {
-        int sex = REGITR_PAYLOAD(itr,sex_ploidy_t).sex;
-        int pld = REGITR_PAYLOAD(itr,sex_ploidy_t).ploidy;
+        int sex = regitr_payload(ploidy->itr,sex_ploidy_t).sex;
+        int pld = regitr_payload(ploidy->itr,sex_ploidy_t).ploidy;
         if ( pld!=ploidy->dflt ) 
         {
             if ( sex2ploidy ) sex2ploidy[ sex ] = pld;
             if ( _min > pld ) _min = pld;
             if ( _max < pld ) _max = pld;
         }
-        itr.i++;
     }
     if ( _max==-1 ) _max = _min = ploidy->dflt;
     if ( max ) *max = _max;
diff --git a/bcftools/ploidy.h b/bcftools/ploidy.h
index 6deef73..1e7d2f7 100644
--- a/bcftools/ploidy.h
+++ b/bcftools/ploidy.h
@@ -55,7 +55,7 @@
 #ifndef __PLOIDY_H__
 #define __PLOIDY_H__
 
-#include <htslib/regidx.h>
+#include "regidx.h"
 
 typedef struct _ploidy_t ploidy_t;
 
diff --git a/bcftools/prob1.c b/bcftools/prob1.c
index 8f4463f..954d43c 100644
--- a/bcftools/prob1.c
+++ b/bcftools/prob1.c
@@ -157,8 +157,9 @@ int test16(bcf1_t *b, anno16_t *a);
 static int cal_pdg(const bcf1_t *b, bcf_p1aux_t *ma)
 {
     int i, j;
-    long *p, tmp;
-    p = (long*) alloca(b->n_allele * sizeof(long));
+    long p_a[16], *p=p_a, tmp;
+    if (b->n_allele > 16)
+        p = (long*) malloc(b->n_allele * sizeof(long));
     memset(p, 0, sizeof(long) * b->n_allele);
 
     // Set P(D|g) for each sample and sum phread likelihoods across all samples to create lk
@@ -177,12 +178,14 @@ static int cal_pdg(const bcf1_t *b, bcf_p1aux_t *ma)
             tmp = p[j], p[j] = p[j-1], p[j-1] = tmp;
     for (i = b->n_allele - 1; i >= 0; --i)
         if ((p[i]&0xf) == 0) break;
+    if (p != p_a)
+        free(p);
     return i;
 }
 
 
-/* f0 is minor allele fraction */
-int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k)
+/* f0 is freq of the ref allele */
+int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k, int is_var)
 {
     double sum, g[3];
     double max, f3[3], *pdg = ma->pdg + k * 3;
@@ -203,6 +206,7 @@ int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k)
         g[i] /= sum;
         if (g[i] > max) max = g[i], max_i = i;
     }
+    if ( !is_var ) { max_i = 2; max = g[2]; }   // force 0/0 genotype if the site is non-variant
     max = 1. - max;
     if (max < 1e-308) max = 1e-308;
     q = (int)(-4.343 * log(max) + .499);
diff --git a/bcftools/prob1.c.pysam.c b/bcftools/prob1.c.pysam.c
index a59ec44..f4f4271 100644
--- a/bcftools/prob1.c.pysam.c
+++ b/bcftools/prob1.c.pysam.c
@@ -159,8 +159,9 @@ int test16(bcf1_t *b, anno16_t *a);
 static int cal_pdg(const bcf1_t *b, bcf_p1aux_t *ma)
 {
     int i, j;
-    long *p, tmp;
-    p = (long*) alloca(b->n_allele * sizeof(long));
+    long p_a[16], *p=p_a, tmp;
+    if (b->n_allele > 16)
+        p = (long*) malloc(b->n_allele * sizeof(long));
     memset(p, 0, sizeof(long) * b->n_allele);
 
     // Set P(D|g) for each sample and sum phread likelihoods across all samples to create lk
@@ -179,12 +180,14 @@ static int cal_pdg(const bcf1_t *b, bcf_p1aux_t *ma)
             tmp = p[j], p[j] = p[j-1], p[j-1] = tmp;
     for (i = b->n_allele - 1; i >= 0; --i)
         if ((p[i]&0xf) == 0) break;
+    if (p != p_a)
+        free(p);
     return i;
 }
 
 
-/* f0 is minor allele fraction */
-int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k)
+/* f0 is freq of the ref allele */
+int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k, int is_var)
 {
     double sum, g[3];
     double max, f3[3], *pdg = ma->pdg + k * 3;
@@ -205,6 +208,7 @@ int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k)
         g[i] /= sum;
         if (g[i] > max) max = g[i], max_i = i;
     }
+    if ( !is_var ) { max_i = 2; max = g[2]; }   // force 0/0 genotype if the site is non-variant
     max = 1. - max;
     if (max < 1e-308) max = 1e-308;
     q = (int)(-4.343 * log(max) + .499);
diff --git a/bcftools/prob1.h b/bcftools/prob1.h
index 1594d3f..a3d4b0d 100644
--- a/bcftools/prob1.h
+++ b/bcftools/prob1.h
@@ -78,7 +78,7 @@ extern "C" {
     void bcf_p1_destroy(bcf_p1aux_t *ma);
     void bcf_p1_set_ploidy(bcf1_t *b, bcf_p1aux_t *ma);
     int bcf_p1_cal(call_t *call, bcf1_t *b, int do_contrast, bcf_p1aux_t *ma, bcf_p1rst_t *rst);
-    int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k);
+    int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k, int is_var);
     void bcf_p1_dump_afs(bcf_p1aux_t *ma);
     int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn);
     int bcf_p1_set_n1(bcf_p1aux_t *b, int n1);
diff --git a/bcftools/regidx.c b/bcftools/regidx.c
new file mode 100644
index 0000000..84646a8
--- /dev/null
+++ b/bcftools/regidx.c
@@ -0,0 +1,598 @@
+/* 
+    Copyright (C) 2014-2016 Genome Research Ltd.
+
+    Author: Petr Danecek <pd3 at sanger.ac.uk>
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+    
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+*/
+
+#include <strings.h>
+#include <htslib/hts.h>
+#include <htslib/kstring.h>
+#include <htslib/kseq.h>
+#include <htslib/khash_str2int.h>
+#include "regidx.h"
+
+#define MAX_COOR_0 REGIDX_MAX   // CSI and hts_itr_query limit, 0-based
+
+#define iBIN(x) ((x)>>13)
+
+typedef struct
+{
+    uint32_t beg, end;
+}
+reg_t;
+
+typedef struct
+{
+    uint32_t pos, ireg;     // y-coordinate and a pointer to reglist.reg and reglist.dat
+}
+pos_t;
+
+typedef struct _reglist_t reglist_t;
+
+typedef struct
+{
+    uint32_t beg, end, ireg;      // query coordinates and the active region
+    regidx_t *ridx;
+    reglist_t *list;
+    int active;
+}
+_itr_t;
+
+// List of regions for one chromosome.
+struct _reglist_t
+{
+    uint32_t *idx, nidx;    // index to list.reg+1
+    uint32_t nreg, mreg;    // n:used, m:allocated
+    reg_t *reg;             // regions
+    void *dat;              // payload data
+    char *seq;              // sequence name
+    int unsorted;
+
+};
+
+// Container of all sequences
+struct _regidx_t
+{
+    int nseq, mseq;         // n:used, m:alloced
+    reglist_t *seq;         // regions for each sequence
+    void *seq2regs;         // hash for fast lookup from chr name to regions
+    char **seq_names;
+    regidx_free_f free;     // function to free any data allocated by regidx_parse_f
+    regidx_parse_f parse;   // parse one input line
+    void *usr;              // user data to pass to regidx_parse_f
+    int payload_size;
+    void *payload;          // temporary payload data set by regidx_parse_f (sequence is not known beforehand)
+    kstring_t str;
+};
+
+int regidx_seq_nregs(regidx_t *idx, const char *seq)
+{
+    int iseq;
+    if ( khash_str2int_get(idx->seq2regs, seq, &iseq)!=0 ) return 0; // no such sequence
+    return idx->seq[iseq].nreg;
+}
+
+int regidx_nregs(regidx_t *idx)
+{
+    int i, nreg = 0;
+    for (i=0; i<idx->nseq; i++) nreg += idx->seq[i].nreg;
+    return nreg;
+}
+
+char **regidx_seq_names(regidx_t *idx, int *n)
+{
+    *n = idx->nseq;
+    return idx->seq_names;
+}
+
+int regidx_insert_list(regidx_t *idx, char *line, char delim)
+{
+    kstring_t tmp = {0,0,0};
+    char *ss = line;
+    while ( *ss )
+    {
+        char *se = ss;
+        while ( *se && *se!=delim ) se++;
+        tmp.l = 0;
+        kputsn(ss, se-ss, &tmp);
+        if ( regidx_insert(idx,tmp.s) < 0 )
+        {
+            free(tmp.s);
+            return -1;
+        }
+        if ( !*se ) break;
+        ss = se+1;
+    }
+    free(tmp.s);
+    return 0;
+}
+
+static inline int cmp_regs(reg_t *a, reg_t *b)
+{
+    if ( a->beg < b->beg ) return -1;
+    if ( a->beg > b->beg ) return 1;
+    if ( a->end < b->end ) return 1;    // longer intervals come first
+    if ( a->end > b->end ) return -1;
+    return 0;
+}
+static int cmp_reg_ptrs(const void *a, const void *b)
+{
+    return cmp_regs((reg_t*)a,(reg_t*)b);
+}
+static int cmp_reg_ptrs2(const void *a, const void *b)
+{
+    return cmp_regs(*((reg_t**)a),*((reg_t**)b));
+}
+
+inline int regidx_push(regidx_t *idx, char *chr_beg, char *chr_end, uint32_t beg, uint32_t end, void *payload)
+{
+    if ( beg > MAX_COOR_0 ) beg = MAX_COOR_0;
+    if ( end > MAX_COOR_0 ) end = MAX_COOR_0;
+
+    int rid;
+    idx->str.l = 0;
+    kputsn(chr_beg, chr_end-chr_beg+1, &idx->str);
+    if ( khash_str2int_get(idx->seq2regs, idx->str.s, &rid)!=0 )
+    {
+        // new chromosome
+        idx->nseq++;
+        int m_prev = idx->mseq;
+        hts_expand0(reglist_t,idx->nseq,idx->mseq,idx->seq);
+        hts_expand0(char*,idx->nseq,m_prev,idx->seq_names);
+        idx->seq_names[idx->nseq-1] = strdup(idx->str.s);
+        rid = khash_str2int_inc(idx->seq2regs, idx->seq_names[idx->nseq-1]);
+    }
+
+    reglist_t *list = &idx->seq[rid];
+    list->seq = idx->seq_names[rid];
+    list->nreg++;
+    int mreg = list->mreg;
+    hts_expand(reg_t,list->nreg,list->mreg,list->reg);
+    list->reg[list->nreg-1].beg = beg;
+    list->reg[list->nreg-1].end = end;
+    if ( idx->payload_size )
+    {
+        if ( mreg != list->mreg ) list->dat = realloc(list->dat,idx->payload_size*list->mreg);
+        memcpy((char *)list->dat + idx->payload_size*(list->nreg-1), payload, idx->payload_size);
+    }
+    if ( !list->unsorted && list->nreg>1 && cmp_regs(&list->reg[list->nreg-2],&list->reg[list->nreg-1])>0 ) list->unsorted = 1;
+    return 0;
+}
+
+int regidx_insert(regidx_t *idx, char *line)
+{
+    if ( !line ) return 0;
+    char *chr_from, *chr_to;
+    uint32_t beg,end;
+    int ret = idx->parse(line,&chr_from,&chr_to,&beg,&end,idx->payload,idx->usr);
+    if ( ret==-2 ) return -1;   // error
+    if ( ret==-1 ) return 0;    // skip the line
+    regidx_push(idx, chr_from,chr_to,beg,end,idx->payload);
+    return 0;
+}
+
+regidx_t *regidx_init(const char *fname, regidx_parse_f parser, regidx_free_f free_f, size_t payload_size, void *usr_dat)
+{
+    if ( !parser )
+    {
+        if ( !fname ) parser = regidx_parse_tab;
+        else
+        {
+            int len = strlen(fname);
+            if ( len>=7 && !strcasecmp(".bed.gz",fname+len-7) )
+                parser = regidx_parse_bed;
+            else if ( len>=8 && !strcasecmp(".bed.bgz",fname+len-8) )
+                parser = regidx_parse_bed;
+            else if ( len>=4 && !strcasecmp(".bed",fname+len-4) )
+                parser = regidx_parse_bed;
+            else
+                parser = regidx_parse_tab;
+        }
+    }
+
+    regidx_t *idx = (regidx_t*) calloc(1,sizeof(regidx_t));
+    idx->free  = free_f;
+    idx->parse = parser;
+    idx->usr   = usr_dat;
+    idx->seq2regs = khash_str2int_init();
+    idx->payload_size = payload_size;
+    if ( payload_size ) idx->payload = malloc(payload_size);
+
+    if ( !fname ) return idx;
+    
+    kstring_t str = {0,0,0};
+
+    htsFile *fp = hts_open(fname,"r");
+    if ( !fp ) goto error;
+
+    while ( hts_getline(fp, KS_SEP_LINE, &str) > 0 )
+    {
+        if ( regidx_insert(idx, str.s) ) goto error;
+    }
+
+    free(str.s);
+    hts_close(fp);
+    return idx;
+
+error:
+    free(str.s);
+    if ( fp ) hts_close(fp);
+    regidx_destroy(idx);
+    return NULL;
+}
+
+void regidx_destroy(regidx_t *idx)
+{
+    int i, j;
+    for (i=0; i<idx->nseq; i++)
+    {
+        reglist_t *list = &idx->seq[i];
+        if ( idx->free )
+        {
+            for (j=0; j<list->nreg; j++)
+                idx->free((char *)list->dat + idx->payload_size*j);
+        }
+        free(list->dat);
+        free(list->reg);
+        free(list->idx);
+    }
+    free(idx->seq_names);
+    free(idx->seq);
+    free(idx->str.s);
+    free(idx->payload);
+    khash_str2int_destroy_free(idx->seq2regs);
+    free(idx);
+}
+
+int _reglist_build_index(regidx_t *regidx, reglist_t *list)
+{
+    int i;
+    if ( list->unsorted )
+    {
+        if ( !regidx->payload_size )
+            qsort(list->reg,list->nreg,sizeof(reg_t),cmp_reg_ptrs);
+        else
+        {
+            reg_t **ptr = (reg_t**) malloc(sizeof(reg_t*)*list->nreg);
+            for (i=0; i<list->nreg; i++) ptr[i] = list->reg + i;
+            qsort(ptr,list->nreg,sizeof(*ptr),cmp_reg_ptrs2);
+
+            void *tmp_dat = malloc(regidx->payload_size*list->nreg);
+            for (i=0; i<list->nreg; i++)
+            {
+                size_t iori = ptr[i] - list->reg;
+                memcpy((char *)tmp_dat+i*regidx->payload_size,
+                       (char *)list->dat+iori*regidx->payload_size,
+                       regidx->payload_size);
+            }
+            free(list->dat);
+            list->dat = tmp_dat;
+
+            reg_t *tmp_reg = (reg_t*) malloc(sizeof(reg_t)*list->nreg);
+            for (i=0; i<list->nreg; i++)
+            {
+                size_t iori = ptr[i] - list->reg;
+                tmp_reg[i] = list->reg[iori];
+            }
+            free(ptr);
+            free(list->reg);
+            list->reg  = tmp_reg;
+            list->mreg = list->nreg;
+        }
+        list->unsorted = 0;
+    }
+
+    list->nidx = 0;
+    int j,k, midx = 0;
+    for (j=0; j<list->nreg; j++)
+    {
+        int ibeg = iBIN(list->reg[j].beg);
+        int iend = iBIN(list->reg[j].end);
+        if ( midx <= iend )
+        {
+            int old_midx = midx; 
+            midx = iend + 1;
+            kroundup32(midx);
+            list->idx = (uint32_t*) realloc(list->idx, midx*sizeof(uint32_t));
+            memset(list->idx+old_midx, 0, sizeof(uint32_t)*(midx-old_midx));
+        }
+        if ( ibeg==iend )
+        {
+            if ( !list->idx[ibeg] ) list->idx[ibeg] = j + 1;
+        }
+        else
+        {
+            for (k=ibeg; k<=iend; k++)
+                if ( !list->idx[k] ) list->idx[k] = j + 1;
+        }
+        if ( list->nidx < iend+1 ) list->nidx = iend+1;
+    }
+
+    return 0;
+}
+
+int regidx_overlap(regidx_t *regidx, const char *chr, uint32_t beg, uint32_t end, regitr_t *regitr)
+{
+    if ( regitr ) regitr->seq = NULL;
+
+    int iseq, ireg;
+    if ( khash_str2int_get(regidx->seq2regs, chr, &iseq)!=0 ) return 0;    // no such sequence
+
+    reglist_t *list = &regidx->seq[iseq];
+    if ( !list->nreg ) return 0;
+
+    if ( list->nreg==1 )
+    {
+        if ( beg > list->reg[0].end ) return 0;
+        if ( end < list->reg[0].beg ) return 0;
+        ireg = 0;
+    }
+    else
+    {
+        if ( !list->idx )
+            _reglist_build_index(regidx,list);
+
+        int ibeg = iBIN(beg);
+        if ( ibeg >= list->nidx ) return 0;     // beg is too big
+
+        // find a matching region
+        uint32_t i = list->idx[ibeg];
+        if ( !i )
+        {
+            int iend = iBIN(end);
+            if ( iend > list->nidx ) iend = list->nidx;
+            for (i=ibeg; i<iend; i++)
+                if ( list->idx[i] ) break;
+            if ( i==iend ) return 0;
+            i = list->idx[i];
+        }
+
+        for (ireg=i-1; ireg<list->nreg; ireg++)
+        {
+            if ( list->reg[ireg].beg > end ) return 0;   // no match, past the query region
+            if ( list->reg[ireg].end >= beg && list->reg[ireg].beg <= end ) break; // found
+        }
+
+        if ( ireg >= list->nreg ) return 0;   // no match
+    }
+
+    if ( !regitr ) return 1;    // match, but no more info to save
+
+    // may need to iterate over the matching regions later
+    _itr_t *itr = (_itr_t*)regitr->itr;
+    itr->ridx = regidx;
+    itr->list = list;
+    itr->beg  = beg;
+    itr->end  = end;
+    itr->ireg = ireg;
+    itr->active = 0;
+
+    regitr->seq = list->seq;
+    regitr->beg = list->reg[ireg].beg;
+    regitr->end = list->reg[ireg].end;
+    if ( regidx->payload_size )
+        regitr->payload = (char *)list->dat + regidx->payload_size*ireg;
+
+    return 1;
+}
+
+int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr)
+{
+    char *ss = (char*) line;
+    while ( *ss && isspace(*ss) ) ss++;
+    if ( !*ss ) return -1;      // skip blank lines
+    if ( *ss=='#' ) return -1;  // skip comments
+    
+    char *se = ss;
+    while ( *se && !isspace(*se) ) se++;
+
+    *chr_beg = ss;
+    *chr_end = se-1;
+
+    if ( !*se )
+    {
+        // just the chromosome name
+        *beg = 0;
+        *end = MAX_COOR_0;
+        return 0;
+    }
+
+    ss = se+1;
+    *beg = strtod(ss, &se);
+    if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }
+
+    ss = se+1;
+    *end = strtod(ss, &se) - 1;
+    if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }
+    
+    return 0;
+}
+
+int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr)
+{
+    char *ss = (char*) line;
+    while ( *ss && isspace(*ss) ) ss++;
+    if ( !*ss ) return -1;      // skip blank lines
+    if ( *ss=='#' ) return -1;  // skip comments
+    
+    char *se = ss;
+    while ( *se && !isspace(*se) ) se++;
+
+    *chr_beg = ss;
+    *chr_end = se-1;
+
+    if ( !*se )
+    {
+        // just the chromosome name
+        *beg = 0;
+        *end = MAX_COOR_0;
+        return 0;
+    }
+
+    ss = se+1;
+    *beg = strtod(ss, &se);
+    if ( ss==se ) { fprintf(stderr,"Could not parse tab line: %s\n", line); return -2; }
+    if ( *beg==0 ) { fprintf(stderr,"Could not parse tab line, expected 1-based coordinate: %s\n", line); return -2; }
+    (*beg)--;
+
+    if ( !se[0] || !se[1] )
+        *end = *beg;
+    else
+    {
+        ss = se+1;
+        *end = strtod(ss, &se);
+        if ( ss==se ) *end = *beg;
+        else if ( *end==0 ) { fprintf(stderr,"Could not parse tab line, expected 1-based coordinate: %s\n", line); return -2; }
+        else (*end)--;
+    }
+    return 0;
+}
+
+int regidx_parse_reg(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr)
+{
+    char *ss = (char*) line;
+    while ( *ss && isspace(*ss) ) ss++;
+    if ( !*ss ) return -1;      // skip blank lines
+    if ( *ss=='#' ) return -1;  // skip comments
+    
+    char *se = ss;
+    while ( *se && *se!=':' ) se++;
+
+    *chr_beg = ss;
+    *chr_end = se-1;
+
+    if ( !*se )
+    {
+        *beg = 0;
+        *end = MAX_COOR_0;
+        return 0;
+    }
+
+    ss = se+1;
+    *beg = strtod(ss, &se);
+    if ( ss==se ) { fprintf(stderr,"Could not parse reg line: %s\n", line); return -2; }
+    if ( *beg==0 ) { fprintf(stderr,"Could not parse reg line, expected 1-based coordinate: %s\n", line); return -2; }
+    (*beg)--;
+
+    if ( !se[0] || !se[1] )
+        *end = se[0]=='-' ? MAX_COOR_0 : *beg;
+    else
+    {
+        ss = se+1;
+        *end = strtod(ss, &se);
+        if ( ss==se ) *end = *beg;
+        else if ( *end==0 ) { fprintf(stderr,"Could not parse reg line, expected 1-based coordinate: %s\n", line); return -2; }
+        else (*end)--;
+    }
+    return 0;
+}
+
+regitr_t *regitr_init(regidx_t *regidx)
+{
+    regitr_t *regitr = (regitr_t*) calloc(1,sizeof(regitr_t));
+    regitr->itr  = (_itr_t*) calloc(1,sizeof(_itr_t));
+    _itr_t *itr = (_itr_t*) regitr->itr;
+    itr->ridx = regidx;
+    itr->list = NULL;
+    return regitr;
+}
+
+void regitr_reset(regidx_t *regidx, regitr_t *regitr)
+{
+    _itr_t *itr = (_itr_t*) regitr->itr;
+    memset(itr,0,sizeof(_itr_t));
+    itr->ridx = regidx;
+}
+
+void regitr_destroy(regitr_t *regitr)
+{
+    free(regitr->itr);
+    free(regitr);
+}
+
+int regitr_overlap(regitr_t *regitr)
+{
+    if ( !regitr->seq ) return 0;
+
+    _itr_t *itr = (_itr_t*) regitr->itr;
+    if ( !itr->active )
+    {
+        // is this the first call after regidx_overlap?
+        itr->active = 1;
+        itr->ireg++;
+        return 1;
+    }
+
+    reglist_t *list = itr->list;
+
+    int i;
+    for (i=itr->ireg; i<list->nreg; i++)
+    {
+        if ( list->reg[i].beg > itr->end ) return 0;   // no match, past the query region
+        if ( list->reg[i].end >= itr->beg && list->reg[i].beg <= itr->end ) break; // found
+    }
+
+    if ( i >= list->nreg ) return 0;   // no match
+
+    itr->ireg = i + 1;
+    regitr->seq = list->seq;
+    regitr->beg = list->reg[i].beg;
+    regitr->end = list->reg[i].end;
+    if ( itr->ridx->payload_size )
+        regitr->payload = (char *)list->dat + itr->ridx->payload_size*i;
+
+    return 1;
+}
+
+int regitr_loop(regitr_t *regitr)
+{
+    _itr_t *itr = (_itr_t*) regitr->itr;
+    regidx_t *regidx = itr->ridx;
+
+    if ( !itr->list )    // first time here
+    {
+        itr->list = regidx->seq;
+        itr->ireg = 0;
+    }
+
+    size_t iseq = itr->list - regidx->seq;
+    if ( iseq >= regidx->nseq ) return 0;
+
+    if ( itr->ireg >= itr->list->nreg )
+    {
+        iseq++;
+        if ( iseq >= regidx->nseq ) return 0; // no more sequences, done
+        itr->ireg = 0;
+        itr->list = &regidx->seq[iseq];
+    }
+
+    regitr->seq = itr->list->seq;
+    regitr->beg = itr->list->reg[itr->ireg].beg;
+    regitr->end = itr->list->reg[itr->ireg].end;
+    if ( regidx->payload_size )
+        regitr->payload = (char *)itr->list->dat + regidx->payload_size*itr->ireg;
+    itr->ireg++;
+
+    return 1;
+}
+
+
+
diff --git a/bcftools/regidx.c.pysam.c b/bcftools/regidx.c.pysam.c
new file mode 100644
index 0000000..4d6dcda
--- /dev/null
+++ b/bcftools/regidx.c.pysam.c
@@ -0,0 +1,600 @@
+#include "pysam.h"
+
+/* 
+    Copyright (C) 2014-2016 Genome Research Ltd.
+
+    Author: Petr Danecek <pd3 at sanger.ac.uk>
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+    
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+*/
+
+#include <strings.h>
+#include <htslib/hts.h>
+#include <htslib/kstring.h>
+#include <htslib/kseq.h>
+#include <htslib/khash_str2int.h>
+#include "regidx.h"
+
+#define MAX_COOR_0 REGIDX_MAX   // CSI and hts_itr_query limit, 0-based
+
+#define iBIN(x) ((x)>>13)
+
+typedef struct
+{
+    uint32_t beg, end;
+}
+reg_t;
+
+typedef struct
+{
+    uint32_t pos, ireg;     // y-coordinate and a pointer to reglist.reg and reglist.dat
+}
+pos_t;
+
+typedef struct _reglist_t reglist_t;
+
+typedef struct
+{
+    uint32_t beg, end, ireg;      // query coordinates and the active region
+    regidx_t *ridx;
+    reglist_t *list;
+    int active;
+}
+_itr_t;
+
+// List of regions for one chromosome.
+struct _reglist_t
+{
+    uint32_t *idx, nidx;    // index to list.reg+1
+    uint32_t nreg, mreg;    // n:used, m:allocated
+    reg_t *reg;             // regions
+    void *dat;              // payload data
+    char *seq;              // sequence name
+    int unsorted;
+
+};
+
+// Container of all sequences
+struct _regidx_t
+{
+    int nseq, mseq;         // n:used, m:alloced
+    reglist_t *seq;         // regions for each sequence
+    void *seq2regs;         // hash for fast lookup from chr name to regions
+    char **seq_names;
+    regidx_free_f free;     // function to free any data allocated by regidx_parse_f
+    regidx_parse_f parse;   // parse one input line
+    void *usr;              // user data to pass to regidx_parse_f
+    int payload_size;
+    void *payload;          // temporary payload data set by regidx_parse_f (sequence is not known beforehand)
+    kstring_t str;
+};
+
+int regidx_seq_nregs(regidx_t *idx, const char *seq)
+{
+    int iseq;
+    if ( khash_str2int_get(idx->seq2regs, seq, &iseq)!=0 ) return 0; // no such sequence
+    return idx->seq[iseq].nreg;
+}
+
+int regidx_nregs(regidx_t *idx)
+{
+    int i, nreg = 0;
+    for (i=0; i<idx->nseq; i++) nreg += idx->seq[i].nreg;
+    return nreg;
+}
+
+char **regidx_seq_names(regidx_t *idx, int *n)
+{
+    *n = idx->nseq;
+    return idx->seq_names;
+}
+
+int regidx_insert_list(regidx_t *idx, char *line, char delim)
+{
+    kstring_t tmp = {0,0,0};
+    char *ss = line;
+    while ( *ss )
+    {
+        char *se = ss;
+        while ( *se && *se!=delim ) se++;
+        tmp.l = 0;
+        kputsn(ss, se-ss, &tmp);
+        if ( regidx_insert(idx,tmp.s) < 0 )
+        {
+            free(tmp.s);
+            return -1;
+        }
+        if ( !*se ) break;
+        ss = se+1;
+    }
+    free(tmp.s);
+    return 0;
+}
+
+static inline int cmp_regs(reg_t *a, reg_t *b)
+{
+    if ( a->beg < b->beg ) return -1;
+    if ( a->beg > b->beg ) return 1;
+    if ( a->end < b->end ) return 1;    // longer intervals come first
+    if ( a->end > b->end ) return -1;
+    return 0;
+}
+static int cmp_reg_ptrs(const void *a, const void *b)
+{
+    return cmp_regs((reg_t*)a,(reg_t*)b);
+}
+static int cmp_reg_ptrs2(const void *a, const void *b)
+{
+    return cmp_regs(*((reg_t**)a),*((reg_t**)b));
+}
+
+inline int regidx_push(regidx_t *idx, char *chr_beg, char *chr_end, uint32_t beg, uint32_t end, void *payload)
+{
+    if ( beg > MAX_COOR_0 ) beg = MAX_COOR_0;
+    if ( end > MAX_COOR_0 ) end = MAX_COOR_0;
+
+    int rid;
+    idx->str.l = 0;
+    kputsn(chr_beg, chr_end-chr_beg+1, &idx->str);
+    if ( khash_str2int_get(idx->seq2regs, idx->str.s, &rid)!=0 )
+    {
+        // new chromosome
+        idx->nseq++;
+        int m_prev = idx->mseq;
+        hts_expand0(reglist_t,idx->nseq,idx->mseq,idx->seq);
+        hts_expand0(char*,idx->nseq,m_prev,idx->seq_names);
+        idx->seq_names[idx->nseq-1] = strdup(idx->str.s);
+        rid = khash_str2int_inc(idx->seq2regs, idx->seq_names[idx->nseq-1]);
+    }
+
+    reglist_t *list = &idx->seq[rid];
+    list->seq = idx->seq_names[rid];
+    list->nreg++;
+    int mreg = list->mreg;
+    hts_expand(reg_t,list->nreg,list->mreg,list->reg);
+    list->reg[list->nreg-1].beg = beg;
+    list->reg[list->nreg-1].end = end;
+    if ( idx->payload_size )
+    {
+        if ( mreg != list->mreg ) list->dat = realloc(list->dat,idx->payload_size*list->mreg);
+        memcpy((char *)list->dat + idx->payload_size*(list->nreg-1), payload, idx->payload_size);
+    }
+    if ( !list->unsorted && list->nreg>1 && cmp_regs(&list->reg[list->nreg-2],&list->reg[list->nreg-1])>0 ) list->unsorted = 1;
+    return 0;
+}
+
+int regidx_insert(regidx_t *idx, char *line)
+{
+    if ( !line ) return 0;
+    char *chr_from, *chr_to;
+    uint32_t beg,end;
+    int ret = idx->parse(line,&chr_from,&chr_to,&beg,&end,idx->payload,idx->usr);
+    if ( ret==-2 ) return -1;   // error
+    if ( ret==-1 ) return 0;    // skip the line
+    regidx_push(idx, chr_from,chr_to,beg,end,idx->payload);
+    return 0;
+}
+
+regidx_t *regidx_init(const char *fname, regidx_parse_f parser, regidx_free_f free_f, size_t payload_size, void *usr_dat)
+{
+    if ( !parser )
+    {
+        if ( !fname ) parser = regidx_parse_tab;
+        else
+        {
+            int len = strlen(fname);
+            if ( len>=7 && !strcasecmp(".bed.gz",fname+len-7) )
+                parser = regidx_parse_bed;
+            else if ( len>=8 && !strcasecmp(".bed.bgz",fname+len-8) )
+                parser = regidx_parse_bed;
+            else if ( len>=4 && !strcasecmp(".bed",fname+len-4) )
+                parser = regidx_parse_bed;
+            else
+                parser = regidx_parse_tab;
+        }
+    }
+
+    regidx_t *idx = (regidx_t*) calloc(1,sizeof(regidx_t));
+    idx->free  = free_f;
+    idx->parse = parser;
+    idx->usr   = usr_dat;
+    idx->seq2regs = khash_str2int_init();
+    idx->payload_size = payload_size;
+    if ( payload_size ) idx->payload = malloc(payload_size);
+
+    if ( !fname ) return idx;
+    
+    kstring_t str = {0,0,0};
+
+    htsFile *fp = hts_open(fname,"r");
+    if ( !fp ) goto error;
+
+    while ( hts_getline(fp, KS_SEP_LINE, &str) > 0 )
+    {
+        if ( regidx_insert(idx, str.s) ) goto error;
+    }
+
+    free(str.s);
+    hts_close(fp);
+    return idx;
+
+error:
+    free(str.s);
+    if ( fp ) hts_close(fp);
+    regidx_destroy(idx);
+    return NULL;
+}
+
+void regidx_destroy(regidx_t *idx)
+{
+    int i, j;
+    for (i=0; i<idx->nseq; i++)
+    {
+        reglist_t *list = &idx->seq[i];
+        if ( idx->free )
+        {
+            for (j=0; j<list->nreg; j++)
+                idx->free((char *)list->dat + idx->payload_size*j);
+        }
+        free(list->dat);
+        free(list->reg);
+        free(list->idx);
+    }
+    free(idx->seq_names);
+    free(idx->seq);
+    free(idx->str.s);
+    free(idx->payload);
+    khash_str2int_destroy_free(idx->seq2regs);
+    free(idx);
+}
+
+int _reglist_build_index(regidx_t *regidx, reglist_t *list)
+{
+    int i;
+    if ( list->unsorted )
+    {
+        if ( !regidx->payload_size )
+            qsort(list->reg,list->nreg,sizeof(reg_t),cmp_reg_ptrs);
+        else
+        {
+            reg_t **ptr = (reg_t**) malloc(sizeof(reg_t*)*list->nreg);
+            for (i=0; i<list->nreg; i++) ptr[i] = list->reg + i;
+            qsort(ptr,list->nreg,sizeof(*ptr),cmp_reg_ptrs2);
+
+            void *tmp_dat = malloc(regidx->payload_size*list->nreg);
+            for (i=0; i<list->nreg; i++)
+            {
+                size_t iori = ptr[i] - list->reg;
+                memcpy((char *)tmp_dat+i*regidx->payload_size,
+                       (char *)list->dat+iori*regidx->payload_size,
+                       regidx->payload_size);
+            }
+            free(list->dat);
+            list->dat = tmp_dat;
+
+            reg_t *tmp_reg = (reg_t*) malloc(sizeof(reg_t)*list->nreg);
+            for (i=0; i<list->nreg; i++)
+            {
+                size_t iori = ptr[i] - list->reg;
+                tmp_reg[i] = list->reg[iori];
+            }
+            free(ptr);
+            free(list->reg);
+            list->reg  = tmp_reg;
+            list->mreg = list->nreg;
+        }
+        list->unsorted = 0;
+    }
+
+    list->nidx = 0;
+    int j,k, midx = 0;
+    for (j=0; j<list->nreg; j++)
+    {
+        int ibeg = iBIN(list->reg[j].beg);
+        int iend = iBIN(list->reg[j].end);
+        if ( midx <= iend )
+        {
+            int old_midx = midx; 
+            midx = iend + 1;
+            kroundup32(midx);
+            list->idx = (uint32_t*) realloc(list->idx, midx*sizeof(uint32_t));
+            memset(list->idx+old_midx, 0, sizeof(uint32_t)*(midx-old_midx));
+        }
+        if ( ibeg==iend )
+        {
+            if ( !list->idx[ibeg] ) list->idx[ibeg] = j + 1;
+        }
+        else
+        {
+            for (k=ibeg; k<=iend; k++)
+                if ( !list->idx[k] ) list->idx[k] = j + 1;
+        }
+        if ( list->nidx < iend+1 ) list->nidx = iend+1;
+    }
+
+    return 0;
+}
+
+int regidx_overlap(regidx_t *regidx, const char *chr, uint32_t beg, uint32_t end, regitr_t *regitr)
+{
+    if ( regitr ) regitr->seq = NULL;
+
+    int iseq, ireg;
+    if ( khash_str2int_get(regidx->seq2regs, chr, &iseq)!=0 ) return 0;    // no such sequence
+
+    reglist_t *list = &regidx->seq[iseq];
+    if ( !list->nreg ) return 0;
+
+    if ( list->nreg==1 )
+    {
+        if ( beg > list->reg[0].end ) return 0;
+        if ( end < list->reg[0].beg ) return 0;
+        ireg = 0;
+    }
+    else
+    {
+        if ( !list->idx )
+            _reglist_build_index(regidx,list);
+
+        int ibeg = iBIN(beg);
+        if ( ibeg >= list->nidx ) return 0;     // beg is too big
+
+        // find a matching region
+        uint32_t i = list->idx[ibeg];
+        if ( !i )
+        {
+            int iend = iBIN(end);
+            if ( iend > list->nidx ) iend = list->nidx;
+            for (i=ibeg; i<iend; i++)
+                if ( list->idx[i] ) break;
+            if ( i==iend ) return 0;
+            i = list->idx[i];
+        }
+
+        for (ireg=i-1; ireg<list->nreg; ireg++)
+        {
+            if ( list->reg[ireg].beg > end ) return 0;   // no match, past the query region
+            if ( list->reg[ireg].end >= beg && list->reg[ireg].beg <= end ) break; // found
+        }
+
+        if ( ireg >= list->nreg ) return 0;   // no match
+    }
+
+    if ( !regitr ) return 1;    // match, but no more info to save
+
+    // may need to iterate over the matching regions later
+    _itr_t *itr = (_itr_t*)regitr->itr;
+    itr->ridx = regidx;
+    itr->list = list;
+    itr->beg  = beg;
+    itr->end  = end;
+    itr->ireg = ireg;
+    itr->active = 0;
+
+    regitr->seq = list->seq;
+    regitr->beg = list->reg[ireg].beg;
+    regitr->end = list->reg[ireg].end;
+    if ( regidx->payload_size )
+        regitr->payload = (char *)list->dat + regidx->payload_size*ireg;
+
+    return 1;
+}
+
+int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr)
+{
+    char *ss = (char*) line;
+    while ( *ss && isspace(*ss) ) ss++;
+    if ( !*ss ) return -1;      // skip blank lines
+    if ( *ss=='#' ) return -1;  // skip comments
+    
+    char *se = ss;
+    while ( *se && !isspace(*se) ) se++;
+
+    *chr_beg = ss;
+    *chr_end = se-1;
+
+    if ( !*se )
+    {
+        // just the chromosome name
+        *beg = 0;
+        *end = MAX_COOR_0;
+        return 0;
+    }
+
+    ss = se+1;
+    *beg = strtod(ss, &se);
+    if ( ss==se ) { fprintf(pysam_stderr,"Could not parse bed line: %s\n", line); return -2; }
+
+    ss = se+1;
+    *end = strtod(ss, &se) - 1;
+    if ( ss==se ) { fprintf(pysam_stderr,"Could not parse bed line: %s\n", line); return -2; }
+    
+    return 0;
+}
+
+int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr)
+{
+    char *ss = (char*) line;
+    while ( *ss && isspace(*ss) ) ss++;
+    if ( !*ss ) return -1;      // skip blank lines
+    if ( *ss=='#' ) return -1;  // skip comments
+    
+    char *se = ss;
+    while ( *se && !isspace(*se) ) se++;
+
+    *chr_beg = ss;
+    *chr_end = se-1;
+
+    if ( !*se )
+    {
+        // just the chromosome name
+        *beg = 0;
+        *end = MAX_COOR_0;
+        return 0;
+    }
+
+    ss = se+1;
+    *beg = strtod(ss, &se);
+    if ( ss==se ) { fprintf(pysam_stderr,"Could not parse tab line: %s\n", line); return -2; }
+    if ( *beg==0 ) { fprintf(pysam_stderr,"Could not parse tab line, expected 1-based coordinate: %s\n", line); return -2; }
+    (*beg)--;
+
+    if ( !se[0] || !se[1] )
+        *end = *beg;
+    else
+    {
+        ss = se+1;
+        *end = strtod(ss, &se);
+        if ( ss==se ) *end = *beg;
+        else if ( *end==0 ) { fprintf(pysam_stderr,"Could not parse tab line, expected 1-based coordinate: %s\n", line); return -2; }
+        else (*end)--;
+    }
+    return 0;
+}
+
+int regidx_parse_reg(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr)
+{
+    char *ss = (char*) line;
+    while ( *ss && isspace(*ss) ) ss++;
+    if ( !*ss ) return -1;      // skip blank lines
+    if ( *ss=='#' ) return -1;  // skip comments
+    
+    char *se = ss;
+    while ( *se && *se!=':' ) se++;
+
+    *chr_beg = ss;
+    *chr_end = se-1;
+
+    if ( !*se )
+    {
+        *beg = 0;
+        *end = MAX_COOR_0;
+        return 0;
+    }
+
+    ss = se+1;
+    *beg = strtod(ss, &se);
+    if ( ss==se ) { fprintf(pysam_stderr,"Could not parse reg line: %s\n", line); return -2; }
+    if ( *beg==0 ) { fprintf(pysam_stderr,"Could not parse reg line, expected 1-based coordinate: %s\n", line); return -2; }
+    (*beg)--;
+
+    if ( !se[0] || !se[1] )
+        *end = se[0]=='-' ? MAX_COOR_0 : *beg;
+    else
+    {
+        ss = se+1;
+        *end = strtod(ss, &se);
+        if ( ss==se ) *end = *beg;
+        else if ( *end==0 ) { fprintf(pysam_stderr,"Could not parse reg line, expected 1-based coordinate: %s\n", line); return -2; }
+        else (*end)--;
+    }
+    return 0;
+}
+
+regitr_t *regitr_init(regidx_t *regidx)
+{
+    regitr_t *regitr = (regitr_t*) calloc(1,sizeof(regitr_t));
+    regitr->itr  = (_itr_t*) calloc(1,sizeof(_itr_t));
+    _itr_t *itr = (_itr_t*) regitr->itr;
+    itr->ridx = regidx;
+    itr->list = NULL;
+    return regitr;
+}
+
+void regitr_reset(regidx_t *regidx, regitr_t *regitr)
+{
+    _itr_t *itr = (_itr_t*) regitr->itr;
+    memset(itr,0,sizeof(_itr_t));
+    itr->ridx = regidx;
+}
+
+void regitr_destroy(regitr_t *regitr)
+{
+    free(regitr->itr);
+    free(regitr);
+}
+
+int regitr_overlap(regitr_t *regitr)
+{
+    if ( !regitr->seq ) return 0;
+
+    _itr_t *itr = (_itr_t*) regitr->itr;
+    if ( !itr->active )
+    {
+        // is this the first call after regidx_overlap?
+        itr->active = 1;
+        itr->ireg++;
+        return 1;
+    }
+
+    reglist_t *list = itr->list;
+
+    int i;
+    for (i=itr->ireg; i<list->nreg; i++)
+    {
+        if ( list->reg[i].beg > itr->end ) return 0;   // no match, past the query region
+        if ( list->reg[i].end >= itr->beg && list->reg[i].beg <= itr->end ) break; // found
+    }
+
+    if ( i >= list->nreg ) return 0;   // no match
+
+    itr->ireg = i + 1;
+    regitr->seq = list->seq;
+    regitr->beg = list->reg[i].beg;
+    regitr->end = list->reg[i].end;
+    if ( itr->ridx->payload_size )
+        regitr->payload = (char *)list->dat + itr->ridx->payload_size*i;
+
+    return 1;
+}
+
+int regitr_loop(regitr_t *regitr)
+{
+    _itr_t *itr = (_itr_t*) regitr->itr;
+    regidx_t *regidx = itr->ridx;
+
+    if ( !itr->list )    // first time here
+    {
+        itr->list = regidx->seq;
+        itr->ireg = 0;
+    }
+
+    size_t iseq = itr->list - regidx->seq;
+    if ( iseq >= regidx->nseq ) return 0;
+
+    if ( itr->ireg >= itr->list->nreg )
+    {
+        iseq++;
+        if ( iseq >= regidx->nseq ) return 0; // no more sequences, done
+        itr->ireg = 0;
+        itr->list = &regidx->seq[iseq];
+    }
+
+    regitr->seq = itr->list->seq;
+    regitr->beg = itr->list->reg[itr->ireg].beg;
+    regitr->end = itr->list->reg[itr->ireg].end;
+    if ( regidx->payload_size )
+        regitr->payload = (char *)itr->list->dat + regidx->payload_size*itr->ireg;
+    itr->ireg++;
+
+    return 1;
+}
+
+
+
diff --git a/bcftools/regidx.h b/bcftools/regidx.h
new file mode 100644
index 0000000..8e25fe1
--- /dev/null
+++ b/bcftools/regidx.h
@@ -0,0 +1,191 @@
+/* 
+    Copyright (C) 2014-2016 Genome Research Ltd.
+
+    Author: Petr Danecek <pd3 at sanger.ac.uk>
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+    
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+*/
+
+/*
+    Region indexing with an optional payload.
+
+    Example of usage:
+
+        // Init the parser and print regions. In this example the payload is a
+        // pointer to a string. For the description of parse_custom and
+        // free_custom functions, see regidx_parse_f and regidx_free_f below,
+        // and for working example see test/test-regidx.c.
+        regidx_t *idx = regidx_init(in_fname,parse_custom,free_custom,sizeof(char*),NULL);
+
+        // Query overlap with chr:from-to
+        regitr_t *itr = regitr_init(idx);
+        if ( regidx_overlap(idx, chr,from,to, itr) ) printf("There is an overlap!\n");
+
+        while ( regitr_overlap(itr) )
+        {
+            printf("[%d,%d] overlaps with [%d,%d], payload=%s\n", from,to, 
+                itr->beg, itr->end, regitr_payload(itr,char*));
+        }
+
+        regidx_destroy(idx);
+        regitr_destroy(itr);
+
+
+    Another example, loop over all regions:
+        
+        regidx_t *idx = regidx_init(in_fname,NULL,NULL,0,NULL);
+        regitr_t *itr = regitr_init(idx);
+
+        while ( regitr_loop(itr) )
+            printf("chr=%s  beg=%d  end=%d\n", itr->seq, itr->beg, itr->end);
+
+        regidx_destroy(idx);
+        regitr_destroy(itr);
+*/
+
+#ifndef __REGIDX_H__
+#define __REGIDX_H__
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define REGIDX_MAX 2147483646       // maximum regidx coordinate (0-based)
+
+typedef struct _regidx_t regidx_t;
+typedef struct
+{
+    uint32_t beg,end;
+    void *payload;
+    char *seq;
+    void *itr;
+}
+regitr_t;
+
+#define regitr_payload(itr,type_t) (*((type_t*)(itr)->payload))
+
+/*
+ *  regidx_parse_f - Function to parse one input line, such as regidx_parse_bed
+ *  or regidx_parse_tab below. The function is expected to set `chr_from` and
+ *  `chr_to` to point to first and last character of chromosome name and set
+ *  coordinates `beg` and `end` (0-based, inclusive). If regidx_init() was
+ *  called with non-zero payload_size, the `payload` points to a memory
+ *  location of the payload_size and `usr` is the data passed to regidx_init().
+ *  Any memory allocated by the function will be freed by regidx_free_f called
+ *  by regidx_destroy().
+ *
+ *  Return value: 0 on success, -1 to skip a record, -2 on fatal error.
+ */
+typedef int  (*regidx_parse_f)(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr);
+typedef void (*regidx_free_f)(void *payload);
+
+/*
+ *  A note about the parsers: 
+ *      - leading spaces are ignored
+ *      - lines starting with "#" are ignored
+ */
+int regidx_parse_bed(const char*,char**,char**,uint32_t*,uint32_t*,void*,void*);   // CHROM or whitespace-sepatated CHROM,FROM,TO (0-based,right-open)
+int regidx_parse_tab(const char*,char**,char**,uint32_t*,uint32_t*,void*,void*);   // CHROM or whitespace-separated CHROM,POS (1-based, inclusive)
+int regidx_parse_reg(const char*,char**,char**,uint32_t*,uint32_t*,void*,void*);   // CHROM, CHROM:POS, CHROM:FROM-TO, CHROM:FROM- (1-based, inclusive)
+
+/*
+ *  regidx_init() - creates new index
+ *  @param fname:  input file name or NULL if regions will be added one-by-one via regidx_insert()
+ *  @param parsef: regidx_parse_bed, regidx_parse_tab or see description of regidx_parse_f. If NULL,
+ *                 the format will be autodected, currently either regidx_parse_tab (the default) or
+ *                 regidx_parse_bed (file must be named 'bed' or 'bed.gz') will be used. Note that
+ *                 the exact autodetection algorithm will change.
+ *  @param freef:  NULL or see description of regidx_parse_f
+ *  @param payload_size: 0 with regidx_parse_bed, regidx_parse_tab or see regidx_parse_f
+ *  @param usr:    optional user data passed to regidx_parse_f
+ *
+ *  Returns index on success or NULL on error.
+ */
+regidx_t *regidx_init(const char *fname, regidx_parse_f parsef, regidx_free_f freef, size_t payload_size, void *usr);
+
+/*
+ *  regidx_destroy() - free memory allocated by regidx_init
+ */
+void regidx_destroy(regidx_t *idx);
+
+/*
+ *  regidx_overlap() - check overlap of the location chr:from-to with regions
+ *  @param beg,end:     0-based start, end coordinate (inclusive)
+ *  @param itr:         pointer to iterator, can be NULL if regidx_loop not needed
+ *
+ *  Returns 0 if there is no overlap or 1 if overlap is found. The overlapping
+ *  regions can be iterated as shown in the example above.
+ */
+int regidx_overlap(regidx_t *idx, const char *chr, uint32_t beg, uint32_t end, regitr_t *itr);
+
+/*
+ *  regidx_insert() - add a new region. 
+ *  regidx_insert_list() - add new regions from a list
+ *  regidx_push() - low level insertion of a new region
+ *
+ *  Returns 0 on success or -1 on error.
+ */
+int regidx_insert(regidx_t *idx, char *line);
+int regidx_insert_list(regidx_t *idx, char *line, char delim);
+int regidx_push(regidx_t *idx, char *chr_beg, char *chr_end, uint32_t beg, uint32_t end, void *payload);
+
+/*
+ *  regidx_seq_names() - return list of all sequence names
+ */
+char **regidx_seq_names(regidx_t *idx, int *n);
+
+/*
+ *  regidx_seq_nregs() - number of regions
+ *  regidx_nregs()  - total number of regions
+ */
+int regidx_seq_nregs(regidx_t *idx, const char *seq);
+int regidx_nregs(regidx_t *idx);
+
+/*
+ *  regitr_init() - initialize an iterator. The idx parameter is required only
+ *                  with regitr_loop. If only regitr_overlap is called, NULL
+ *                  can be given.
+ *
+ *  regitr_reset() - initialize an iterator for a repeated regitr_loop cycle.
+ *                  Not required with regitr_overlap.
+ */
+regitr_t *regitr_init(regidx_t *idx);
+void regitr_destroy(regitr_t *itr);
+void regitr_reset(regidx_t *idx, regitr_t *itr);
+
+/*
+ *  regitr_overlap() - next overlapping region
+ *  Returns 0 when done or 1 when itr is set to next region
+ */
+int regitr_overlap(regitr_t *itr);
+
+/*
+ *  regitr_loop() - loop over all regions
+ *  Returns 0 when done or 1 when itr is set to next region
+ */
+int regitr_loop(regitr_t *itr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/bcftools/smpl_ilist.c b/bcftools/smpl_ilist.c
new file mode 100644
index 0000000..c7fa913
--- /dev/null
+++ b/bcftools/smpl_ilist.c
@@ -0,0 +1,106 @@
+/* 
+    Copyright (C) 2016 Genome Research Ltd.
+
+    Author: Petr Danecek <pd3 at sanger.ac.uk>
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+    
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+*/
+
+#include "bcftools.h"
+#include "smpl_ilist.h"
+
+void smpl_ilist_destroy(smpl_ilist_t *smpl)
+{
+    free(smpl->idx);
+    free(smpl);
+}
+
+smpl_ilist_t *smpl_ilist_init(bcf_hdr_t *hdr, char *sample_list, int is_file, int flags)
+{
+    smpl_ilist_t *smpl = (smpl_ilist_t*) calloc(1,sizeof(smpl_ilist_t));
+
+    int i;
+    if ( !sample_list )
+    {
+        smpl->n = bcf_hdr_nsamples(hdr);
+        smpl->idx = (int*) malloc(sizeof(int)*smpl->n);
+        for (i=0; i<smpl->n; i++) smpl->idx[i] = i;
+        return smpl;
+    }
+
+    int nlist;
+    char **list = hts_readlist(sample_list[0]=='^'?sample_list+1:sample_list, is_file, &nlist);
+    if ( !list ) error("Could not parse %s\n", sample_list);
+
+    // preserve the VCF order
+    int *tmp = (int*)calloc(bcf_hdr_nsamples(hdr),sizeof(int));
+    for (i=0; i<nlist; i++)
+    {
+        int idx = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, list[i]);
+        if ( idx>=0 ) 
+        {
+            tmp[idx] = 1;
+            smpl->n++;
+        }
+        else if ( flags&SMPL_STRICT )
+            error("No such sample: %s\n", list[i]);
+    }
+
+    if ( sample_list[0]=='^' ) smpl->n = bcf_hdr_nsamples(hdr) - smpl->n;
+    smpl->idx = (int*) malloc(sizeof(int)*smpl->n);
+
+    int j = 0;
+    if ( sample_list[0]!='^' )
+    {
+        for (i=0; i<bcf_hdr_nsamples(hdr); i++)
+            if ( tmp[i] ) smpl->idx[j++] = i;
+    }
+    else
+    {
+        for (i=0; i<bcf_hdr_nsamples(hdr); i++)
+            if ( !tmp[i] ) smpl->idx[j++] = i;
+    }
+
+    free(tmp);
+    for (i=0; i<nlist; i++) free(list[i]);
+    free(list);
+
+    return smpl;
+}
+
+smpl_ilist_t *smpl_ilist_map(bcf_hdr_t *hdr_a, bcf_hdr_t *hdr_b, int flags)
+{
+    if ( flags&SMPL_STRICT && bcf_hdr_nsamples(hdr_a)!=bcf_hdr_nsamples(hdr_b) )
+        error("Different number of samples: %d vs %d\n", bcf_hdr_nsamples(hdr_a),bcf_hdr_nsamples(hdr_b));
+
+    smpl_ilist_t *smpl = (smpl_ilist_t*) calloc(1,sizeof(smpl_ilist_t));
+
+    int i;
+    smpl->n = bcf_hdr_nsamples(hdr_a);
+    smpl->idx = (int*) malloc(sizeof(int)*smpl->n);
+    for (i=0; i<smpl->n; i++)
+    {
+        const char *name = bcf_hdr_int2id(hdr_a, BCF_DT_SAMPLE, i);
+        smpl->idx[i] = bcf_hdr_id2int(hdr_b, BCF_DT_SAMPLE, name);
+        if ( flags&SMPL_STRICT && smpl->idx[i]<0 ) 
+            error("The sample %s is not present in the second file\n", name);
+    }
+    return smpl;
+}
+
diff --git a/bcftools/smpl_ilist.c.pysam.c b/bcftools/smpl_ilist.c.pysam.c
new file mode 100644
index 0000000..f52b8ce
--- /dev/null
+++ b/bcftools/smpl_ilist.c.pysam.c
@@ -0,0 +1,108 @@
+#include "pysam.h"
+
+/* 
+    Copyright (C) 2016 Genome Research Ltd.
+
+    Author: Petr Danecek <pd3 at sanger.ac.uk>
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+    
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+*/
+
+#include "bcftools.h"
+#include "smpl_ilist.h"
+
+void smpl_ilist_destroy(smpl_ilist_t *smpl)
+{
+    free(smpl->idx);
+    free(smpl);
+}
+
+smpl_ilist_t *smpl_ilist_init(bcf_hdr_t *hdr, char *sample_list, int is_file, int flags)
+{
+    smpl_ilist_t *smpl = (smpl_ilist_t*) calloc(1,sizeof(smpl_ilist_t));
+
+    int i;
+    if ( !sample_list )
+    {
+        smpl->n = bcf_hdr_nsamples(hdr);
+        smpl->idx = (int*) malloc(sizeof(int)*smpl->n);
+        for (i=0; i<smpl->n; i++) smpl->idx[i] = i;
+        return smpl;
+    }
+
+    int nlist;
+    char **list = hts_readlist(sample_list[0]=='^'?sample_list+1:sample_list, is_file, &nlist);
+    if ( !list ) error("Could not parse %s\n", sample_list);
+
+    // preserve the VCF order
+    int *tmp = (int*)calloc(bcf_hdr_nsamples(hdr),sizeof(int));
+    for (i=0; i<nlist; i++)
+    {
+        int idx = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, list[i]);
+        if ( idx>=0 ) 
+        {
+            tmp[idx] = 1;
+            smpl->n++;
+        }
+        else if ( flags&SMPL_STRICT )
+            error("No such sample: %s\n", list[i]);
+    }
+
+    if ( sample_list[0]=='^' ) smpl->n = bcf_hdr_nsamples(hdr) - smpl->n;
+    smpl->idx = (int*) malloc(sizeof(int)*smpl->n);
+
+    int j = 0;
+    if ( sample_list[0]!='^' )
+    {
+        for (i=0; i<bcf_hdr_nsamples(hdr); i++)
+            if ( tmp[i] ) smpl->idx[j++] = i;
+    }
+    else
+    {
+        for (i=0; i<bcf_hdr_nsamples(hdr); i++)
+            if ( !tmp[i] ) smpl->idx[j++] = i;
+    }
+
+    free(tmp);
+    for (i=0; i<nlist; i++) free(list[i]);
+    free(list);
+
+    return smpl;
+}
+
+smpl_ilist_t *smpl_ilist_map(bcf_hdr_t *hdr_a, bcf_hdr_t *hdr_b, int flags)
+{
+    if ( flags&SMPL_STRICT && bcf_hdr_nsamples(hdr_a)!=bcf_hdr_nsamples(hdr_b) )
+        error("Different number of samples: %d vs %d\n", bcf_hdr_nsamples(hdr_a),bcf_hdr_nsamples(hdr_b));
+
+    smpl_ilist_t *smpl = (smpl_ilist_t*) calloc(1,sizeof(smpl_ilist_t));
+
+    int i;
+    smpl->n = bcf_hdr_nsamples(hdr_a);
+    smpl->idx = (int*) malloc(sizeof(int)*smpl->n);
+    for (i=0; i<smpl->n; i++)
+    {
+        const char *name = bcf_hdr_int2id(hdr_a, BCF_DT_SAMPLE, i);
+        smpl->idx[i] = bcf_hdr_id2int(hdr_b, BCF_DT_SAMPLE, name);
+        if ( flags&SMPL_STRICT && smpl->idx[i]<0 ) 
+            error("The sample %s is not present in the second file\n", name);
+    }
+    return smpl;
+}
+
diff --git a/bcftools/smpl_ilist.h b/bcftools/smpl_ilist.h
new file mode 100644
index 0000000..7083cf2
--- /dev/null
+++ b/bcftools/smpl_ilist.h
@@ -0,0 +1,47 @@
+/* 
+    Copyright (C) 2016 Genome Research Ltd.
+
+    Author: Petr Danecek <pd3 at sanger.ac.uk>
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+    
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+*/
+/*
+    Parse --samples and --samples-file
+*/
+
+#ifndef __SMPL_ILIST_H__
+#define __SMPL_ILIST_H__
+
+#include <htslib/vcf.h>
+
+#define SMPL_NONE   0   // flexible error recovery
+#define SMPL_STRICT 1   // samples must exist
+
+typedef struct
+{
+    int *idx;  // index to bcf_hdr_t.samples 
+    int n;
+}
+smpl_ilist_t;
+
+smpl_ilist_t *smpl_ilist_init(bcf_hdr_t *hdr, char *sample_list, int is_file, int flags);
+smpl_ilist_t *smpl_ilist_map(bcf_hdr_t *hdr_a, bcf_hdr_t *hdr_b, int flags);
+void smpl_ilist_destroy(smpl_ilist_t *smpl);
+
+#endif
diff --git a/bcftools/tabix.c b/bcftools/tabix.c
index 2f24b92..c1874c2 100644
--- a/bcftools/tabix.c
+++ b/bcftools/tabix.c
@@ -1,7 +1,7 @@
 /*  tabix.c -- tabix subcommand.
 
     Copyright (C) 2012 Broad Institute.
-    Copyright (C) 2013 Genome Research Ltd.
+    Copyright (C) 2013, 2016 Genome Research Ltd.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
 
@@ -32,8 +32,8 @@ THE SOFTWARE.  */
 
 int main_tabix(int argc, char *argv[])
 {
-    int c, min_shift = -1, is_force = 0, is_all = 0;
-    tbx_conf_t conf = tbx_conf_gff, *conf_ptr = NULL;
+    int c, min_shift = -1, is_force = 0, is_all = 0, detect = 1;
+    tbx_conf_t conf = tbx_conf_gff;
     while ((c = getopt(argc, argv, "0fap:s:b:e:S:c:m:")) >= 0)
         if (c == '0') conf.preset |= TBX_UCSC;
         else if (c == 'f') is_force = 1;
@@ -45,13 +45,14 @@ int main_tabix(int argc, char *argv[])
         else if (c == 'c') conf.meta_char = *optarg;
         else if (c == 'S') conf.line_skip = atoi(optarg);
         else if (c == 'p') {
-            if (strcmp(optarg, "gff") == 0) conf_ptr = &tbx_conf_gff;
-            else if (strcmp(optarg, "bed") == 0) conf_ptr = &tbx_conf_bed;
-            else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam;
-            else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf;
+            if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff;
+            else if (strcmp(optarg, "bed") == 0) conf = tbx_conf_bed;
+            else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam;
+            else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf;
             else {
                 fprintf(stderr, "The type '%s' not recognised\n", optarg);
                 return 1;
+            detect = 0;
             }
 
         }
@@ -79,28 +80,29 @@ int main_tabix(int argc, char *argv[])
         bgzf_close(fp);
         free(s.s);
     } else if (optind + 2 > argc) { // create index
-        if ( !conf_ptr )
+        if ( detect )
         {
             // auto-detect file type by file name
             int l = strlen(argv[optind]);
             int strcasecmp(const char *s1, const char *s2);
-            if (l>=7 && strcasecmp(argv[optind]+l-7, ".gff.gz") == 0) conf_ptr = &tbx_conf_gff;
-            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".bed.gz") == 0) conf_ptr = &tbx_conf_bed;
-            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".sam.gz") == 0) conf_ptr = &tbx_conf_sam;
-            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".vcf.gz") == 0) conf_ptr = &tbx_conf_vcf;
+            if (l>=7 && strcasecmp(argv[optind]+l-7, ".gff.gz") == 0) conf = tbx_conf_gff;
+            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".bed.gz") == 0) conf = tbx_conf_bed;
+            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".sam.gz") == 0) conf = tbx_conf_sam;
+            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".vcf.gz") == 0) conf = tbx_conf_vcf;
         }
-        if ( conf_ptr ) conf = *conf_ptr;
 
         if (!is_force) {
             char *fn;
             FILE *fp;
-            fn = (char*)alloca(strlen(argv[optind]) + 5);
+            fn = (char*)malloc(strlen(argv[optind]) + 5);
             strcat(strcpy(fn, argv[optind]), min_shift <= 0? ".tbi" : ".csi");
             if ((fp = fopen(fn, "rb")) != 0) {
                 fclose(fp);
+                free(fn);
                 fprintf(stderr, "[E::%s] the index file exists; use option '-f' to overwrite\n", __func__);
                 return 1;
             }
+            free(fn);
         }
         if ( tbx_index_build(argv[optind], min_shift, &conf) )
         {
diff --git a/bcftools/tabix.c.pysam.c b/bcftools/tabix.c.pysam.c
index afa3619..b0c6e0e 100644
--- a/bcftools/tabix.c.pysam.c
+++ b/bcftools/tabix.c.pysam.c
@@ -3,7 +3,7 @@
 /*  tabix.c -- tabix subcommand.
 
     Copyright (C) 2012 Broad Institute.
-    Copyright (C) 2013 Genome Research Ltd.
+    Copyright (C) 2013, 2016 Genome Research Ltd.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
 
@@ -34,8 +34,8 @@ THE SOFTWARE.  */
 
 int main_tabix(int argc, char *argv[])
 {
-    int c, min_shift = -1, is_force = 0, is_all = 0;
-    tbx_conf_t conf = tbx_conf_gff, *conf_ptr = NULL;
+    int c, min_shift = -1, is_force = 0, is_all = 0, detect = 1;
+    tbx_conf_t conf = tbx_conf_gff;
     while ((c = getopt(argc, argv, "0fap:s:b:e:S:c:m:")) >= 0)
         if (c == '0') conf.preset |= TBX_UCSC;
         else if (c == 'f') is_force = 1;
@@ -47,13 +47,14 @@ int main_tabix(int argc, char *argv[])
         else if (c == 'c') conf.meta_char = *optarg;
         else if (c == 'S') conf.line_skip = atoi(optarg);
         else if (c == 'p') {
-            if (strcmp(optarg, "gff") == 0) conf_ptr = &tbx_conf_gff;
-            else if (strcmp(optarg, "bed") == 0) conf_ptr = &tbx_conf_bed;
-            else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam;
-            else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf;
+            if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff;
+            else if (strcmp(optarg, "bed") == 0) conf = tbx_conf_bed;
+            else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam;
+            else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf;
             else {
                 fprintf(pysam_stderr, "The type '%s' not recognised\n", optarg);
                 return 1;
+            detect = 0;
             }
 
         }
@@ -81,28 +82,29 @@ int main_tabix(int argc, char *argv[])
         bgzf_close(fp);
         free(s.s);
     } else if (optind + 2 > argc) { // create index
-        if ( !conf_ptr )
+        if ( detect )
         {
             // auto-detect file type by file name
             int l = strlen(argv[optind]);
             int strcasecmp(const char *s1, const char *s2);
-            if (l>=7 && strcasecmp(argv[optind]+l-7, ".gff.gz") == 0) conf_ptr = &tbx_conf_gff;
-            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".bed.gz") == 0) conf_ptr = &tbx_conf_bed;
-            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".sam.gz") == 0) conf_ptr = &tbx_conf_sam;
-            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".vcf.gz") == 0) conf_ptr = &tbx_conf_vcf;
+            if (l>=7 && strcasecmp(argv[optind]+l-7, ".gff.gz") == 0) conf = tbx_conf_gff;
+            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".bed.gz") == 0) conf = tbx_conf_bed;
+            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".sam.gz") == 0) conf = tbx_conf_sam;
+            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".vcf.gz") == 0) conf = tbx_conf_vcf;
         }
-        if ( conf_ptr ) conf = *conf_ptr;
 
         if (!is_force) {
             char *fn;
             FILE *fp;
-            fn = (char*)alloca(strlen(argv[optind]) + 5);
+            fn = (char*)malloc(strlen(argv[optind]) + 5);
             strcat(strcpy(fn, argv[optind]), min_shift <= 0? ".tbi" : ".csi");
             if ((fp = fopen(fn, "rb")) != 0) {
                 fclose(fp);
+                free(fn);
                 fprintf(pysam_stderr, "[E::%s] the index file exists; use option '-f' to overwrite\n", __func__);
                 return 1;
             }
+            free(fn);
         }
         if ( tbx_index_build(argv[optind], min_shift, &conf) )
         {
diff --git a/bcftools/tsv2vcf.c b/bcftools/tsv2vcf.c
index 8826f18..2e1aa52 100644
--- a/bcftools/tsv2vcf.c
+++ b/bcftools/tsv2vcf.c
@@ -24,6 +24,7 @@
 */
 
 #include <ctype.h>
+#include <strings.h>
 #include "tsv2vcf.h"
 
 tsv_t *tsv_init(const char *str)
diff --git a/bcftools/tsv2vcf.c.pysam.c b/bcftools/tsv2vcf.c.pysam.c
index 1da48d5..f5eff01 100644
--- a/bcftools/tsv2vcf.c.pysam.c
+++ b/bcftools/tsv2vcf.c.pysam.c
@@ -26,6 +26,7 @@
 */
 
 #include <ctype.h>
+#include <strings.h>
 #include "tsv2vcf.h"
 
 tsv_t *tsv_init(const char *str)
diff --git a/bcftools/vcfannotate.c b/bcftools/vcfannotate.c
index d5164f3..e6efda9 100644
--- a/bcftools/vcfannotate.c
+++ b/bcftools/vcfannotate.c
@@ -23,6 +23,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -41,6 +42,7 @@ THE SOFTWARE.  */
 #include "vcmp.h"
 #include "filter.h"
 #include "convert.h"
+#include "smpl_ilist.h"
 
 struct _args_t;
 
@@ -65,12 +67,12 @@ annot_line_t;
 
 #define REPLACE_MISSING  0  // replace only missing values
 #define REPLACE_ALL      1  // replace both missing and existing values
-#define REPLACE_EXISTING 2  // replace only if tgt is not missing
+#define REPLACE_NON_MISSING 2  // replace only if tgt is not missing
 #define SET_OR_APPEND    3  // set new value if missing or non-existent, append otherwise
 typedef struct _annot_col_t
 {
     int icol, replace, number;  // number: one of BCF_VL_* types
-    char *hdr_key;
+    char *hdr_key_src, *hdr_key_dst;
     int (*setter)(struct _args_t *, bcf1_t *, struct _annot_col_t *, void*);
 }
 annot_col_t;
@@ -109,6 +111,7 @@ typedef struct _args_t
     convert_t *set_ids;
     int set_ids_replace;
 
+    int nsmpl_annot;
     int *sample_map, nsample_map, sample_is_file;   // map[idst] -> isrc
     int mtmpi, mtmpf, mtmps;
     int mtmpi2, mtmpf2, mtmps2;
@@ -155,6 +158,7 @@ void remove_info(args_t *args, bcf1_t *line, rm_tag_t *tag)
         }
         line->d.shared_dirty |= BCF1_DIRTY_INF;
         inf->vptr = NULL;
+        inf->vptr_off = inf->vptr_len = 0;
     }
 }
 void remove_info_tag(args_t *args, bcf1_t *line, rm_tag_t *tag)
@@ -187,6 +191,10 @@ void remove_format(args_t *args, bcf1_t *line, rm_tag_t *tag)
     }
 }
 
+#include "htslib/khash.h"
+KHASH_MAP_INIT_STR(vdict, bcf_idinfo_t)
+typedef khash_t(vdict) vdict_t;
+
 static void remove_hdr_lines(bcf_hdr_t *hdr, int type)
 {
     int i = 0, nrm = 0;
@@ -194,11 +202,18 @@ static void remove_hdr_lines(bcf_hdr_t *hdr, int type)
     {
         if ( hdr->hrec[i]->type!=type ) { i++; continue; }
         bcf_hrec_t *hrec = hdr->hrec[i];
-        if ( type==BCF_HL_FMT )
+        if ( type==BCF_HL_FMT || type==BCF_HL_INFO || type==BCF_HL_FMT || type== BCF_HL_CTG )
         {
             // everything except FORMAT/GT
             int id = bcf_hrec_find_key(hrec, "ID");
-            if ( id>=0 && !strcmp(hrec->vals[id],"GT") ) { i++; continue; }
+            if ( id>=0 )
+            {
+                if ( type==BCF_HL_FMT && !strcmp(hrec->vals[id],"GT") ) { i++; continue; }
+                vdict_t *d = type==BCF_HL_CTG ? (vdict_t*)hdr->dict[BCF_DT_CTG] : (vdict_t*)hdr->dict[BCF_DT_ID];
+                khint_t k = kh_get(vdict, d, hdr->hrec[i]->vals[id]);
+                kh_val(d, k).hrec[type==BCF_HL_CTG?0:type] = NULL;
+                kh_val(d, k).info[type] |= 0xf;
+            }
         }
         nrm++;
         hdr->nhrec--;
@@ -453,7 +468,7 @@ static int setter_qual(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 
     line->qual = strtod(str, &str);
     if ( str == tab->cols[col->icol] )
-        error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+        error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
     return 0;
 }
 static int vcf_setter_qual(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -470,31 +485,31 @@ static int setter_info_flag(args_t *args, bcf1_t *line, annot_col_t *col, void *
     char *str = tab->cols[col->icol];
     if ( str[0]=='.' && str[1]==0 ) return 0;
 
-    if ( str[0]=='1' && str[1]==0 ) return bcf_update_info_flag(args->hdr_out,line,col->hdr_key,NULL,1);
-    if ( str[0]=='0' && str[1]==0 ) return bcf_update_info_flag(args->hdr_out,line,col->hdr_key,NULL,0);
+    if ( str[0]=='1' && str[1]==0 ) return bcf_update_info_flag(args->hdr_out,line,col->hdr_key_dst,NULL,1);
+    if ( str[0]=='0' && str[1]==0 ) return bcf_update_info_flag(args->hdr_out,line,col->hdr_key_dst,NULL,0);
     error("Could not parse %s at %s:%d .. [%s]\n", bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
     return -1;
 }
 static int vcf_setter_info_flag(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int flag = bcf_get_info_flag(args->files->readers[1].header,rec,col->hdr_key,NULL,NULL);
-    bcf_update_info_flag(args->hdr_out,line,col->hdr_key,NULL,flag);
+    int flag = bcf_get_info_flag(args->files->readers[1].header,rec,col->hdr_key_src,NULL,NULL);
+    bcf_update_info_flag(args->hdr_out,line,col->hdr_key_dst,NULL,flag);
     return 0;
 }
 static int setter_ARinfo_int32(args_t *args, bcf1_t *line, annot_col_t *col, int nals, char **als, int ntmpi)
 {
     if ( col->number==BCF_VL_A && ntmpi!=nals-1 && (ntmpi!=1 || args->tmpi[0]!=bcf_int32_missing || args->tmpi[1]!=bcf_int32_vector_end) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpi,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpi,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
     else if ( col->number==BCF_VL_R && ntmpi!=nals && (ntmpi!=1 || args->tmpi[0]!=bcf_int32_missing || args->tmpi[1]!=bcf_int32_vector_end) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpi,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpi,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
 
     int ndst = col->number==BCF_VL_A ? line->n_allele - 1 : line->n_allele;
     int *map = vcmp_map_ARvalues(args->vcmp,ndst,nals,als,line->n_allele,line->d.allele);
     if ( !map ) error("REF alleles not compatible at %s:%d\n");
 
     // fill in any missing values in the target VCF (or all, if not present)
-    int ntmpi2 = bcf_get_info_float(args->hdr, line, col->hdr_key, &args->tmpi2, &args->mtmpi2);
+    int ntmpi2 = bcf_get_info_float(args->hdr, line, col->hdr_key_dst, &args->tmpi2, &args->mtmpi2);
     if ( ntmpi2 < ndst ) hts_expand(int32_t,ndst,args->mtmpi2,args->tmpi2);
 
     int i;
@@ -511,7 +526,7 @@ static int setter_ARinfo_int32(args_t *args, bcf1_t *line, annot_col_t *col, int
 
         args->tmpi2[i] = args->tmpi[ map[i] ];
     }
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key,args->tmpi2,ndst);
+    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,ndst);
     return 0;
 }
 static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -537,17 +552,17 @@ static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *d
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_int32(args->hdr, line, col->hdr_key, &args->tmpi2, &args->mtmpi2);
+        int ret = bcf_get_info_int32(args->hdr, line, col->hdr_key_dst, &args->tmpi2, &args->mtmpi2);
         if ( ret>0 && args->tmpi2[0]!=bcf_int32_missing ) return 0;
     }
 
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key,args->tmpi,ntmpi);
+    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
     return 0;
 }
 static int vcf_setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int ntmpi = bcf_get_info_int32(args->files->readers[1].header,rec,col->hdr_key,&args->tmpi,&args->mtmpi);
+    int ntmpi = bcf_get_info_int32(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpi,&args->mtmpi);
     if ( ntmpi < 0 ) return 0;    // nothing to add
 
     if ( col->number==BCF_VL_A || col->number==BCF_VL_R ) 
@@ -555,26 +570,26 @@ static int vcf_setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, voi
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_int32(args->hdr, line, col->hdr_key, &args->tmpi2, &args->mtmpi2);
+        int ret = bcf_get_info_int32(args->hdr, line, col->hdr_key_dst, &args->tmpi2, &args->mtmpi2);
         if ( ret>0 && args->tmpi2[0]!=bcf_int32_missing ) return 0;
     }
 
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key,args->tmpi,ntmpi);
+    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
     return 0;
 }
 static int setter_ARinfo_real(args_t *args, bcf1_t *line, annot_col_t *col, int nals, char **als, int ntmpf)
 {
     if ( col->number==BCF_VL_A && ntmpf!=nals-1 && (ntmpf!=1 || !bcf_float_is_missing(args->tmpf[0]) || !bcf_float_is_vector_end(args->tmpf[0])) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpf,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpf,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
     else if ( col->number==BCF_VL_R && ntmpf!=nals && (ntmpf!=1 || !bcf_float_is_missing(args->tmpf[0]) || !bcf_float_is_vector_end(args->tmpf[0])) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpf,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpf,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
 
     int ndst = col->number==BCF_VL_A ? line->n_allele - 1 : line->n_allele;
     int *map = vcmp_map_ARvalues(args->vcmp,ndst,nals,als,line->n_allele,line->d.allele);
     if ( !map ) error("REF alleles not compatible at %s:%d\n");
 
     // fill in any missing values in the target VCF (or all, if not present)
-    int ntmpf2 = bcf_get_info_float(args->hdr, line, col->hdr_key, &args->tmpf2, &args->mtmpf2);
+    int ntmpf2 = bcf_get_info_float(args->hdr, line, col->hdr_key_dst, &args->tmpf2, &args->mtmpf2);
     if ( ntmpf2 < ndst ) hts_expand(float,ndst,args->mtmpf2,args->tmpf2);
 
     int i;
@@ -591,7 +606,7 @@ static int setter_ARinfo_real(args_t *args, bcf1_t *line, annot_col_t *col, int
 
         args->tmpf2[i] = args->tmpf[ map[i] ];
     }
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key,args->tmpf2,ndst);
+    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,ndst);
     return 0;
 }
 static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -617,17 +632,17 @@ static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_float(args->hdr, line, col->hdr_key, &args->tmpf2, &args->mtmpf2);
+        int ret = bcf_get_info_float(args->hdr, line, col->hdr_key_dst, &args->tmpf2, &args->mtmpf2);
         if ( ret>0 && !bcf_float_is_missing(args->tmpf2[0]) ) return 0;
     }
 
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key,args->tmpf,ntmpf);
+    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
     return 0;
 }
 static int vcf_setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int ntmpf = bcf_get_info_float(args->files->readers[1].header,rec,col->hdr_key,&args->tmpf,&args->mtmpf);
+    int ntmpf = bcf_get_info_float(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpf,&args->mtmpf);
     if ( ntmpf < 0 ) return 0;    // nothing to add
 
     if ( col->number==BCF_VL_A || col->number==BCF_VL_R ) 
@@ -635,11 +650,11 @@ static int vcf_setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, vo
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_float(args->hdr, line, col->hdr_key, &args->tmpf2, &args->mtmpf2);
+        int ret = bcf_get_info_float(args->hdr, line, col->hdr_key_dst, &args->tmpf2, &args->mtmpf2);
         if ( ret>0 && !bcf_float_is_missing(args->tmpf2[0]) ) return 0;
     }
 
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key,args->tmpf,ntmpf);
+    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
     return 0;
 }
 int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst); // see vcfmerge.c
@@ -652,9 +667,9 @@ static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, in
         lsrc++;
     }
     if ( col->number==BCF_VL_A && nsrc!=nals-1 && (nsrc!=1 || args->tmps[0]!='.' || args->tmps[1]!=0 ) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", nsrc,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", nsrc,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
     else if ( col->number==BCF_VL_R && nsrc!=nals && (nsrc!=1 || args->tmps[0]!='.' || args->tmps[1]!=0 ) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", nsrc,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", nsrc,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
 
     int ndst = col->number==BCF_VL_A ? line->n_allele - 1 : line->n_allele;
     int *map = vcmp_map_ARvalues(args->vcmp,ndst,nals,als,line->n_allele,line->d.allele);
@@ -662,7 +677,7 @@ static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, in
 
     // fill in any missing values in the target VCF (or all, if not present)
     int i, empty = 0, nstr, mstr = args->tmpks.m;
-    nstr = bcf_get_info_string(args->hdr, line, col->hdr_key, &args->tmpks.s, &mstr); 
+    nstr = bcf_get_info_string(args->hdr, line, col->hdr_key_dst, &args->tmpks.s, &mstr); 
     args->tmpks.m = mstr;
     if ( nstr<0 || (nstr==1 && args->tmpks.s[0]=='.' && args->tmpks.s[1]==0) )
     {
@@ -695,7 +710,7 @@ static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, in
         int ret = copy_string_field(args->tmps,map[i],lsrc,&args->tmpks,i);
         assert( ret==0 );
     }
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key,args->tmpks.s);
+    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmpks.s);
     return 0;
 }
 static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -712,17 +727,17 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_string(args->hdr, line, col->hdr_key, &args->tmps2, &args->mtmps2);
+        int ret = bcf_get_info_string(args->hdr, line, col->hdr_key_dst, &args->tmps2, &args->mtmps2);
         if ( ret>0 && (args->tmps2[0]!='.' || args->tmps2[1]!=0) ) return 0;
     }
 
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key,args->tmps);
+    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
     return 0;
 }
 static int vcf_setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int ntmps = bcf_get_info_string(args->files->readers[1].header,rec,col->hdr_key,&args->tmps,&args->mtmps);
+    int ntmps = bcf_get_info_string(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmps,&args->mtmps);
     if ( ntmps < 0 ) return 0;    // nothing to add
 
     if ( col->number==BCF_VL_A || col->number==BCF_VL_R ) 
@@ -730,11 +745,11 @@ static int vcf_setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, voi
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_string(args->hdr, line, col->hdr_key, &args->tmps2, &args->mtmps2);
+        int ret = bcf_get_info_string(args->hdr, line, col->hdr_key_dst, &args->tmps2, &args->mtmps2);
         if ( ret>0 && (args->tmps2[0]!='.' || args->tmps2[1]!=0) ) return 0;
     }
 
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key,args->tmps);
+    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
     return 0;
 }
 static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -752,7 +767,7 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
     nsrc /= bcf_hdr_nsamples(args->files->readers[1].header);
     if ( ndst<=0 )  // field not present in dst file
     {
-        if ( col->replace==REPLACE_EXISTING ) return 0;
+        if ( col->replace==REPLACE_NON_MISSING ) return 0;
         hts_expand(int32_t, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpi2, args->tmpi2);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
@@ -777,7 +792,7 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
             if ( args->sample_map[i]==-1 ) continue;
             int32_t *src = args->tmpi  + nsrc*args->sample_map[i];
             int32_t *dst = args->tmpi2 + ndst*i;
-            if ( col->replace==REPLACE_EXISTING && bcf_gt_is_missing(dst[0]) ) continue;
+            if ( col->replace==REPLACE_NON_MISSING && bcf_gt_is_missing(dst[0]) ) continue;
             if ( col->replace==REPLACE_MISSING  && !bcf_gt_is_missing(dst[0]) ) continue;
             for (j=0; j<nsrc; j++) dst[j] = src[j];
             for (; j<ndst; j++) dst[j] = bcf_int32_vector_end;
@@ -793,7 +808,7 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
             int32_t *dst = args->tmpi3 + nsrc*i;
             int keep_ori = 0;
             if ( args->sample_map[i]==-1 ) keep_ori = 1;
-            else if ( col->replace==REPLACE_EXISTING && bcf_gt_is_missing(ori[0]) ) keep_ori = 1;
+            else if ( col->replace==REPLACE_NON_MISSING && bcf_gt_is_missing(ori[0]) ) keep_ori = 1;
             else if ( col->replace==REPLACE_MISSING  && !bcf_gt_is_missing(ori[0]) ) keep_ori = 1;
             if ( keep_ori )
             {
@@ -811,7 +826,7 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
 }
 static int count_vals(annot_line_t *tab, int icol_beg, int icol_end)
 {
-    int i, nmax = 0;
+    int i, nmax = 1;
     for (i=icol_beg; i<icol_end; i++)
     {
         char *str = tab->cols[i], *end = str;
@@ -831,298 +846,306 @@ static int count_vals(annot_line_t *tab, int icol_beg, int icol_end)
     }
     return nmax;
 }
-static int setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
-{
-    annot_line_t *tab = (annot_line_t*) data;
-    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
-    assert( col->icol+nsmpl <= tab->ncols );
-    int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
-    assert( nvals>0 );
-    hts_expand(int32_t,nvals*nsmpl,args->mtmpi,args->tmpi);
-
-    int icol = col->icol, ismpl;
-    for (ismpl=0; ismpl<nsmpl; ismpl++)
-    {
-        int32_t *ptr = args->tmpi + ismpl*nvals;
-        int ival = 0;
-
-        char *str = tab->cols[icol];
-        while ( *str )
-        {
-            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
-            {
-                ptr[ival++] = bcf_int32_missing;
-                str += str[1] ? 2 : 1;
-                continue;
-            }
-
-            char *end = str;
-            ptr[ival] = strtol(str, &end, 10); 
-            if ( end==str )
-                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
-
-            ival++;
-            str = *end ? end+1 : end;
-        }
-        while ( ival<nvals ) ptr[ival++] = bcf_int32_vector_end;
-        icol++;
-    }
-    return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi,nsmpl*nvals);
-}
-static int setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
-{
-    annot_line_t *tab = (annot_line_t*) data;
-    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
-    assert( col->icol+nsmpl <= tab->ncols );
-    int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
-    assert( nvals>0 );
-    hts_expand(float,nvals*nsmpl,args->mtmpf,args->tmpf);
-
-    int icol = col->icol, ismpl;
-    for (ismpl=0; ismpl<nsmpl; ismpl++)
-    {
-        float *ptr = args->tmpf + ismpl*nvals;
-        int ival = 0;
-
-        char *str = tab->cols[icol];
-        while ( *str )
-        {
-            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
-            {
-                bcf_float_set_missing(ptr[ival]); 
-                ival++;
-                str += str[1] ? 2 : 1;
-                continue;
-            }
-
-            char *end = str;
-            ptr[ival] = strtod(str, &end); 
-            if ( end==str )
-                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
-
-            ival++;
-            str = *end ? end+1 : end;
-        }
-        while ( ival<nvals ) { bcf_float_set_vector_end(ptr[ival]); ival++; }
-        icol++;
-    }
-    return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf,nsmpl*nvals);
-}
-static int setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
-{
-    annot_line_t *tab = (annot_line_t*) data;
-    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
-    assert( col->icol+nsmpl <= tab->ncols );
-
-    int i, max_len = 0;
-    for (i=col->icol; i<col->icol+nsmpl; i++)
-    {
-        int len = strlen(tab->cols[i]);
-        if ( max_len < len ) max_len = len;
-    }
-    hts_expand(char,max_len*nsmpl,args->mtmps,args->tmps);
-
-    int icol = col->icol, ismpl;
-    for (ismpl=0; ismpl<nsmpl; ismpl++)
-    {
-        char *ptr = args->tmps + ismpl*max_len;
-        char *str = tab->cols[icol];
-        i = 0;
-        while ( str[i] )
-        {
-            ptr[i] = str[i];
-            i++;
-        }
-        while ( i<max_len ) ptr[i++] = 0;
-        icol++;
-    }
-    return bcf_update_format_char(args->hdr_out,line,col->hdr_key,args->tmps,nsmpl*max_len);
-}
-static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+static int core_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, int32_t *vals, int nvals)
 {
-    bcf1_t *rec = (bcf1_t*) data;
-    int nsrc = bcf_get_format_int32(args->files->readers[1].header,rec,col->hdr_key,&args->tmpi,&args->mtmpi);
-    if ( nsrc==-3 ) return 0;    // the tag is not present
-    if ( nsrc<=0 ) return 1;     // error
-
     if ( !args->sample_map )
-        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi,nsrc);
+        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key_dst,vals,nvals*args->nsmpl_annot);
 
-    int i, j, ndst = bcf_get_format_int32(args->hdr,line,col->hdr_key,&args->tmpi2,&args->mtmpi2);
+    int i, j, ndst = bcf_get_format_int32(args->hdr,line,col->hdr_key_dst,&args->tmpi2,&args->mtmpi2);
     if ( ndst > 0 ) ndst /= bcf_hdr_nsamples(args->hdr_out);
-    nsrc /= bcf_hdr_nsamples(args->files->readers[1].header);
     if ( ndst<=0 )
     {
-        if ( col->replace==REPLACE_EXISTING ) return 0;    // overwrite only if present
-        hts_expand(int32_t, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpi2, args->tmpi2);
+        if ( col->replace==REPLACE_NON_MISSING ) return 0;    // overwrite only if present
+        hts_expand(int32_t, nvals*bcf_hdr_nsamples(args->hdr_out), args->mtmpi2, args->tmpi2);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
-            int32_t *dst = args->tmpi2 + nsrc*i;
+            int32_t *dst = args->tmpi2 + nvals*i;
             if ( args->sample_map[i]==-1 )
             {
                 dst[0] = bcf_int32_missing;
-                for (j=1; j<nsrc; j++) dst[j] = bcf_int32_vector_end;
+                for (j=1; j<nvals; j++) dst[j] = bcf_int32_vector_end;
             }
             else
             {
-                int32_t *src = args->tmpi + nsrc*args->sample_map[i];
-                for (j=0; j<nsrc; j++) dst[j] = src[j];
+                int32_t *src = vals + nvals*args->sample_map[i];
+                for (j=0; j<nvals; j++) dst[j] = src[j];
             }
         }
-        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi2,nsrc*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,nvals*bcf_hdr_nsamples(args->hdr_out));
     }
-    else if ( ndst >= nsrc )     
+    else if ( ndst >= nvals )     
     {
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
             if ( args->sample_map[i]==-1 ) continue;
-            int32_t *src = args->tmpi  + nsrc*args->sample_map[i];
+            int32_t *src = vals  + nvals*args->sample_map[i];
             int32_t *dst = args->tmpi2 + ndst*i;
-            if ( col->replace==REPLACE_EXISTING && dst[0]==bcf_int32_missing ) continue;
-            if ( col->replace==REPLACE_MISSING  && dst[0]!=bcf_int32_missing ) continue;
-            for (j=0; j<nsrc; j++) dst[j] = src[j];
+            // possible cases:
+            //      in annot out
+            //       x  y     x     TAG,-TAG,=TAG    .. REPLACE_ALL, REPLACE_NON_MISSING, SET_OR_APPEND
+            //       x  y     y    +TAG              .. REPLACE_MISSING
+            //       .  y     .    =TAG              .. SET_OR_APPEND
+            //       .  y     y     TAG,+TAG,-TAG    .. REPLACE_ALL, REPLACE_MISSING, REPLACE_NON_MISSING
+            //       x  .     x     TAG,+TAG         .. REPLACE_ALL, REPLACE_MISSING
+            //       x  .     .    -TAG              .. REPLACE_NON_MISSING
+            if ( col->replace==REPLACE_NON_MISSING ) { if ( dst[0]==bcf_int32_missing ) continue; } 
+            else if ( col->replace==REPLACE_MISSING ) { if ( dst[0]!=bcf_int32_missing ) continue; }
+            else if ( col->replace==REPLACE_ALL ) { if ( src[0]==bcf_int32_missing ) continue; }
+            for (j=0; j<nvals; j++) dst[j] = src[j];
             for (; j<ndst; j++) dst[j] = bcf_int32_vector_end;
         }
-        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi2,ndst*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,ndst*bcf_hdr_nsamples(args->hdr_out));
     }
-    else    // ndst < nsrc
+    else    // ndst < nvals
     {
-        hts_expand(int32_t, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpi3, args->tmpi3);
+        hts_expand(int32_t, nvals*bcf_hdr_nsamples(args->hdr_out), args->mtmpi3, args->tmpi3);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
-            int32_t *ori = args->tmpi2 + ndst*i;
-            int32_t *dst = args->tmpi3 + nsrc*i;
-            int keep_ori = 0;
-            if ( args->sample_map[i]==-1 ) keep_ori = 1;
-            else if ( col->replace==REPLACE_EXISTING && ori[0]==bcf_int32_missing ) keep_ori = 1;
-            else if ( col->replace==REPLACE_MISSING  && ori[0]!=bcf_int32_missing ) keep_ori = 1;
-            if ( keep_ori )
+            int32_t *ann = vals + nvals*args->sample_map[i];
+            int32_t *ori = args->tmpi2 + ndst*i;                // ori vcf line
+            int32_t *dst = args->tmpi3 + nvals*i;               // expanded buffer
+            int use_new_ann = 1;
+            if ( args->sample_map[i]==-1 ) use_new_ann = 0;
+            else if ( col->replace==REPLACE_NON_MISSING ) { if ( ori[0]==bcf_int32_missing ) use_new_ann = 0; }
+            else if ( col->replace==REPLACE_MISSING ) { if ( ori[0]!=bcf_int32_missing ) use_new_ann = 0; }
+            else if ( col->replace==REPLACE_ALL ) { if ( ann[0]==bcf_int32_missing ) use_new_ann = 0; }
+            if ( !use_new_ann )
             {
                 for (j=0; j<ndst; j++) dst[j] = ori[j];
-                for (; j<nsrc; j++) dst[j] = bcf_int32_vector_end;
+                for (; j<nvals; j++) dst[j] = bcf_int32_vector_end;
             }
             else
-            {
-                int32_t *src = args->tmpi + nsrc*args->sample_map[i];
-                for (j=0; j<nsrc; j++) dst[j] = src[j];
-            }
+                for (j=0; j<nvals; j++) dst[j] = ann[j];
         }
-        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi3,nsrc*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi3,nvals*bcf_hdr_nsamples(args->hdr_out));
     }
 }
-static int vcf_setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+static int core_setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, float *vals, int nvals)
 {
-    bcf1_t *rec = (bcf1_t*) data;
-    int nsrc = bcf_get_format_float(args->files->readers[1].header,rec,col->hdr_key,&args->tmpf,&args->mtmpf);
-    if ( nsrc==-3 ) return 0;    // the tag is not present
-    if ( nsrc<=0 ) return 1;     // error
-
     if ( !args->sample_map )
-        return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf,nsrc);
+        return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,vals,nvals*args->nsmpl_annot);
 
-    int i, j, ndst = bcf_get_format_float(args->hdr,line,col->hdr_key,&args->tmpf2,&args->mtmpf2);
+    int i, j, ndst = bcf_get_format_float(args->hdr,line,col->hdr_key_dst,&args->tmpf2,&args->mtmpf2);
     if ( ndst > 0 ) ndst /= bcf_hdr_nsamples(args->hdr_out);
-    nsrc /= bcf_hdr_nsamples(args->files->readers[1].header);
     if ( ndst<=0 )
     {
-        if ( col->replace==REPLACE_EXISTING ) return 0;    // overwrite only if present
-        hts_expand(float, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpf2, args->tmpf2);
+        if ( col->replace==REPLACE_NON_MISSING ) return 0;    // overwrite only if present
+        hts_expand(float, nvals*bcf_hdr_nsamples(args->hdr_out), args->mtmpf2, args->tmpf2);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
-            float *dst = args->tmpf2 + nsrc*i;
+            float *dst = args->tmpf2 + nvals*i;
             if ( args->sample_map[i]==-1 )
             {
                 bcf_float_set_missing(dst[0]);
-                for (j=1; j<nsrc; j++) bcf_float_set_vector_end(dst[j]);
+                for (j=1; j<nvals; j++) bcf_float_set_vector_end(dst[j]);
             }
             else
             {
-                float *src = args->tmpf + nsrc*args->sample_map[i];
-                for (j=0; j<nsrc; j++) dst[j] = src[j];
+                float *src = vals + nvals*args->sample_map[i];
+                for (j=0; j<nvals; j++) dst[j] = src[j];
             }
         }
-        return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf2,nsrc*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,nvals*bcf_hdr_nsamples(args->hdr_out));
     }
-    else if ( ndst >= nsrc )     
+    else if ( ndst >= nvals )     
     {
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
             if ( args->sample_map[i]==-1 ) continue;
-            float *src = args->tmpf  + nsrc*args->sample_map[i];
+            float *src = vals  + nvals*args->sample_map[i];
             float *dst = args->tmpf2 + ndst*i;
-            if ( col->replace==REPLACE_EXISTING && bcf_float_is_missing(dst[0]) ) continue;
-            if ( col->replace==REPLACE_MISSING  && !bcf_float_is_missing(dst[0]) ) continue;
-            for (j=0; j<nsrc; j++) dst[j] = src[j];
+            if ( col->replace==REPLACE_NON_MISSING ) { if ( bcf_float_is_missing(dst[0]) ) continue; } 
+            else if ( col->replace==REPLACE_MISSING ) { if ( !bcf_float_is_missing(dst[0]) ) continue; }
+            else if ( col->replace==REPLACE_ALL ) { if ( bcf_float_is_missing(src[0]) ) continue; }
+            for (j=0; j<nvals; j++) dst[j] = src[j];
             for (; j<ndst; j++) bcf_float_set_vector_end(dst[j]);
         }
-        return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf2,ndst*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,ndst*bcf_hdr_nsamples(args->hdr_out));
     }
-    else    // ndst < nsrc
+    else    // ndst < nvals
     {
-        hts_expand(float, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpf3, args->tmpf3);
+        hts_expand(float, nvals*bcf_hdr_nsamples(args->hdr_out), args->mtmpf3, args->tmpf3);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
-            float *ori = args->tmpf2 + ndst*i;
-            float *dst = args->tmpf3 + nsrc*i;
-            int keep_ori = 0;
-            if ( args->sample_map[i]==-1 ) keep_ori = 1;
-            else if ( col->replace==REPLACE_EXISTING && bcf_float_is_missing(ori[0]) ) keep_ori = 1;
-            else if ( col->replace==REPLACE_MISSING  && !bcf_float_is_missing(ori[0]) ) keep_ori = 1;
-            if ( keep_ori )
+            float *ann = vals + nvals*args->sample_map[i];
+            float *ori = args->tmpf2 + ndst*i;                // ori vcf line
+            float *dst = args->tmpf3 + nvals*i;               // expanded buffer
+            int use_new_ann = 1;
+            if ( args->sample_map[i]==-1 ) use_new_ann = 0;
+            else if ( col->replace==REPLACE_NON_MISSING ) { if ( bcf_float_is_missing(ori[0]) ) use_new_ann = 0; }
+            else if ( col->replace==REPLACE_MISSING ) { if ( !bcf_float_is_missing(ori[0]) ) use_new_ann = 0; }
+            else if ( col->replace==REPLACE_ALL ) { if ( bcf_float_is_missing(ann[0]) ) use_new_ann = 0; }
+            if ( !use_new_ann )
             {
                 for (j=0; j<ndst; j++) dst[j] = ori[j];
-                for (; j<nsrc; j++) bcf_float_set_vector_end(dst[j]);
+                for (; j<nvals; j++) bcf_float_set_vector_end(dst[j]);
             }
             else
-            {
-                float *src = args->tmpf + nsrc*args->sample_map[i];
-                for (j=0; j<nsrc; j++) dst[j] = src[j];
-            }
+                for (j=0; j<nvals; j++) dst[j] = ann[j];
         }
-        return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf3,nsrc*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf3,nvals*bcf_hdr_nsamples(args->hdr_out));
     }
 }
-static int vcf_setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+static int core_setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, char **vals)
 {
-    bcf1_t *rec = (bcf1_t*) data;
-    args->tmpp[0] = args->tmps;
-    int ret = bcf_get_format_string(args->files->readers[1].header,rec,col->hdr_key,&args->tmpp,&args->mtmps);
-    args->tmps = args->tmpp[0]; // tmps might be realloced
-    if ( ret==-3 ) return 0;    // the tag is not present
-    if ( ret<=0 ) return 1;     // error
-
     if ( !args->sample_map )
-        return bcf_update_format_string(args->hdr_out,line,col->hdr_key,(const char**)args->tmpp,bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_string(args->hdr_out,line,col->hdr_key_dst,(const char**)vals,args->nsmpl_annot);
 
     int i;
     args->tmpp2[0] = args->tmps2;
-    ret = bcf_get_format_string(args->hdr,line,col->hdr_key,&args->tmpp2,&args->mtmps2);
+    int ret = bcf_get_format_string(args->hdr,line,col->hdr_key_dst,&args->tmpp2,&args->mtmps2);
     args->tmps2 = args->tmpp2[0];   // tmps2 might be realloced
 
+    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
     if ( ret<=0 )   // not present in dst
     {
         hts_expand(char,bcf_hdr_nsamples(args->hdr_out)*2,args->mtmps2,args->tmps2);
-        for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
+        char *tmp = args->tmps2;
+        for (i=0; i<nsmpl; i++)
         {
-            args->tmps2[2*i]   = '.';
-            args->tmps2[2*i+1] = 0;
-            args->tmpp2[i] = args->tmps2+2*i;
+            tmp[0] = '.'; 
+            tmp[1] = 0;
+            args->tmpp2[i] = tmp;
+            tmp += 2;
         }
     }
+    for (i=0; i<nsmpl; i++)
+    {
+        if ( args->sample_map[i]==-1 ) continue;
+        char **src = vals + args->sample_map[i];
+        char **dst = args->tmpp2 + i;
+
+        if ( col->replace==REPLACE_NON_MISSING ) { if ( (*dst)[0]=='.' && (*dst)[1]==0 ) continue; } 
+        else if ( col->replace==REPLACE_MISSING ) { if ( (*dst)[0]!='.' || (*dst)[1]!=0 ) continue; }
+        else if ( col->replace==REPLACE_ALL ) { if ( (*src)[0]=='.' && (*src)[1]==0 ) continue; }
+        *dst = *src;
+    }
+    return bcf_update_format_string(args->hdr_out,line,col->hdr_key_dst,(const char**)args->tmpp2,nsmpl);
+}
+static int setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    if ( col->icol+args->nsmpl_annot > tab->ncols ) 
+        error("Incorrect number of values for %s at %s:%d\n",col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
+    int nvals = count_vals(tab,col->icol,col->icol+args->nsmpl_annot);
+    hts_expand(int32_t,nvals*args->nsmpl_annot,args->mtmpi,args->tmpi);
+
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<args->nsmpl_annot; ismpl++)
+    {
+        int32_t *ptr = args->tmpi + ismpl*nvals;
+        int ival = 0;
+
+        char *str = tab->cols[icol];
+        while ( *str )
+        {
+            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
+            {
+                ptr[ival++] = bcf_int32_missing;
+                str += str[1] ? 2 : 1;
+                continue;
+            }
+
+            char *end = str;
+            ptr[ival] = strtol(str, &end, 10); 
+            if ( end==str )
+                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+            ival++;
+            str = *end ? end+1 : end;
+        }
+        while ( ival<nvals ) ptr[ival++] = bcf_int32_vector_end;
+        icol++;
+    }
+    return core_setter_format_int(args,line,col,args->tmpi,nvals);
+}
+static int setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    if ( col->icol+args->nsmpl_annot > tab->ncols ) 
+        error("Incorrect number of values for %s at %s:%d\n",col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
+    int nvals = count_vals(tab,col->icol,col->icol+args->nsmpl_annot);
+    hts_expand(float,nvals*args->nsmpl_annot,args->mtmpf,args->tmpf);
 
-    for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<args->nsmpl_annot; ismpl++)
     {
-        int isrc = args->sample_map[i];
-        if ( isrc==-1 ) continue;
-        args->tmpp2[i] = args->tmpp[isrc];
+        float *ptr = args->tmpf + ismpl*nvals;
+        int ival = 0;
+
+        char *str = tab->cols[icol];
+        while ( *str )
+        {
+            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
+            {
+                bcf_float_set_missing(ptr[ival]); 
+                ival++;
+                str += str[1] ? 2 : 1;
+                continue;
+            }
+
+            char *end = str;
+            ptr[ival] = strtod(str, &end); 
+            if ( end==str )
+                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+            ival++;
+            str = *end ? end+1 : end;
+        }
+        while ( ival<nvals ) { bcf_float_set_vector_end(ptr[ival]); ival++; }
+        icol++;
     }
-    return bcf_update_format_string(args->hdr_out,line,col->hdr_key,(const char**)args->tmpp2,bcf_hdr_nsamples(args->hdr_out));
+    return core_setter_format_real(args,line,col,args->tmpf,nvals);
 }
-static void set_samples(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst, int need_samples)
+static int setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    if ( col->icol+args->nsmpl_annot > tab->ncols ) 
+        error("Incorrect number of values for %s at %s:%d\n",col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
+
+    int ismpl;
+    for (ismpl=0; ismpl<args->nsmpl_annot; ismpl++)
+        args->tmpp[ismpl] = tab->cols[col->icol + ismpl];
+
+    return core_setter_format_str(args,line,col,args->tmpp);
+}
+static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    bcf1_t *rec = (bcf1_t*) data;
+    int nsrc = bcf_get_format_int32(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpi,&args->mtmpi);
+    if ( nsrc==-3 ) return 0;    // the tag is not present
+    if ( nsrc<=0 ) return 1;     // error
+    return core_setter_format_int(args,line,col,args->tmpi,nsrc/bcf_hdr_nsamples(args->files->readers[1].header));
+}
+static int vcf_setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    bcf1_t *rec = (bcf1_t*) data;
+    int nsrc = bcf_get_format_float(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpf,&args->mtmpf);
+    if ( nsrc==-3 ) return 0;    // the tag is not present
+    if ( nsrc<=0 ) return 1;     // error
+    return core_setter_format_real(args,line,col,args->tmpf,nsrc/bcf_hdr_nsamples(args->files->readers[1].header));
+}
+
+static int vcf_setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    bcf1_t *rec = (bcf1_t*) data;
+    args->tmpp[0] = args->tmps;
+    int ret = bcf_get_format_string(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpp,&args->mtmps);
+    args->tmps = args->tmpp[0]; // tmps might be realloced
+    if ( ret==-3 ) return 0;    // the tag is not present
+    if ( ret<=0 ) return 1;     // error
+    return core_setter_format_str(args,line,col,args->tmpp);
+}
+static int init_sample_map(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst)
 {
     int i;
     if ( !args->sample_names )
     {
+        args->nsmpl_annot = bcf_hdr_nsamples(dst);
+
+        // tab annotation file, expecting that all samples are present: sample map not needed
+        if ( !src ) return 0;
+
         int nmatch = 0, order_ok = 1;
         for (i=0; i<bcf_hdr_nsamples(src); i++)
         {
@@ -1133,11 +1156,8 @@ static void set_samples(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst, int need_s
                 if ( i!=id ) order_ok = 0;
             }
         }
-        if ( bcf_hdr_nsamples(src)==bcf_hdr_nsamples(dst) && nmatch==bcf_hdr_nsamples(src) && order_ok && !need_samples ) 
-            return;    // the same samples in both files
-
-        if ( !nmatch ) error("No matching samples found in the source and the destination file\n");
-        if ( nmatch!=bcf_hdr_nsamples(src) || nmatch!=bcf_hdr_nsamples(dst) ) fprintf(stderr,"%d sample(s) in common\n", nmatch);
+        if ( bcf_hdr_nsamples(src)==bcf_hdr_nsamples(dst) && nmatch==bcf_hdr_nsamples(src) && order_ok ) return 0;  // not needed
+        if ( !nmatch ) return -1;   // No matching samples found in the source and the destination file
 
         args->nsample_map = bcf_hdr_nsamples(dst);
         args->sample_map  = (int*) malloc(sizeof(int)*args->nsample_map);
@@ -1146,46 +1166,70 @@ static void set_samples(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst, int need_s
             int id = bcf_hdr_id2int(src, BCF_DT_SAMPLE, dst->samples[i]);
             args->sample_map[i] = id;   // idst -> isrc, -1 if not present
         }
-        return;
+        return 1;
     }
 
     args->nsample_map = bcf_hdr_nsamples(dst);
     args->sample_map  = (int*) malloc(sizeof(int)*args->nsample_map);
     for (i=0; i<args->nsample_map; i++) args->sample_map[i] = -1;
 
-    int nsamples = 0;
-    char **samples = hts_readlist(args->sample_names, args->sample_is_file, &nsamples);
-    for (i=0; i<nsamples; i++)
+    // possible todo: could do with smpl_ilist only
+    smpl_ilist_t *ilist = smpl_ilist_init(dst, args->sample_names, args->sample_is_file, SMPL_STRICT);
+    if ( !ilist || !ilist->n ) error("Could not parse: %s\n", args->sample_names);
+    char **samples = (char**) malloc(sizeof(char*)*ilist->n);
+    for (i=0; i<ilist->n; i++) samples[i] = strdup(dst->samples[i]);
+    args->nsmpl_annot = ilist->n;
+    smpl_ilist_destroy(ilist);
+    int need_sample_map = args->nsmpl_annot==bcf_hdr_nsamples(dst) ? 0 : 1;
+    if ( !src )
     {
-        int isrc, idst;
-        char *ss = samples[i], *se = samples[i];
-        while ( *se && !isspace(*se) ) se++;
-        if ( !*se ) 
+        // tab annotation file
+        for (i=0; i<args->nsmpl_annot; i++)
+        {
+            int idst = bcf_hdr_id2int(dst, BCF_DT_SAMPLE, samples[i]);
+            if ( idst==-1 ) error("Sample \"%s\" not found in the destination file\n", samples[i]);
+            args->sample_map[idst] = i;
+            if ( idst!=i ) need_sample_map = 1;
+        }
+    }
+    else
+    {
+        // vcf annotation file
+        for (i=0; i<args->nsmpl_annot; i++)
         {
-            // only one sample name
+            int isrc, idst;
+            char *ss = samples[i], *se = samples[i];
+            while ( *se && !isspace(*se) ) se++;
+            if ( !*se ) 
+            {
+                // only one sample name
+                isrc = bcf_hdr_id2int(src, BCF_DT_SAMPLE,ss);
+                if ( isrc==-1 ) error("Sample \"%s\" not found in the source file\n", ss);
+                idst = bcf_hdr_id2int(dst, BCF_DT_SAMPLE,ss);
+                if ( idst==-1 ) error("Sample \"%s\" not found in the destination file\n", ss);
+                args->sample_map[idst] = isrc;
+                if ( idst!=isrc ) need_sample_map = 1;
+                continue;
+            }
+            *se = 0;
             isrc = bcf_hdr_id2int(src, BCF_DT_SAMPLE,ss);
             if ( isrc==-1 ) error("Sample \"%s\" not found in the source file\n", ss);
+
+            ss = se+1;
+            while ( isspace(*ss) ) ss++;
+            se = ss;
+            while ( *se && !isspace(*se) ) se++;
+
             idst = bcf_hdr_id2int(dst, BCF_DT_SAMPLE,ss);
             if ( idst==-1 ) error("Sample \"%s\" not found in the destination file\n", ss);
+
             args->sample_map[idst] = isrc;
-            continue;
+            if ( idst!=isrc ) need_sample_map = 1;
         }
-        *se = 0;
-        isrc = bcf_hdr_id2int(src, BCF_DT_SAMPLE,ss);
-        if ( isrc==-1 ) error("Sample \"%s\" not found in the source file\n", ss);
-
-        ss = se+1;
-        while ( isspace(*ss) ) ss++;
-        se = ss;
-        while ( *se && !isspace(*se) ) se++;
-
-        idst = bcf_hdr_id2int(dst, BCF_DT_SAMPLE,ss);
-        if ( idst==-1 ) error("Sample \"%s\" not found in the destination file\n", ss);
-
-        args->sample_map[idst] = isrc;
     }
-    for (i=0; i<nsamples; i++) free(samples[i]);
+    for (i=0; i<args->nsmpl_annot; i++) free(samples[i]);
     free(samples);
+    return need_sample_map;
 }
 static char *columns_complement(char *columns, void **skip_info, void **skip_fmt)
 {
@@ -1247,8 +1291,27 @@ static char *columns_complement(char *columns, void **skip_info, void **skip_fmt
     free(columns);
     return str.s;
 }
+static void bcf_hrec_format_rename(bcf_hrec_t *hrec, char *tag, kstring_t *str)
+{
+    int j, nout = 0;
+    ksprintf(str, "##%s=<", hrec->key);
+    for (j=0; j<hrec->nkeys; j++)
+    {
+        if ( !strcmp("IDX",hrec->keys[j]) ) continue;
+        if ( nout ) kputc(',',str);
+        if ( !strcmp("ID", hrec->keys[j]) )
+            ksprintf(str,"%s=%s", hrec->keys[j], tag);
+        else
+            ksprintf(str,"%s=%s", hrec->keys[j], hrec->vals[j]);
+        nout++;
+    }
+    ksprintf(str,">\n");
+}
 static void init_columns(args_t *args)
 {
+    int need_sample_map = 0;
+    int sample_map_ok = init_sample_map(args, args->tgts_is_vcf?args->files->readers[1].header:NULL, args->hdr);
+
     void *skip_fmt = NULL, *skip_info = NULL;
     if ( args->tgts_is_vcf )
         args->columns = columns_complement(args->columns, &skip_info, &skip_fmt);
@@ -1256,13 +1319,13 @@ static void init_columns(args_t *args)
     kstring_t str = {0,0,0}, tmp = {0,0,0};
     char *ss = args->columns, *se = ss;
     args->ncols = 0;
-    int icol = -1, has_fmt_str = 0, force_samples = -1;
+    int icol = -1, has_fmt_str = 0;
     while ( *ss )
     {
         if ( *se && *se!=',' ) { se++; continue; }
         int replace = REPLACE_ALL;
         if ( *ss=='+' ) { replace = REPLACE_MISSING; ss++; }
-        else if ( *ss=='-' ) { replace = REPLACE_EXISTING; ss++; }
+        else if ( *ss=='-' ) { replace = REPLACE_NON_MISSING; ss++; }
         else if ( *ss=='=' ) { replace = SET_OR_APPEND; ss++; }
         icol++;
         str.l = 0;
@@ -1276,23 +1339,25 @@ static void init_columns(args_t *args)
         else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = icol;
         else if ( !strcasecmp("ID",str.s) )
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -ID feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -ID feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_id : setter_id;
-            col->hdr_key = strdup(str.s);
+            col->hdr_key_src = strdup(str.s);
+            col->hdr_key_dst = strdup(str.s);
         }
         else if ( !strcasecmp("FILTER",str.s) )
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_filter : setter_filter;
-            col->hdr_key = strdup(str.s);
+            col->hdr_key_src = strdup(str.s);
+            col->hdr_key_dst = strdup(str.s);
             if ( args->tgts_is_vcf )
             {
                 bcf_hdr_t *tgts_hdr = args->files->readers[1].header;
@@ -1312,18 +1377,19 @@ static void init_columns(args_t *args)
         }
         else if ( !strcasecmp("QUAL",str.s) )
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -QUAL feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -QUAL feature has not been implemented yet.\n");
             if ( replace==SET_OR_APPEND ) error("Apologies, the =QUAL feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_qual : setter_qual;
-            col->hdr_key = strdup(str.s);
+            col->hdr_key_src = strdup(str.s);
+            col->hdr_key_dst = strdup(str.s);
         }
         else if ( args->tgts_is_vcf && !strcasecmp("INFO",str.s) ) // All INFO fields
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
             if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO/TAG feature has not been implemented yet.\n");
             bcf_hdr_t *tgts_hdr = args->files->readers[1].header;
             int j;
@@ -1343,7 +1409,8 @@ static void init_columns(args_t *args)
                 annot_col_t *col = &args->cols[args->ncols-1];
                 col->icol = -1;
                 col->replace = replace;
-                col->hdr_key = strdup(hrec->vals[k]);
+                col->hdr_key_src = strdup(hrec->vals[k]);
+                col->hdr_key_dst = strdup(hrec->vals[k]);
                 col->number  = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
                 switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_INFO,hdr_id) )
                 {
@@ -1358,8 +1425,7 @@ static void init_columns(args_t *args)
         else if ( args->tgts_is_vcf && (!strcasecmp("FORMAT",str.s) || !strcasecmp("FMT",str.s)) ) // All FORMAT fields
         {
             bcf_hdr_t *tgts_hdr = args->files->readers[1].header;
-            if ( force_samples<0 ) force_samples = replace;
-            if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;
+            need_sample_map = 1;
             int j;
             for (j=0; j<tgts_hdr->nhrec; j++)
             {
@@ -1377,8 +1443,9 @@ static void init_columns(args_t *args)
                 annot_col_t *col = &args->cols[args->ncols-1];
                 col->icol = -1;
                 col->replace = replace;
-                col->hdr_key = strdup(hrec->vals[k]);
-                if ( !strcasecmp("GT",col->hdr_key) ) col->setter = vcf_setter_format_gt;
+                col->hdr_key_src = strdup(hrec->vals[k]);
+                col->hdr_key_dst = strdup(hrec->vals[k]);
+                if ( !strcasecmp("GT",col->hdr_key_src) ) col->setter = vcf_setter_format_gt;
                 else
                     switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                     {
@@ -1391,18 +1458,27 @@ static void init_columns(args_t *args)
         }
         else if ( !strncasecmp("FORMAT/",str.s, 7) || !strncasecmp("FMT/",str.s,4) )
         {
-            char *key = str.s + (!strncasecmp("FMT/",str.s,4) ? 4 : 7);
-            if ( force_samples<0 ) force_samples = replace;
-            if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;
+            char *key_dst = str.s + (!strncasecmp("FMT/",str.s,4) ? 4 : 7);
+            char *key_src = strstr(key_dst,":=");
+            if ( key_src )
+            {
+                *key_src = 0;
+                key_src += 2;
+                if ( !strncasecmp("FORMAT/",key_src,7) ) key_src += 7;
+                else if ( !strncasecmp("FMT/",key_src,4) ) key_src += 4;
+            }
+            else
+                key_src = key_dst;
+            need_sample_map = 1;
             if ( args->tgts_is_vcf )
             {
-                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
+                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key_src, NULL);
                 tmp.l = 0;
-                bcf_hrec_format(hrec, &tmp);
+                bcf_hrec_format_rename(hrec, key_dst, &tmp);
                 bcf_hdr_append(args->hdr_out, tmp.s);
                 bcf_hdr_sync(args->hdr_out);
             }
-            int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key);
+            int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
             if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_FMT,hdr_id) )
                 error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
@@ -1410,13 +1486,14 @@ static void init_columns(args_t *args)
             if ( !args->tgts_is_vcf )
             {
                 col->icol = icol;
-                icol += bcf_hdr_nsamples(args->hdr_out) - 1;
+                icol += args->nsmpl_annot - 1;
             }
             else
                 col->icol = -1;
             col->replace = replace;
-            col->hdr_key = strdup(key);
-            if ( !strcasecmp("GT",key) ) col->setter = vcf_setter_format_gt;
+            col->hdr_key_src = strdup(key_src);
+            col->hdr_key_dst = strdup(key_dst);
+            if ( !strcasecmp("GT",key_src) ) col->setter = vcf_setter_format_gt;
             else
                 switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                 {
@@ -1428,24 +1505,33 @@ static void init_columns(args_t *args)
         }
         else
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
             if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO/TAG feature has not been implemented yet.\n");
-            if ( !strncasecmp("INFO/",str.s,5) ) { memmove(str.s,str.s+5,str.l-4); }
-            int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, str.s);
+            char *key_dst = !strncasecmp("INFO/",str.s,5) ? str.s + 5 : str.s;
+            char *key_src = strstr(key_dst,":=");
+            if ( key_src )
+            {
+                *key_src = 0;
+                key_src += 2;
+                if ( !strncasecmp("INFO/",key_src,5) ) key_src += 5;
+            }
+            else
+                key_src = key_dst;
+            int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
             if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_INFO,hdr_id) )
             {
                 if ( args->tgts_is_vcf ) // reading annotations from a VCF, add a new header line
                 {
-                    bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_INFO, "ID", str.s, NULL);
+                    bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_INFO, "ID", key_src, NULL);
                     if ( !hrec ) error("The tag \"%s\" is not defined in %s\n", str.s,args->files->readers[1].fname);
                     tmp.l = 0;
-                    bcf_hrec_format(hrec, &tmp);
+                    bcf_hrec_format_rename(hrec, key_dst, &tmp);
                     bcf_hdr_append(args->hdr_out, tmp.s);
                     bcf_hdr_sync(args->hdr_out);
-                    hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, str.s);
+                    hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
                 }
                 else
-                    error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
+                    error("The tag \"%s\" is not defined in %s\n", key_src, args->targets_fname);
                 assert( bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_INFO,hdr_id) );
             }
 
@@ -1453,7 +1539,8 @@ static void init_columns(args_t *args)
             annot_col_t *col = &args->cols[args->ncols-1];
             col->icol = icol;
             col->replace = replace;
-            col->hdr_key = strdup(str.s);
+            col->hdr_key_src = strdup(key_src);
+            col->hdr_key_dst = strdup(key_dst);
             col->number  = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
             switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_INFO,hdr_id) )
             {
@@ -1480,8 +1567,13 @@ static void init_columns(args_t *args)
         args->tmpp  = (char**)malloc(sizeof(char*)*n);
         args->tmpp2 = (char**)malloc(sizeof(char*)*n);
     }
-    if ( force_samples>=0 && args->tgts_is_vcf )
-        set_samples(args, args->files->readers[1].header, args->hdr, force_samples==REPLACE_ALL ? 0 : 1);
+    if ( !need_sample_map )
+    {
+        free(args->sample_map);
+        args->sample_map = NULL;
+    }
+    else if ( sample_map_ok<0 )
+        error("No matching samples in source and destination file?\n");
 }
 
 static void rename_chrs(args_t *args, char *fname)
@@ -1552,7 +1644,6 @@ static void init_data(args_t *args)
     if ( args->mark_sites )
     {
         if ( !args->targets_fname ) error("The -a option not given\n");
-        if ( args->tgts_is_vcf ) error("Apologies, this has not been implemented yet: -a is a VCF\n");  // very easy to add..
         bcf_hdr_printf(args->hdr_out,"##INFO=<ID=%s,Number=0,Type=Flag,Description=\"Sites %slisted in %s\">",
             args->mark_sites,args->mark_sites_logic==MARK_LISTED?"":"not ",args->mark_sites);
     }
@@ -1564,7 +1655,8 @@ static void init_data(args_t *args)
 
         args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
         if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
-        if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
+        if ( args->n_threads )
+            hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->files->p);
         bcf_hdr_write(args->out_fh, args->hdr_out);
     }
 }
@@ -1577,7 +1669,10 @@ static void destroy_data(args_t *args)
     if ( args->hdr_out ) bcf_hdr_destroy(args->hdr_out);
     if (args->vcmp) vcmp_destroy(args->vcmp);
     for (i=0; i<args->ncols; i++)
-        free(args->cols[i].hdr_key);
+    {
+        free(args->cols[i].hdr_key_src);
+        free(args->cols[i].hdr_key_dst);
+    }
     free(args->cols);
     for (i=0; i<args->malines; i++)
     {
@@ -1718,7 +1813,7 @@ static void annotate(args_t *args, bcf1_t *line)
             // there is a matching line
             for (j=0; j<args->ncols; j++)
                 if ( args->cols[j].setter(args,line,&args->cols[j],&args->alines[i]) )
-                    error("fixme: Could not set %s at %s:%d\n", args->cols[j].hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+                    error("fixme: Could not set %s at %s:%d\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
 
         }
 
@@ -1731,12 +1826,20 @@ static void annotate(args_t *args, bcf1_t *line)
                 bcf_update_info_flag(args->hdr_out,line,args->mark_sites,NULL,i<args->nalines?0:1);
         }
     }
-    else if ( args->files->nreaders == 2 && bcf_sr_has_line(args->files,1) )
+    else if ( args->files->nreaders == 2 )
     {
-        bcf1_t *aline = bcf_sr_get_line(args->files,1);
-        for (j=0; j<args->ncols; j++)
-            if ( args->cols[j].setter(args,line,&args->cols[j],aline) )
-                error("fixme: Could not set %s at %s:%d\n", args->cols[j].hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        if ( bcf_sr_has_line(args->files,1) )
+        {
+            bcf1_t *aline = bcf_sr_get_line(args->files,1);
+            for (j=0; j<args->ncols; j++)
+                if ( args->cols[j].setter(args,line,&args->cols[j],aline) )
+                    error("fixme: Could not set %s at %s:%d\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
+
+            if ( args->mark_sites )
+                bcf_update_info_flag(args->hdr_out,line,args->mark_sites,NULL,args->mark_sites_logic==MARK_LISTED ? 1 : 0);
+        }
+        else if ( args->mark_sites )
+            bcf_update_info_flag(args->hdr_out,line,args->mark_sites,NULL, args->mark_sites_logic==MARK_UNLISTED ? 1 : 0);
     }
     if ( args->set_ids )
     {
@@ -1761,6 +1864,7 @@ static void usage(args_t *args)
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
     fprintf(stderr, "   -a, --annotations <file>       VCF file or tabix-indexed file with annotations: CHR\\tPOS[\\tVALUE]+\n");
+    fprintf(stderr, "       --collapse <string>        matching records by <snps|indels|both|all|some|none>, see man page for details [some]\n");
     fprintf(stderr, "   -c, --columns <list>           list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
     fprintf(stderr, "   -e, --exclude <expr>           exclude sites for which the expression is true (see man page for details)\n");
     fprintf(stderr, "   -h, --header-lines <file>      lines which should be appended to the VCF header\n");
@@ -1793,7 +1897,7 @@ int main_vcfannotate(int argc, char *argv[])
     args->record_cmd_line = 1;
     args->ref_idx = args->alt_idx = args->chr_idx = args->from_idx = args->to_idx = -1;
     args->set_ids_replace = 1;
-    int regions_is_file = 0;
+    int regions_is_file = 0, collapse = 0;
 
     static struct option loptions[] =
     {
@@ -1803,6 +1907,7 @@ int main_vcfannotate(int argc, char *argv[])
         {"output-type",required_argument,NULL,'O'},
         {"threads",required_argument,NULL,9},
         {"annotations",required_argument,NULL,'a'},
+        {"collapse",required_argument,NULL,2},
         {"include",required_argument,NULL,'i'},
         {"exclude",required_argument,NULL,'e'},
         {"regions",required_argument,NULL,'r'},
@@ -1847,6 +1952,16 @@ int main_vcfannotate(int argc, char *argv[])
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
             case 'h': args->header_fname = optarg; break;
             case  1 : args->rename_chrs = optarg; break;
+            case  2 :
+                if ( !strcmp(optarg,"snps") ) collapse |= COLLAPSE_SNPS;
+                else if ( !strcmp(optarg,"indels") ) collapse |= COLLAPSE_INDELS;
+                else if ( !strcmp(optarg,"both") ) collapse |= COLLAPSE_SNPS | COLLAPSE_INDELS;
+                else if ( !strcmp(optarg,"any") ) collapse |= COLLAPSE_ANY;
+                else if ( !strcmp(optarg,"all") ) collapse |= COLLAPSE_ANY;
+                else if ( !strcmp(optarg,"some") ) collapse |= COLLAPSE_SOME;
+                else if ( !strcmp(optarg,"none") ) collapse = COLLAPSE_NONE;
+                else error("The --collapse string \"%s\" not recognised.\n", optarg);
+                break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case  8 : args->record_cmd_line = 0; break;
             case '?': usage(args); break;
@@ -1877,9 +1992,10 @@ int main_vcfannotate(int argc, char *argv[])
         {
             args->tgts_is_vcf = 1;
             args->files->require_index = 1;
-            args->files->collapse |= COLLAPSE_SOME;
+            args->files->collapse = collapse ? collapse : COLLAPSE_SOME;
         }
     }
+    if ( bcf_sr_set_threads(args->files, args->n_threads)<0 ) error("Failed to create threads\n");
     if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
 
     init_data(args);
diff --git a/bcftools/vcfannotate.c.pysam.c b/bcftools/vcfannotate.c.pysam.c
index ea8398c..09f76c2 100644
--- a/bcftools/vcfannotate.c.pysam.c
+++ b/bcftools/vcfannotate.c.pysam.c
@@ -25,6 +25,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -43,6 +44,7 @@ THE SOFTWARE.  */
 #include "vcmp.h"
 #include "filter.h"
 #include "convert.h"
+#include "smpl_ilist.h"
 
 struct _args_t;
 
@@ -67,12 +69,12 @@ annot_line_t;
 
 #define REPLACE_MISSING  0  // replace only missing values
 #define REPLACE_ALL      1  // replace both missing and existing values
-#define REPLACE_EXISTING 2  // replace only if tgt is not missing
+#define REPLACE_NON_MISSING 2  // replace only if tgt is not missing
 #define SET_OR_APPEND    3  // set new value if missing or non-existent, append otherwise
 typedef struct _annot_col_t
 {
     int icol, replace, number;  // number: one of BCF_VL_* types
-    char *hdr_key;
+    char *hdr_key_src, *hdr_key_dst;
     int (*setter)(struct _args_t *, bcf1_t *, struct _annot_col_t *, void*);
 }
 annot_col_t;
@@ -111,6 +113,7 @@ typedef struct _args_t
     convert_t *set_ids;
     int set_ids_replace;
 
+    int nsmpl_annot;
     int *sample_map, nsample_map, sample_is_file;   // map[idst] -> isrc
     int mtmpi, mtmpf, mtmps;
     int mtmpi2, mtmpf2, mtmps2;
@@ -157,6 +160,7 @@ void remove_info(args_t *args, bcf1_t *line, rm_tag_t *tag)
         }
         line->d.shared_dirty |= BCF1_DIRTY_INF;
         inf->vptr = NULL;
+        inf->vptr_off = inf->vptr_len = 0;
     }
 }
 void remove_info_tag(args_t *args, bcf1_t *line, rm_tag_t *tag)
@@ -189,6 +193,10 @@ void remove_format(args_t *args, bcf1_t *line, rm_tag_t *tag)
     }
 }
 
+#include "htslib/khash.h"
+KHASH_MAP_INIT_STR(vdict, bcf_idinfo_t)
+typedef khash_t(vdict) vdict_t;
+
 static void remove_hdr_lines(bcf_hdr_t *hdr, int type)
 {
     int i = 0, nrm = 0;
@@ -196,11 +204,18 @@ static void remove_hdr_lines(bcf_hdr_t *hdr, int type)
     {
         if ( hdr->hrec[i]->type!=type ) { i++; continue; }
         bcf_hrec_t *hrec = hdr->hrec[i];
-        if ( type==BCF_HL_FMT )
+        if ( type==BCF_HL_FMT || type==BCF_HL_INFO || type==BCF_HL_FMT || type== BCF_HL_CTG )
         {
             // everything except FORMAT/GT
             int id = bcf_hrec_find_key(hrec, "ID");
-            if ( id>=0 && !strcmp(hrec->vals[id],"GT") ) { i++; continue; }
+            if ( id>=0 )
+            {
+                if ( type==BCF_HL_FMT && !strcmp(hrec->vals[id],"GT") ) { i++; continue; }
+                vdict_t *d = type==BCF_HL_CTG ? (vdict_t*)hdr->dict[BCF_DT_CTG] : (vdict_t*)hdr->dict[BCF_DT_ID];
+                khint_t k = kh_get(vdict, d, hdr->hrec[i]->vals[id]);
+                kh_val(d, k).hrec[type==BCF_HL_CTG?0:type] = NULL;
+                kh_val(d, k).info[type] |= 0xf;
+            }
         }
         nrm++;
         hdr->nhrec--;
@@ -455,7 +470,7 @@ static int setter_qual(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 
     line->qual = strtod(str, &str);
     if ( str == tab->cols[col->icol] )
-        error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+        error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
     return 0;
 }
 static int vcf_setter_qual(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -472,31 +487,31 @@ static int setter_info_flag(args_t *args, bcf1_t *line, annot_col_t *col, void *
     char *str = tab->cols[col->icol];
     if ( str[0]=='.' && str[1]==0 ) return 0;
 
-    if ( str[0]=='1' && str[1]==0 ) return bcf_update_info_flag(args->hdr_out,line,col->hdr_key,NULL,1);
-    if ( str[0]=='0' && str[1]==0 ) return bcf_update_info_flag(args->hdr_out,line,col->hdr_key,NULL,0);
+    if ( str[0]=='1' && str[1]==0 ) return bcf_update_info_flag(args->hdr_out,line,col->hdr_key_dst,NULL,1);
+    if ( str[0]=='0' && str[1]==0 ) return bcf_update_info_flag(args->hdr_out,line,col->hdr_key_dst,NULL,0);
     error("Could not parse %s at %s:%d .. [%s]\n", bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
     return -1;
 }
 static int vcf_setter_info_flag(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int flag = bcf_get_info_flag(args->files->readers[1].header,rec,col->hdr_key,NULL,NULL);
-    bcf_update_info_flag(args->hdr_out,line,col->hdr_key,NULL,flag);
+    int flag = bcf_get_info_flag(args->files->readers[1].header,rec,col->hdr_key_src,NULL,NULL);
+    bcf_update_info_flag(args->hdr_out,line,col->hdr_key_dst,NULL,flag);
     return 0;
 }
 static int setter_ARinfo_int32(args_t *args, bcf1_t *line, annot_col_t *col, int nals, char **als, int ntmpi)
 {
     if ( col->number==BCF_VL_A && ntmpi!=nals-1 && (ntmpi!=1 || args->tmpi[0]!=bcf_int32_missing || args->tmpi[1]!=bcf_int32_vector_end) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpi,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpi,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
     else if ( col->number==BCF_VL_R && ntmpi!=nals && (ntmpi!=1 || args->tmpi[0]!=bcf_int32_missing || args->tmpi[1]!=bcf_int32_vector_end) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpi,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpi,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
 
     int ndst = col->number==BCF_VL_A ? line->n_allele - 1 : line->n_allele;
     int *map = vcmp_map_ARvalues(args->vcmp,ndst,nals,als,line->n_allele,line->d.allele);
     if ( !map ) error("REF alleles not compatible at %s:%d\n");
 
     // fill in any missing values in the target VCF (or all, if not present)
-    int ntmpi2 = bcf_get_info_float(args->hdr, line, col->hdr_key, &args->tmpi2, &args->mtmpi2);
+    int ntmpi2 = bcf_get_info_float(args->hdr, line, col->hdr_key_dst, &args->tmpi2, &args->mtmpi2);
     if ( ntmpi2 < ndst ) hts_expand(int32_t,ndst,args->mtmpi2,args->tmpi2);
 
     int i;
@@ -513,7 +528,7 @@ static int setter_ARinfo_int32(args_t *args, bcf1_t *line, annot_col_t *col, int
 
         args->tmpi2[i] = args->tmpi[ map[i] ];
     }
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key,args->tmpi2,ndst);
+    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,ndst);
     return 0;
 }
 static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -539,17 +554,17 @@ static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *d
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_int32(args->hdr, line, col->hdr_key, &args->tmpi2, &args->mtmpi2);
+        int ret = bcf_get_info_int32(args->hdr, line, col->hdr_key_dst, &args->tmpi2, &args->mtmpi2);
         if ( ret>0 && args->tmpi2[0]!=bcf_int32_missing ) return 0;
     }
 
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key,args->tmpi,ntmpi);
+    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
     return 0;
 }
 static int vcf_setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int ntmpi = bcf_get_info_int32(args->files->readers[1].header,rec,col->hdr_key,&args->tmpi,&args->mtmpi);
+    int ntmpi = bcf_get_info_int32(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpi,&args->mtmpi);
     if ( ntmpi < 0 ) return 0;    // nothing to add
 
     if ( col->number==BCF_VL_A || col->number==BCF_VL_R ) 
@@ -557,26 +572,26 @@ static int vcf_setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, voi
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_int32(args->hdr, line, col->hdr_key, &args->tmpi2, &args->mtmpi2);
+        int ret = bcf_get_info_int32(args->hdr, line, col->hdr_key_dst, &args->tmpi2, &args->mtmpi2);
         if ( ret>0 && args->tmpi2[0]!=bcf_int32_missing ) return 0;
     }
 
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key,args->tmpi,ntmpi);
+    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
     return 0;
 }
 static int setter_ARinfo_real(args_t *args, bcf1_t *line, annot_col_t *col, int nals, char **als, int ntmpf)
 {
     if ( col->number==BCF_VL_A && ntmpf!=nals-1 && (ntmpf!=1 || !bcf_float_is_missing(args->tmpf[0]) || !bcf_float_is_vector_end(args->tmpf[0])) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpf,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpf,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
     else if ( col->number==BCF_VL_R && ntmpf!=nals && (ntmpf!=1 || !bcf_float_is_missing(args->tmpf[0]) || !bcf_float_is_vector_end(args->tmpf[0])) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpf,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", ntmpf,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
 
     int ndst = col->number==BCF_VL_A ? line->n_allele - 1 : line->n_allele;
     int *map = vcmp_map_ARvalues(args->vcmp,ndst,nals,als,line->n_allele,line->d.allele);
     if ( !map ) error("REF alleles not compatible at %s:%d\n");
 
     // fill in any missing values in the target VCF (or all, if not present)
-    int ntmpf2 = bcf_get_info_float(args->hdr, line, col->hdr_key, &args->tmpf2, &args->mtmpf2);
+    int ntmpf2 = bcf_get_info_float(args->hdr, line, col->hdr_key_dst, &args->tmpf2, &args->mtmpf2);
     if ( ntmpf2 < ndst ) hts_expand(float,ndst,args->mtmpf2,args->tmpf2);
 
     int i;
@@ -593,7 +608,7 @@ static int setter_ARinfo_real(args_t *args, bcf1_t *line, annot_col_t *col, int
 
         args->tmpf2[i] = args->tmpf[ map[i] ];
     }
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key,args->tmpf2,ndst);
+    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,ndst);
     return 0;
 }
 static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -619,17 +634,17 @@ static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_float(args->hdr, line, col->hdr_key, &args->tmpf2, &args->mtmpf2);
+        int ret = bcf_get_info_float(args->hdr, line, col->hdr_key_dst, &args->tmpf2, &args->mtmpf2);
         if ( ret>0 && !bcf_float_is_missing(args->tmpf2[0]) ) return 0;
     }
 
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key,args->tmpf,ntmpf);
+    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
     return 0;
 }
 static int vcf_setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int ntmpf = bcf_get_info_float(args->files->readers[1].header,rec,col->hdr_key,&args->tmpf,&args->mtmpf);
+    int ntmpf = bcf_get_info_float(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpf,&args->mtmpf);
     if ( ntmpf < 0 ) return 0;    // nothing to add
 
     if ( col->number==BCF_VL_A || col->number==BCF_VL_R ) 
@@ -637,11 +652,11 @@ static int vcf_setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, vo
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_float(args->hdr, line, col->hdr_key, &args->tmpf2, &args->mtmpf2);
+        int ret = bcf_get_info_float(args->hdr, line, col->hdr_key_dst, &args->tmpf2, &args->mtmpf2);
         if ( ret>0 && !bcf_float_is_missing(args->tmpf2[0]) ) return 0;
     }
 
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key,args->tmpf,ntmpf);
+    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
     return 0;
 }
 int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst); // see vcfmerge.c
@@ -654,9 +669,9 @@ static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, in
         lsrc++;
     }
     if ( col->number==BCF_VL_A && nsrc!=nals-1 && (nsrc!=1 || args->tmps[0]!='.' || args->tmps[1]!=0 ) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", nsrc,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", nsrc,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
     else if ( col->number==BCF_VL_R && nsrc!=nals && (nsrc!=1 || args->tmps[0]!='.' || args->tmps[1]!=0 ) )
-        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", nsrc,col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        error("Incorrect number of values (%d) for the %s tag at %s:%d\n", nsrc,col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
 
     int ndst = col->number==BCF_VL_A ? line->n_allele - 1 : line->n_allele;
     int *map = vcmp_map_ARvalues(args->vcmp,ndst,nals,als,line->n_allele,line->d.allele);
@@ -664,7 +679,7 @@ static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, in
 
     // fill in any missing values in the target VCF (or all, if not present)
     int i, empty = 0, nstr, mstr = args->tmpks.m;
-    nstr = bcf_get_info_string(args->hdr, line, col->hdr_key, &args->tmpks.s, &mstr); 
+    nstr = bcf_get_info_string(args->hdr, line, col->hdr_key_dst, &args->tmpks.s, &mstr); 
     args->tmpks.m = mstr;
     if ( nstr<0 || (nstr==1 && args->tmpks.s[0]=='.' && args->tmpks.s[1]==0) )
     {
@@ -697,7 +712,7 @@ static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, in
         int ret = copy_string_field(args->tmps,map[i],lsrc,&args->tmpks,i);
         assert( ret==0 );
     }
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key,args->tmpks.s);
+    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmpks.s);
     return 0;
 }
 static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -714,17 +729,17 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_string(args->hdr, line, col->hdr_key, &args->tmps2, &args->mtmps2);
+        int ret = bcf_get_info_string(args->hdr, line, col->hdr_key_dst, &args->tmps2, &args->mtmps2);
         if ( ret>0 && (args->tmps2[0]!='.' || args->tmps2[1]!=0) ) return 0;
     }
 
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key,args->tmps);
+    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
     return 0;
 }
 static int vcf_setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int ntmps = bcf_get_info_string(args->files->readers[1].header,rec,col->hdr_key,&args->tmps,&args->mtmps);
+    int ntmps = bcf_get_info_string(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmps,&args->mtmps);
     if ( ntmps < 0 ) return 0;    // nothing to add
 
     if ( col->number==BCF_VL_A || col->number==BCF_VL_R ) 
@@ -732,11 +747,11 @@ static int vcf_setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, voi
 
     if ( col->replace==REPLACE_MISSING )
     {
-        int ret = bcf_get_info_string(args->hdr, line, col->hdr_key, &args->tmps2, &args->mtmps2);
+        int ret = bcf_get_info_string(args->hdr, line, col->hdr_key_dst, &args->tmps2, &args->mtmps2);
         if ( ret>0 && (args->tmps2[0]!='.' || args->tmps2[1]!=0) ) return 0;
     }
 
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key,args->tmps);
+    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
     return 0;
 }
 static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -754,7 +769,7 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
     nsrc /= bcf_hdr_nsamples(args->files->readers[1].header);
     if ( ndst<=0 )  // field not present in dst file
     {
-        if ( col->replace==REPLACE_EXISTING ) return 0;
+        if ( col->replace==REPLACE_NON_MISSING ) return 0;
         hts_expand(int32_t, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpi2, args->tmpi2);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
@@ -779,7 +794,7 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
             if ( args->sample_map[i]==-1 ) continue;
             int32_t *src = args->tmpi  + nsrc*args->sample_map[i];
             int32_t *dst = args->tmpi2 + ndst*i;
-            if ( col->replace==REPLACE_EXISTING && bcf_gt_is_missing(dst[0]) ) continue;
+            if ( col->replace==REPLACE_NON_MISSING && bcf_gt_is_missing(dst[0]) ) continue;
             if ( col->replace==REPLACE_MISSING  && !bcf_gt_is_missing(dst[0]) ) continue;
             for (j=0; j<nsrc; j++) dst[j] = src[j];
             for (; j<ndst; j++) dst[j] = bcf_int32_vector_end;
@@ -795,7 +810,7 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
             int32_t *dst = args->tmpi3 + nsrc*i;
             int keep_ori = 0;
             if ( args->sample_map[i]==-1 ) keep_ori = 1;
-            else if ( col->replace==REPLACE_EXISTING && bcf_gt_is_missing(ori[0]) ) keep_ori = 1;
+            else if ( col->replace==REPLACE_NON_MISSING && bcf_gt_is_missing(ori[0]) ) keep_ori = 1;
             else if ( col->replace==REPLACE_MISSING  && !bcf_gt_is_missing(ori[0]) ) keep_ori = 1;
             if ( keep_ori )
             {
@@ -813,7 +828,7 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
 }
 static int count_vals(annot_line_t *tab, int icol_beg, int icol_end)
 {
-    int i, nmax = 0;
+    int i, nmax = 1;
     for (i=icol_beg; i<icol_end; i++)
     {
         char *str = tab->cols[i], *end = str;
@@ -833,298 +848,306 @@ static int count_vals(annot_line_t *tab, int icol_beg, int icol_end)
     }
     return nmax;
 }
-static int setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
-{
-    annot_line_t *tab = (annot_line_t*) data;
-    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
-    assert( col->icol+nsmpl <= tab->ncols );
-    int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
-    assert( nvals>0 );
-    hts_expand(int32_t,nvals*nsmpl,args->mtmpi,args->tmpi);
-
-    int icol = col->icol, ismpl;
-    for (ismpl=0; ismpl<nsmpl; ismpl++)
-    {
-        int32_t *ptr = args->tmpi + ismpl*nvals;
-        int ival = 0;
-
-        char *str = tab->cols[icol];
-        while ( *str )
-        {
-            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
-            {
-                ptr[ival++] = bcf_int32_missing;
-                str += str[1] ? 2 : 1;
-                continue;
-            }
-
-            char *end = str;
-            ptr[ival] = strtol(str, &end, 10); 
-            if ( end==str )
-                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
-
-            ival++;
-            str = *end ? end+1 : end;
-        }
-        while ( ival<nvals ) ptr[ival++] = bcf_int32_vector_end;
-        icol++;
-    }
-    return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi,nsmpl*nvals);
-}
-static int setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
-{
-    annot_line_t *tab = (annot_line_t*) data;
-    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
-    assert( col->icol+nsmpl <= tab->ncols );
-    int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
-    assert( nvals>0 );
-    hts_expand(float,nvals*nsmpl,args->mtmpf,args->tmpf);
-
-    int icol = col->icol, ismpl;
-    for (ismpl=0; ismpl<nsmpl; ismpl++)
-    {
-        float *ptr = args->tmpf + ismpl*nvals;
-        int ival = 0;
-
-        char *str = tab->cols[icol];
-        while ( *str )
-        {
-            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
-            {
-                bcf_float_set_missing(ptr[ival]); 
-                ival++;
-                str += str[1] ? 2 : 1;
-                continue;
-            }
-
-            char *end = str;
-            ptr[ival] = strtod(str, &end); 
-            if ( end==str )
-                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
-
-            ival++;
-            str = *end ? end+1 : end;
-        }
-        while ( ival<nvals ) { bcf_float_set_vector_end(ptr[ival]); ival++; }
-        icol++;
-    }
-    return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf,nsmpl*nvals);
-}
-static int setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
-{
-    annot_line_t *tab = (annot_line_t*) data;
-    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
-    assert( col->icol+nsmpl <= tab->ncols );
-
-    int i, max_len = 0;
-    for (i=col->icol; i<col->icol+nsmpl; i++)
-    {
-        int len = strlen(tab->cols[i]);
-        if ( max_len < len ) max_len = len;
-    }
-    hts_expand(char,max_len*nsmpl,args->mtmps,args->tmps);
-
-    int icol = col->icol, ismpl;
-    for (ismpl=0; ismpl<nsmpl; ismpl++)
-    {
-        char *ptr = args->tmps + ismpl*max_len;
-        char *str = tab->cols[icol];
-        i = 0;
-        while ( str[i] )
-        {
-            ptr[i] = str[i];
-            i++;
-        }
-        while ( i<max_len ) ptr[i++] = 0;
-        icol++;
-    }
-    return bcf_update_format_char(args->hdr_out,line,col->hdr_key,args->tmps,nsmpl*max_len);
-}
-static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+static int core_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, int32_t *vals, int nvals)
 {
-    bcf1_t *rec = (bcf1_t*) data;
-    int nsrc = bcf_get_format_int32(args->files->readers[1].header,rec,col->hdr_key,&args->tmpi,&args->mtmpi);
-    if ( nsrc==-3 ) return 0;    // the tag is not present
-    if ( nsrc<=0 ) return 1;     // error
-
     if ( !args->sample_map )
-        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi,nsrc);
+        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key_dst,vals,nvals*args->nsmpl_annot);
 
-    int i, j, ndst = bcf_get_format_int32(args->hdr,line,col->hdr_key,&args->tmpi2,&args->mtmpi2);
+    int i, j, ndst = bcf_get_format_int32(args->hdr,line,col->hdr_key_dst,&args->tmpi2,&args->mtmpi2);
     if ( ndst > 0 ) ndst /= bcf_hdr_nsamples(args->hdr_out);
-    nsrc /= bcf_hdr_nsamples(args->files->readers[1].header);
     if ( ndst<=0 )
     {
-        if ( col->replace==REPLACE_EXISTING ) return 0;    // overwrite only if present
-        hts_expand(int32_t, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpi2, args->tmpi2);
+        if ( col->replace==REPLACE_NON_MISSING ) return 0;    // overwrite only if present
+        hts_expand(int32_t, nvals*bcf_hdr_nsamples(args->hdr_out), args->mtmpi2, args->tmpi2);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
-            int32_t *dst = args->tmpi2 + nsrc*i;
+            int32_t *dst = args->tmpi2 + nvals*i;
             if ( args->sample_map[i]==-1 )
             {
                 dst[0] = bcf_int32_missing;
-                for (j=1; j<nsrc; j++) dst[j] = bcf_int32_vector_end;
+                for (j=1; j<nvals; j++) dst[j] = bcf_int32_vector_end;
             }
             else
             {
-                int32_t *src = args->tmpi + nsrc*args->sample_map[i];
-                for (j=0; j<nsrc; j++) dst[j] = src[j];
+                int32_t *src = vals + nvals*args->sample_map[i];
+                for (j=0; j<nvals; j++) dst[j] = src[j];
             }
         }
-        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi2,nsrc*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,nvals*bcf_hdr_nsamples(args->hdr_out));
     }
-    else if ( ndst >= nsrc )     
+    else if ( ndst >= nvals )     
     {
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
             if ( args->sample_map[i]==-1 ) continue;
-            int32_t *src = args->tmpi  + nsrc*args->sample_map[i];
+            int32_t *src = vals  + nvals*args->sample_map[i];
             int32_t *dst = args->tmpi2 + ndst*i;
-            if ( col->replace==REPLACE_EXISTING && dst[0]==bcf_int32_missing ) continue;
-            if ( col->replace==REPLACE_MISSING  && dst[0]!=bcf_int32_missing ) continue;
-            for (j=0; j<nsrc; j++) dst[j] = src[j];
+            // possible cases:
+            //      in annot out
+            //       x  y     x     TAG,-TAG,=TAG    .. REPLACE_ALL, REPLACE_NON_MISSING, SET_OR_APPEND
+            //       x  y     y    +TAG              .. REPLACE_MISSING
+            //       .  y     .    =TAG              .. SET_OR_APPEND
+            //       .  y     y     TAG,+TAG,-TAG    .. REPLACE_ALL, REPLACE_MISSING, REPLACE_NON_MISSING
+            //       x  .     x     TAG,+TAG         .. REPLACE_ALL, REPLACE_MISSING
+            //       x  .     .    -TAG              .. REPLACE_NON_MISSING
+            if ( col->replace==REPLACE_NON_MISSING ) { if ( dst[0]==bcf_int32_missing ) continue; } 
+            else if ( col->replace==REPLACE_MISSING ) { if ( dst[0]!=bcf_int32_missing ) continue; }
+            else if ( col->replace==REPLACE_ALL ) { if ( src[0]==bcf_int32_missing ) continue; }
+            for (j=0; j<nvals; j++) dst[j] = src[j];
             for (; j<ndst; j++) dst[j] = bcf_int32_vector_end;
         }
-        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi2,ndst*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,ndst*bcf_hdr_nsamples(args->hdr_out));
     }
-    else    // ndst < nsrc
+    else    // ndst < nvals
     {
-        hts_expand(int32_t, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpi3, args->tmpi3);
+        hts_expand(int32_t, nvals*bcf_hdr_nsamples(args->hdr_out), args->mtmpi3, args->tmpi3);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
-            int32_t *ori = args->tmpi2 + ndst*i;
-            int32_t *dst = args->tmpi3 + nsrc*i;
-            int keep_ori = 0;
-            if ( args->sample_map[i]==-1 ) keep_ori = 1;
-            else if ( col->replace==REPLACE_EXISTING && ori[0]==bcf_int32_missing ) keep_ori = 1;
-            else if ( col->replace==REPLACE_MISSING  && ori[0]!=bcf_int32_missing ) keep_ori = 1;
-            if ( keep_ori )
+            int32_t *ann = vals + nvals*args->sample_map[i];
+            int32_t *ori = args->tmpi2 + ndst*i;                // ori vcf line
+            int32_t *dst = args->tmpi3 + nvals*i;               // expanded buffer
+            int use_new_ann = 1;
+            if ( args->sample_map[i]==-1 ) use_new_ann = 0;
+            else if ( col->replace==REPLACE_NON_MISSING ) { if ( ori[0]==bcf_int32_missing ) use_new_ann = 0; }
+            else if ( col->replace==REPLACE_MISSING ) { if ( ori[0]!=bcf_int32_missing ) use_new_ann = 0; }
+            else if ( col->replace==REPLACE_ALL ) { if ( ann[0]==bcf_int32_missing ) use_new_ann = 0; }
+            if ( !use_new_ann )
             {
                 for (j=0; j<ndst; j++) dst[j] = ori[j];
-                for (; j<nsrc; j++) dst[j] = bcf_int32_vector_end;
+                for (; j<nvals; j++) dst[j] = bcf_int32_vector_end;
             }
             else
-            {
-                int32_t *src = args->tmpi + nsrc*args->sample_map[i];
-                for (j=0; j<nsrc; j++) dst[j] = src[j];
-            }
+                for (j=0; j<nvals; j++) dst[j] = ann[j];
         }
-        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi3,nsrc*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi3,nvals*bcf_hdr_nsamples(args->hdr_out));
     }
 }
-static int vcf_setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+static int core_setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, float *vals, int nvals)
 {
-    bcf1_t *rec = (bcf1_t*) data;
-    int nsrc = bcf_get_format_float(args->files->readers[1].header,rec,col->hdr_key,&args->tmpf,&args->mtmpf);
-    if ( nsrc==-3 ) return 0;    // the tag is not present
-    if ( nsrc<=0 ) return 1;     // error
-
     if ( !args->sample_map )
-        return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf,nsrc);
+        return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,vals,nvals*args->nsmpl_annot);
 
-    int i, j, ndst = bcf_get_format_float(args->hdr,line,col->hdr_key,&args->tmpf2,&args->mtmpf2);
+    int i, j, ndst = bcf_get_format_float(args->hdr,line,col->hdr_key_dst,&args->tmpf2,&args->mtmpf2);
     if ( ndst > 0 ) ndst /= bcf_hdr_nsamples(args->hdr_out);
-    nsrc /= bcf_hdr_nsamples(args->files->readers[1].header);
     if ( ndst<=0 )
     {
-        if ( col->replace==REPLACE_EXISTING ) return 0;    // overwrite only if present
-        hts_expand(float, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpf2, args->tmpf2);
+        if ( col->replace==REPLACE_NON_MISSING ) return 0;    // overwrite only if present
+        hts_expand(float, nvals*bcf_hdr_nsamples(args->hdr_out), args->mtmpf2, args->tmpf2);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
-            float *dst = args->tmpf2 + nsrc*i;
+            float *dst = args->tmpf2 + nvals*i;
             if ( args->sample_map[i]==-1 )
             {
                 bcf_float_set_missing(dst[0]);
-                for (j=1; j<nsrc; j++) bcf_float_set_vector_end(dst[j]);
+                for (j=1; j<nvals; j++) bcf_float_set_vector_end(dst[j]);
             }
             else
             {
-                float *src = args->tmpf + nsrc*args->sample_map[i];
-                for (j=0; j<nsrc; j++) dst[j] = src[j];
+                float *src = vals + nvals*args->sample_map[i];
+                for (j=0; j<nvals; j++) dst[j] = src[j];
             }
         }
-        return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf2,nsrc*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,nvals*bcf_hdr_nsamples(args->hdr_out));
     }
-    else if ( ndst >= nsrc )     
+    else if ( ndst >= nvals )     
     {
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
             if ( args->sample_map[i]==-1 ) continue;
-            float *src = args->tmpf  + nsrc*args->sample_map[i];
+            float *src = vals  + nvals*args->sample_map[i];
             float *dst = args->tmpf2 + ndst*i;
-            if ( col->replace==REPLACE_EXISTING && bcf_float_is_missing(dst[0]) ) continue;
-            if ( col->replace==REPLACE_MISSING  && !bcf_float_is_missing(dst[0]) ) continue;
-            for (j=0; j<nsrc; j++) dst[j] = src[j];
+            if ( col->replace==REPLACE_NON_MISSING ) { if ( bcf_float_is_missing(dst[0]) ) continue; } 
+            else if ( col->replace==REPLACE_MISSING ) { if ( !bcf_float_is_missing(dst[0]) ) continue; }
+            else if ( col->replace==REPLACE_ALL ) { if ( bcf_float_is_missing(src[0]) ) continue; }
+            for (j=0; j<nvals; j++) dst[j] = src[j];
             for (; j<ndst; j++) bcf_float_set_vector_end(dst[j]);
         }
-        return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf2,ndst*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,ndst*bcf_hdr_nsamples(args->hdr_out));
     }
-    else    // ndst < nsrc
+    else    // ndst < nvals
     {
-        hts_expand(float, nsrc*bcf_hdr_nsamples(args->hdr_out), args->mtmpf3, args->tmpf3);
+        hts_expand(float, nvals*bcf_hdr_nsamples(args->hdr_out), args->mtmpf3, args->tmpf3);
         for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
         {
-            float *ori = args->tmpf2 + ndst*i;
-            float *dst = args->tmpf3 + nsrc*i;
-            int keep_ori = 0;
-            if ( args->sample_map[i]==-1 ) keep_ori = 1;
-            else if ( col->replace==REPLACE_EXISTING && bcf_float_is_missing(ori[0]) ) keep_ori = 1;
-            else if ( col->replace==REPLACE_MISSING  && !bcf_float_is_missing(ori[0]) ) keep_ori = 1;
-            if ( keep_ori )
+            float *ann = vals + nvals*args->sample_map[i];
+            float *ori = args->tmpf2 + ndst*i;                // ori vcf line
+            float *dst = args->tmpf3 + nvals*i;               // expanded buffer
+            int use_new_ann = 1;
+            if ( args->sample_map[i]==-1 ) use_new_ann = 0;
+            else if ( col->replace==REPLACE_NON_MISSING ) { if ( bcf_float_is_missing(ori[0]) ) use_new_ann = 0; }
+            else if ( col->replace==REPLACE_MISSING ) { if ( !bcf_float_is_missing(ori[0]) ) use_new_ann = 0; }
+            else if ( col->replace==REPLACE_ALL ) { if ( bcf_float_is_missing(ann[0]) ) use_new_ann = 0; }
+            if ( !use_new_ann )
             {
                 for (j=0; j<ndst; j++) dst[j] = ori[j];
-                for (; j<nsrc; j++) bcf_float_set_vector_end(dst[j]);
+                for (; j<nvals; j++) bcf_float_set_vector_end(dst[j]);
             }
             else
-            {
-                float *src = args->tmpf + nsrc*args->sample_map[i];
-                for (j=0; j<nsrc; j++) dst[j] = src[j];
-            }
+                for (j=0; j<nvals; j++) dst[j] = ann[j];
         }
-        return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf3,nsrc*bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf3,nvals*bcf_hdr_nsamples(args->hdr_out));
     }
 }
-static int vcf_setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+static int core_setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, char **vals)
 {
-    bcf1_t *rec = (bcf1_t*) data;
-    args->tmpp[0] = args->tmps;
-    int ret = bcf_get_format_string(args->files->readers[1].header,rec,col->hdr_key,&args->tmpp,&args->mtmps);
-    args->tmps = args->tmpp[0]; // tmps might be realloced
-    if ( ret==-3 ) return 0;    // the tag is not present
-    if ( ret<=0 ) return 1;     // error
-
     if ( !args->sample_map )
-        return bcf_update_format_string(args->hdr_out,line,col->hdr_key,(const char**)args->tmpp,bcf_hdr_nsamples(args->hdr_out));
+        return bcf_update_format_string(args->hdr_out,line,col->hdr_key_dst,(const char**)vals,args->nsmpl_annot);
 
     int i;
     args->tmpp2[0] = args->tmps2;
-    ret = bcf_get_format_string(args->hdr,line,col->hdr_key,&args->tmpp2,&args->mtmps2);
+    int ret = bcf_get_format_string(args->hdr,line,col->hdr_key_dst,&args->tmpp2,&args->mtmps2);
     args->tmps2 = args->tmpp2[0];   // tmps2 might be realloced
 
+    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
     if ( ret<=0 )   // not present in dst
     {
         hts_expand(char,bcf_hdr_nsamples(args->hdr_out)*2,args->mtmps2,args->tmps2);
-        for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
+        char *tmp = args->tmps2;
+        for (i=0; i<nsmpl; i++)
         {
-            args->tmps2[2*i]   = '.';
-            args->tmps2[2*i+1] = 0;
-            args->tmpp2[i] = args->tmps2+2*i;
+            tmp[0] = '.'; 
+            tmp[1] = 0;
+            args->tmpp2[i] = tmp;
+            tmp += 2;
         }
     }
+    for (i=0; i<nsmpl; i++)
+    {
+        if ( args->sample_map[i]==-1 ) continue;
+        char **src = vals + args->sample_map[i];
+        char **dst = args->tmpp2 + i;
+
+        if ( col->replace==REPLACE_NON_MISSING ) { if ( (*dst)[0]=='.' && (*dst)[1]==0 ) continue; } 
+        else if ( col->replace==REPLACE_MISSING ) { if ( (*dst)[0]!='.' || (*dst)[1]!=0 ) continue; }
+        else if ( col->replace==REPLACE_ALL ) { if ( (*src)[0]=='.' && (*src)[1]==0 ) continue; }
+        *dst = *src;
+    }
+    return bcf_update_format_string(args->hdr_out,line,col->hdr_key_dst,(const char**)args->tmpp2,nsmpl);
+}
+static int setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    if ( col->icol+args->nsmpl_annot > tab->ncols ) 
+        error("Incorrect number of values for %s at %s:%d\n",col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
+    int nvals = count_vals(tab,col->icol,col->icol+args->nsmpl_annot);
+    hts_expand(int32_t,nvals*args->nsmpl_annot,args->mtmpi,args->tmpi);
+
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<args->nsmpl_annot; ismpl++)
+    {
+        int32_t *ptr = args->tmpi + ismpl*nvals;
+        int ival = 0;
+
+        char *str = tab->cols[icol];
+        while ( *str )
+        {
+            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
+            {
+                ptr[ival++] = bcf_int32_missing;
+                str += str[1] ? 2 : 1;
+                continue;
+            }
+
+            char *end = str;
+            ptr[ival] = strtol(str, &end, 10); 
+            if ( end==str )
+                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+            ival++;
+            str = *end ? end+1 : end;
+        }
+        while ( ival<nvals ) ptr[ival++] = bcf_int32_vector_end;
+        icol++;
+    }
+    return core_setter_format_int(args,line,col,args->tmpi,nvals);
+}
+static int setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    if ( col->icol+args->nsmpl_annot > tab->ncols ) 
+        error("Incorrect number of values for %s at %s:%d\n",col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
+    int nvals = count_vals(tab,col->icol,col->icol+args->nsmpl_annot);
+    hts_expand(float,nvals*args->nsmpl_annot,args->mtmpf,args->tmpf);
 
-    for (i=0; i<bcf_hdr_nsamples(args->hdr_out); i++)
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<args->nsmpl_annot; ismpl++)
     {
-        int isrc = args->sample_map[i];
-        if ( isrc==-1 ) continue;
-        args->tmpp2[i] = args->tmpp[isrc];
+        float *ptr = args->tmpf + ismpl*nvals;
+        int ival = 0;
+
+        char *str = tab->cols[icol];
+        while ( *str )
+        {
+            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
+            {
+                bcf_float_set_missing(ptr[ival]); 
+                ival++;
+                str += str[1] ? 2 : 1;
+                continue;
+            }
+
+            char *end = str;
+            ptr[ival] = strtod(str, &end); 
+            if ( end==str )
+                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+            ival++;
+            str = *end ? end+1 : end;
+        }
+        while ( ival<nvals ) { bcf_float_set_vector_end(ptr[ival]); ival++; }
+        icol++;
     }
-    return bcf_update_format_string(args->hdr_out,line,col->hdr_key,(const char**)args->tmpp2,bcf_hdr_nsamples(args->hdr_out));
+    return core_setter_format_real(args,line,col,args->tmpf,nvals);
 }
-static void set_samples(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst, int need_samples)
+static int setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    if ( col->icol+args->nsmpl_annot > tab->ncols ) 
+        error("Incorrect number of values for %s at %s:%d\n",col->hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
+
+    int ismpl;
+    for (ismpl=0; ismpl<args->nsmpl_annot; ismpl++)
+        args->tmpp[ismpl] = tab->cols[col->icol + ismpl];
+
+    return core_setter_format_str(args,line,col,args->tmpp);
+}
+static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    bcf1_t *rec = (bcf1_t*) data;
+    int nsrc = bcf_get_format_int32(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpi,&args->mtmpi);
+    if ( nsrc==-3 ) return 0;    // the tag is not present
+    if ( nsrc<=0 ) return 1;     // error
+    return core_setter_format_int(args,line,col,args->tmpi,nsrc/bcf_hdr_nsamples(args->files->readers[1].header));
+}
+static int vcf_setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    bcf1_t *rec = (bcf1_t*) data;
+    int nsrc = bcf_get_format_float(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpf,&args->mtmpf);
+    if ( nsrc==-3 ) return 0;    // the tag is not present
+    if ( nsrc<=0 ) return 1;     // error
+    return core_setter_format_real(args,line,col,args->tmpf,nsrc/bcf_hdr_nsamples(args->files->readers[1].header));
+}
+
+static int vcf_setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    bcf1_t *rec = (bcf1_t*) data;
+    args->tmpp[0] = args->tmps;
+    int ret = bcf_get_format_string(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmpp,&args->mtmps);
+    args->tmps = args->tmpp[0]; // tmps might be realloced
+    if ( ret==-3 ) return 0;    // the tag is not present
+    if ( ret<=0 ) return 1;     // error
+    return core_setter_format_str(args,line,col,args->tmpp);
+}
+static int init_sample_map(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst)
 {
     int i;
     if ( !args->sample_names )
     {
+        args->nsmpl_annot = bcf_hdr_nsamples(dst);
+
+        // tab annotation file, expecting that all samples are present: sample map not needed
+        if ( !src ) return 0;
+
         int nmatch = 0, order_ok = 1;
         for (i=0; i<bcf_hdr_nsamples(src); i++)
         {
@@ -1135,11 +1158,8 @@ static void set_samples(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst, int need_s
                 if ( i!=id ) order_ok = 0;
             }
         }
-        if ( bcf_hdr_nsamples(src)==bcf_hdr_nsamples(dst) && nmatch==bcf_hdr_nsamples(src) && order_ok && !need_samples ) 
-            return;    // the same samples in both files
-
-        if ( !nmatch ) error("No matching samples found in the source and the destination file\n");
-        if ( nmatch!=bcf_hdr_nsamples(src) || nmatch!=bcf_hdr_nsamples(dst) ) fprintf(pysam_stderr,"%d sample(s) in common\n", nmatch);
+        if ( bcf_hdr_nsamples(src)==bcf_hdr_nsamples(dst) && nmatch==bcf_hdr_nsamples(src) && order_ok ) return 0;  // not needed
+        if ( !nmatch ) return -1;   // No matching samples found in the source and the destination file
 
         args->nsample_map = bcf_hdr_nsamples(dst);
         args->sample_map  = (int*) malloc(sizeof(int)*args->nsample_map);
@@ -1148,46 +1168,70 @@ static void set_samples(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst, int need_s
             int id = bcf_hdr_id2int(src, BCF_DT_SAMPLE, dst->samples[i]);
             args->sample_map[i] = id;   // idst -> isrc, -1 if not present
         }
-        return;
+        return 1;
     }
 
     args->nsample_map = bcf_hdr_nsamples(dst);
     args->sample_map  = (int*) malloc(sizeof(int)*args->nsample_map);
     for (i=0; i<args->nsample_map; i++) args->sample_map[i] = -1;
 
-    int nsamples = 0;
-    char **samples = hts_readlist(args->sample_names, args->sample_is_file, &nsamples);
-    for (i=0; i<nsamples; i++)
+    // possible todo: could do with smpl_ilist only
+    smpl_ilist_t *ilist = smpl_ilist_init(dst, args->sample_names, args->sample_is_file, SMPL_STRICT);
+    if ( !ilist || !ilist->n ) error("Could not parse: %s\n", args->sample_names);
+    char **samples = (char**) malloc(sizeof(char*)*ilist->n);
+    for (i=0; i<ilist->n; i++) samples[i] = strdup(dst->samples[i]);
+    args->nsmpl_annot = ilist->n;
+    smpl_ilist_destroy(ilist);
+    int need_sample_map = args->nsmpl_annot==bcf_hdr_nsamples(dst) ? 0 : 1;
+    if ( !src )
     {
-        int isrc, idst;
-        char *ss = samples[i], *se = samples[i];
-        while ( *se && !isspace(*se) ) se++;
-        if ( !*se ) 
+        // tab annotation file
+        for (i=0; i<args->nsmpl_annot; i++)
+        {
+            int idst = bcf_hdr_id2int(dst, BCF_DT_SAMPLE, samples[i]);
+            if ( idst==-1 ) error("Sample \"%s\" not found in the destination file\n", samples[i]);
+            args->sample_map[idst] = i;
+            if ( idst!=i ) need_sample_map = 1;
+        }
+    }
+    else
+    {
+        // vcf annotation file
+        for (i=0; i<args->nsmpl_annot; i++)
         {
-            // only one sample name
+            int isrc, idst;
+            char *ss = samples[i], *se = samples[i];
+            while ( *se && !isspace(*se) ) se++;
+            if ( !*se ) 
+            {
+                // only one sample name
+                isrc = bcf_hdr_id2int(src, BCF_DT_SAMPLE,ss);
+                if ( isrc==-1 ) error("Sample \"%s\" not found in the source file\n", ss);
+                idst = bcf_hdr_id2int(dst, BCF_DT_SAMPLE,ss);
+                if ( idst==-1 ) error("Sample \"%s\" not found in the destination file\n", ss);
+                args->sample_map[idst] = isrc;
+                if ( idst!=isrc ) need_sample_map = 1;
+                continue;
+            }
+            *se = 0;
             isrc = bcf_hdr_id2int(src, BCF_DT_SAMPLE,ss);
             if ( isrc==-1 ) error("Sample \"%s\" not found in the source file\n", ss);
+
+            ss = se+1;
+            while ( isspace(*ss) ) ss++;
+            se = ss;
+            while ( *se && !isspace(*se) ) se++;
+
             idst = bcf_hdr_id2int(dst, BCF_DT_SAMPLE,ss);
             if ( idst==-1 ) error("Sample \"%s\" not found in the destination file\n", ss);
+
             args->sample_map[idst] = isrc;
-            continue;
+            if ( idst!=isrc ) need_sample_map = 1;
         }
-        *se = 0;
-        isrc = bcf_hdr_id2int(src, BCF_DT_SAMPLE,ss);
-        if ( isrc==-1 ) error("Sample \"%s\" not found in the source file\n", ss);
-
-        ss = se+1;
-        while ( isspace(*ss) ) ss++;
-        se = ss;
-        while ( *se && !isspace(*se) ) se++;
-
-        idst = bcf_hdr_id2int(dst, BCF_DT_SAMPLE,ss);
-        if ( idst==-1 ) error("Sample \"%s\" not found in the destination file\n", ss);
-
-        args->sample_map[idst] = isrc;
     }
-    for (i=0; i<nsamples; i++) free(samples[i]);
+    for (i=0; i<args->nsmpl_annot; i++) free(samples[i]);
     free(samples);
+    return need_sample_map;
 }
 static char *columns_complement(char *columns, void **skip_info, void **skip_fmt)
 {
@@ -1249,8 +1293,27 @@ static char *columns_complement(char *columns, void **skip_info, void **skip_fmt
     free(columns);
     return str.s;
 }
+static void bcf_hrec_format_rename(bcf_hrec_t *hrec, char *tag, kstring_t *str)
+{
+    int j, nout = 0;
+    ksprintf(str, "##%s=<", hrec->key);
+    for (j=0; j<hrec->nkeys; j++)
+    {
+        if ( !strcmp("IDX",hrec->keys[j]) ) continue;
+        if ( nout ) kputc(',',str);
+        if ( !strcmp("ID", hrec->keys[j]) )
+            ksprintf(str,"%s=%s", hrec->keys[j], tag);
+        else
+            ksprintf(str,"%s=%s", hrec->keys[j], hrec->vals[j]);
+        nout++;
+    }
+    ksprintf(str,">\n");
+}
 static void init_columns(args_t *args)
 {
+    int need_sample_map = 0;
+    int sample_map_ok = init_sample_map(args, args->tgts_is_vcf?args->files->readers[1].header:NULL, args->hdr);
+
     void *skip_fmt = NULL, *skip_info = NULL;
     if ( args->tgts_is_vcf )
         args->columns = columns_complement(args->columns, &skip_info, &skip_fmt);
@@ -1258,13 +1321,13 @@ static void init_columns(args_t *args)
     kstring_t str = {0,0,0}, tmp = {0,0,0};
     char *ss = args->columns, *se = ss;
     args->ncols = 0;
-    int icol = -1, has_fmt_str = 0, force_samples = -1;
+    int icol = -1, has_fmt_str = 0;
     while ( *ss )
     {
         if ( *se && *se!=',' ) { se++; continue; }
         int replace = REPLACE_ALL;
         if ( *ss=='+' ) { replace = REPLACE_MISSING; ss++; }
-        else if ( *ss=='-' ) { replace = REPLACE_EXISTING; ss++; }
+        else if ( *ss=='-' ) { replace = REPLACE_NON_MISSING; ss++; }
         else if ( *ss=='=' ) { replace = SET_OR_APPEND; ss++; }
         icol++;
         str.l = 0;
@@ -1278,23 +1341,25 @@ static void init_columns(args_t *args)
         else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = icol;
         else if ( !strcasecmp("ID",str.s) )
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -ID feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -ID feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_id : setter_id;
-            col->hdr_key = strdup(str.s);
+            col->hdr_key_src = strdup(str.s);
+            col->hdr_key_dst = strdup(str.s);
         }
         else if ( !strcasecmp("FILTER",str.s) )
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_filter : setter_filter;
-            col->hdr_key = strdup(str.s);
+            col->hdr_key_src = strdup(str.s);
+            col->hdr_key_dst = strdup(str.s);
             if ( args->tgts_is_vcf )
             {
                 bcf_hdr_t *tgts_hdr = args->files->readers[1].header;
@@ -1314,18 +1379,19 @@ static void init_columns(args_t *args)
         }
         else if ( !strcasecmp("QUAL",str.s) )
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -QUAL feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -QUAL feature has not been implemented yet.\n");
             if ( replace==SET_OR_APPEND ) error("Apologies, the =QUAL feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_qual : setter_qual;
-            col->hdr_key = strdup(str.s);
+            col->hdr_key_src = strdup(str.s);
+            col->hdr_key_dst = strdup(str.s);
         }
         else if ( args->tgts_is_vcf && !strcasecmp("INFO",str.s) ) // All INFO fields
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
             if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO/TAG feature has not been implemented yet.\n");
             bcf_hdr_t *tgts_hdr = args->files->readers[1].header;
             int j;
@@ -1345,7 +1411,8 @@ static void init_columns(args_t *args)
                 annot_col_t *col = &args->cols[args->ncols-1];
                 col->icol = -1;
                 col->replace = replace;
-                col->hdr_key = strdup(hrec->vals[k]);
+                col->hdr_key_src = strdup(hrec->vals[k]);
+                col->hdr_key_dst = strdup(hrec->vals[k]);
                 col->number  = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
                 switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_INFO,hdr_id) )
                 {
@@ -1360,8 +1427,7 @@ static void init_columns(args_t *args)
         else if ( args->tgts_is_vcf && (!strcasecmp("FORMAT",str.s) || !strcasecmp("FMT",str.s)) ) // All FORMAT fields
         {
             bcf_hdr_t *tgts_hdr = args->files->readers[1].header;
-            if ( force_samples<0 ) force_samples = replace;
-            if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;
+            need_sample_map = 1;
             int j;
             for (j=0; j<tgts_hdr->nhrec; j++)
             {
@@ -1379,8 +1445,9 @@ static void init_columns(args_t *args)
                 annot_col_t *col = &args->cols[args->ncols-1];
                 col->icol = -1;
                 col->replace = replace;
-                col->hdr_key = strdup(hrec->vals[k]);
-                if ( !strcasecmp("GT",col->hdr_key) ) col->setter = vcf_setter_format_gt;
+                col->hdr_key_src = strdup(hrec->vals[k]);
+                col->hdr_key_dst = strdup(hrec->vals[k]);
+                if ( !strcasecmp("GT",col->hdr_key_src) ) col->setter = vcf_setter_format_gt;
                 else
                     switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                     {
@@ -1393,18 +1460,27 @@ static void init_columns(args_t *args)
         }
         else if ( !strncasecmp("FORMAT/",str.s, 7) || !strncasecmp("FMT/",str.s,4) )
         {
-            char *key = str.s + (!strncasecmp("FMT/",str.s,4) ? 4 : 7);
-            if ( force_samples<0 ) force_samples = replace;
-            if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;
+            char *key_dst = str.s + (!strncasecmp("FMT/",str.s,4) ? 4 : 7);
+            char *key_src = strstr(key_dst,":=");
+            if ( key_src )
+            {
+                *key_src = 0;
+                key_src += 2;
+                if ( !strncasecmp("FORMAT/",key_src,7) ) key_src += 7;
+                else if ( !strncasecmp("FMT/",key_src,4) ) key_src += 4;
+            }
+            else
+                key_src = key_dst;
+            need_sample_map = 1;
             if ( args->tgts_is_vcf )
             {
-                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
+                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key_src, NULL);
                 tmp.l = 0;
-                bcf_hrec_format(hrec, &tmp);
+                bcf_hrec_format_rename(hrec, key_dst, &tmp);
                 bcf_hdr_append(args->hdr_out, tmp.s);
                 bcf_hdr_sync(args->hdr_out);
             }
-            int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key);
+            int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
             if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_FMT,hdr_id) )
                 error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
@@ -1412,13 +1488,14 @@ static void init_columns(args_t *args)
             if ( !args->tgts_is_vcf )
             {
                 col->icol = icol;
-                icol += bcf_hdr_nsamples(args->hdr_out) - 1;
+                icol += args->nsmpl_annot - 1;
             }
             else
                 col->icol = -1;
             col->replace = replace;
-            col->hdr_key = strdup(key);
-            if ( !strcasecmp("GT",key) ) col->setter = vcf_setter_format_gt;
+            col->hdr_key_src = strdup(key_src);
+            col->hdr_key_dst = strdup(key_dst);
+            if ( !strcasecmp("GT",key_src) ) col->setter = vcf_setter_format_gt;
             else
                 switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                 {
@@ -1430,24 +1507,33 @@ static void init_columns(args_t *args)
         }
         else
         {
-            if ( replace==REPLACE_EXISTING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
             if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO/TAG feature has not been implemented yet.\n");
-            if ( !strncasecmp("INFO/",str.s,5) ) { memmove(str.s,str.s+5,str.l-4); }
-            int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, str.s);
+            char *key_dst = !strncasecmp("INFO/",str.s,5) ? str.s + 5 : str.s;
+            char *key_src = strstr(key_dst,":=");
+            if ( key_src )
+            {
+                *key_src = 0;
+                key_src += 2;
+                if ( !strncasecmp("INFO/",key_src,5) ) key_src += 5;
+            }
+            else
+                key_src = key_dst;
+            int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
             if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_INFO,hdr_id) )
             {
                 if ( args->tgts_is_vcf ) // reading annotations from a VCF, add a new header line
                 {
-                    bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_INFO, "ID", str.s, NULL);
+                    bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_INFO, "ID", key_src, NULL);
                     if ( !hrec ) error("The tag \"%s\" is not defined in %s\n", str.s,args->files->readers[1].fname);
                     tmp.l = 0;
-                    bcf_hrec_format(hrec, &tmp);
+                    bcf_hrec_format_rename(hrec, key_dst, &tmp);
                     bcf_hdr_append(args->hdr_out, tmp.s);
                     bcf_hdr_sync(args->hdr_out);
-                    hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, str.s);
+                    hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
                 }
                 else
-                    error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
+                    error("The tag \"%s\" is not defined in %s\n", key_src, args->targets_fname);
                 assert( bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_INFO,hdr_id) );
             }
 
@@ -1455,7 +1541,8 @@ static void init_columns(args_t *args)
             annot_col_t *col = &args->cols[args->ncols-1];
             col->icol = icol;
             col->replace = replace;
-            col->hdr_key = strdup(str.s);
+            col->hdr_key_src = strdup(key_src);
+            col->hdr_key_dst = strdup(key_dst);
             col->number  = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
             switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_INFO,hdr_id) )
             {
@@ -1482,8 +1569,13 @@ static void init_columns(args_t *args)
         args->tmpp  = (char**)malloc(sizeof(char*)*n);
         args->tmpp2 = (char**)malloc(sizeof(char*)*n);
     }
-    if ( force_samples>=0 && args->tgts_is_vcf )
-        set_samples(args, args->files->readers[1].header, args->hdr, force_samples==REPLACE_ALL ? 0 : 1);
+    if ( !need_sample_map )
+    {
+        free(args->sample_map);
+        args->sample_map = NULL;
+    }
+    else if ( sample_map_ok<0 )
+        error("No matching samples in source and destination file?\n");
 }
 
 static void rename_chrs(args_t *args, char *fname)
@@ -1554,7 +1646,6 @@ static void init_data(args_t *args)
     if ( args->mark_sites )
     {
         if ( !args->targets_fname ) error("The -a option not given\n");
-        if ( args->tgts_is_vcf ) error("Apologies, this has not been implemented yet: -a is a VCF\n");  // very easy to add..
         bcf_hdr_printf(args->hdr_out,"##INFO=<ID=%s,Number=0,Type=Flag,Description=\"Sites %slisted in %s\">",
             args->mark_sites,args->mark_sites_logic==MARK_LISTED?"":"not ",args->mark_sites);
     }
@@ -1566,7 +1657,8 @@ static void init_data(args_t *args)
 
         args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
         if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
-        if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
+        if ( args->n_threads )
+            hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->files->p);
         bcf_hdr_write(args->out_fh, args->hdr_out);
     }
 }
@@ -1579,7 +1671,10 @@ static void destroy_data(args_t *args)
     if ( args->hdr_out ) bcf_hdr_destroy(args->hdr_out);
     if (args->vcmp) vcmp_destroy(args->vcmp);
     for (i=0; i<args->ncols; i++)
-        free(args->cols[i].hdr_key);
+    {
+        free(args->cols[i].hdr_key_src);
+        free(args->cols[i].hdr_key_dst);
+    }
     free(args->cols);
     for (i=0; i<args->malines; i++)
     {
@@ -1720,7 +1815,7 @@ static void annotate(args_t *args, bcf1_t *line)
             // there is a matching line
             for (j=0; j<args->ncols; j++)
                 if ( args->cols[j].setter(args,line,&args->cols[j],&args->alines[i]) )
-                    error("fixme: Could not set %s at %s:%d\n", args->cols[j].hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+                    error("fixme: Could not set %s at %s:%d\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
 
         }
 
@@ -1733,12 +1828,20 @@ static void annotate(args_t *args, bcf1_t *line)
                 bcf_update_info_flag(args->hdr_out,line,args->mark_sites,NULL,i<args->nalines?0:1);
         }
     }
-    else if ( args->files->nreaders == 2 && bcf_sr_has_line(args->files,1) )
+    else if ( args->files->nreaders == 2 )
     {
-        bcf1_t *aline = bcf_sr_get_line(args->files,1);
-        for (j=0; j<args->ncols; j++)
-            if ( args->cols[j].setter(args,line,&args->cols[j],aline) )
-                error("fixme: Could not set %s at %s:%d\n", args->cols[j].hdr_key,bcf_seqname(args->hdr,line),line->pos+1);
+        if ( bcf_sr_has_line(args->files,1) )
+        {
+            bcf1_t *aline = bcf_sr_get_line(args->files,1);
+            for (j=0; j<args->ncols; j++)
+                if ( args->cols[j].setter(args,line,&args->cols[j],aline) )
+                    error("fixme: Could not set %s at %s:%d\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),line->pos+1);
+
+            if ( args->mark_sites )
+                bcf_update_info_flag(args->hdr_out,line,args->mark_sites,NULL,args->mark_sites_logic==MARK_LISTED ? 1 : 0);
+        }
+        else if ( args->mark_sites )
+            bcf_update_info_flag(args->hdr_out,line,args->mark_sites,NULL, args->mark_sites_logic==MARK_UNLISTED ? 1 : 0);
     }
     if ( args->set_ids )
     {
@@ -1763,6 +1866,7 @@ static void usage(args_t *args)
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "Options:\n");
     fprintf(pysam_stderr, "   -a, --annotations <file>       VCF file or tabix-indexed file with annotations: CHR\\tPOS[\\tVALUE]+\n");
+    fprintf(pysam_stderr, "       --collapse <string>        matching records by <snps|indels|both|all|some|none>, see man page for details [some]\n");
     fprintf(pysam_stderr, "   -c, --columns <list>           list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
     fprintf(pysam_stderr, "   -e, --exclude <expr>           exclude sites for which the expression is true (see man page for details)\n");
     fprintf(pysam_stderr, "   -h, --header-lines <file>      lines which should be appended to the VCF header\n");
@@ -1795,7 +1899,7 @@ int main_vcfannotate(int argc, char *argv[])
     args->record_cmd_line = 1;
     args->ref_idx = args->alt_idx = args->chr_idx = args->from_idx = args->to_idx = -1;
     args->set_ids_replace = 1;
-    int regions_is_file = 0;
+    int regions_is_file = 0, collapse = 0;
 
     static struct option loptions[] =
     {
@@ -1805,6 +1909,7 @@ int main_vcfannotate(int argc, char *argv[])
         {"output-type",required_argument,NULL,'O'},
         {"threads",required_argument,NULL,9},
         {"annotations",required_argument,NULL,'a'},
+        {"collapse",required_argument,NULL,2},
         {"include",required_argument,NULL,'i'},
         {"exclude",required_argument,NULL,'e'},
         {"regions",required_argument,NULL,'r'},
@@ -1849,6 +1954,16 @@ int main_vcfannotate(int argc, char *argv[])
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
             case 'h': args->header_fname = optarg; break;
             case  1 : args->rename_chrs = optarg; break;
+            case  2 :
+                if ( !strcmp(optarg,"snps") ) collapse |= COLLAPSE_SNPS;
+                else if ( !strcmp(optarg,"indels") ) collapse |= COLLAPSE_INDELS;
+                else if ( !strcmp(optarg,"both") ) collapse |= COLLAPSE_SNPS | COLLAPSE_INDELS;
+                else if ( !strcmp(optarg,"any") ) collapse |= COLLAPSE_ANY;
+                else if ( !strcmp(optarg,"all") ) collapse |= COLLAPSE_ANY;
+                else if ( !strcmp(optarg,"some") ) collapse |= COLLAPSE_SOME;
+                else if ( !strcmp(optarg,"none") ) collapse = COLLAPSE_NONE;
+                else error("The --collapse string \"%s\" not recognised.\n", optarg);
+                break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case  8 : args->record_cmd_line = 0; break;
             case '?': usage(args); break;
@@ -1879,9 +1994,10 @@ int main_vcfannotate(int argc, char *argv[])
         {
             args->tgts_is_vcf = 1;
             args->files->require_index = 1;
-            args->files->collapse |= COLLAPSE_SOME;
+            args->files->collapse = collapse ? collapse : COLLAPSE_SOME;
         }
     }
+    if ( bcf_sr_set_threads(args->files, args->n_threads)<0 ) error("Failed to create threads\n");
     if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
 
     init_data(args);
diff --git a/bcftools/vcfcall.c b/bcftools/vcfcall.c
index e5bbf11..00771f7 100644
--- a/bcftools/vcfcall.c
+++ b/bcftools/vcfcall.c
@@ -24,6 +24,7 @@ THE SOFTWARE.  */
 
 #include <stdarg.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <unistd.h>
 #include <getopt.h>
@@ -146,7 +147,7 @@ static ploidy_predef_t ploidy_predefs[] =
           "*  * *     F 2\n"
     },
     { .alias  = "GRCh38",
-      .about  = "Human Genome reference assembly GRCh38 / hg38, plain chromosome naming (1,2,3,..)",
+      .about  = "Human Genome reference assembly GRCh38 / hg38",
       .ploidy =
           "X 1 9999 M 1\n"
           "X 2781480 155701381 M 1\n"
@@ -275,7 +276,7 @@ static void set_samples(args_t *args, const char *fn, int is_file)
 
     args->samples_map = (int*) malloc(sizeof(int)*bcf_hdr_nsamples(args->aux.hdr)); // for subsetting
     args->sample2sex  = (int*) malloc(sizeof(int)*bcf_hdr_nsamples(args->aux.hdr));
-    int dflt_sex_id = ploidy_add_sex(args->ploidy, "F");
+    int dflt_sex_id = ploidy_nsex(args->ploidy) - 1;
     for (i=0; i<bcf_hdr_nsamples(args->aux.hdr); i++) args->sample2sex[i] = dflt_sex_id;
 
     int *old2new = (int*) malloc(sizeof(int)*bcf_hdr_nsamples(args->aux.hdr));
@@ -294,6 +295,7 @@ static void set_samples(args_t *args, const char *fn, int is_file)
 
         int ismpl = bcf_hdr_id2int(args->aux.hdr, BCF_DT_SAMPLE, ss);
         if ( ismpl < 0 ) { fprintf(stderr,"Warning: No such sample in the VCF: %s\n",ss); continue; }
+        if ( old2new[ismpl] != -1 ) { fprintf(stderr,"Warning: The sample is listed multiple times: %s\n",ss); continue; }
 
         ss = se+1;
         while ( *ss && isspace(*ss) ) ss++;
@@ -411,18 +413,24 @@ static void init_data(args_t *args)
         {
             args->nsamples = bcf_hdr_nsamples(args->aux.hdr);
             args->sample2sex = (int*) malloc(sizeof(int)*args->nsamples);
-            for (i=0; i<args->nsamples; i++) args->sample2sex[i] = 0;
+            for (i=0; i<args->nsamples; i++) args->sample2sex[i] = args->nsex - 1;
         }
     }
     if ( args->nsamples )
     {
         args->aux.ploidy = (uint8_t*) malloc(args->nsamples);
-        for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = 2;
-        for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = 2;
+        for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = ploidy_max(args->ploidy);
+        for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = ploidy_max(args->ploidy);
+        for (i=0; i<args->nsamples; i++) 
+            if ( args->sample2sex[i] >= args->nsex ) args->sample2sex[i] = args->nsex - 1;
     }
 
-    if ( args->gvcf ) 
+    if ( args->gvcf )
+    {
+        int id = bcf_hdr_id2int(args->aux.hdr,BCF_DT_ID,"DP");
+        if ( id<0 || !bcf_hdr_idinfo_exists(args->aux.hdr,BCF_HL_FMT,id) ) error("--gvcf output mode requires FORMAT/DP tag, which is not present in the input header\n");
         gvcf_update_header(args->gvcf, args->aux.hdr);
+    }
 
     if ( args->samples_map )
     {
@@ -554,7 +562,6 @@ static void set_ploidy(args_t *args, bcf1_t *rec)
         else
             args->aux.ploidy[i] = args->sex2ploidy[args->sample2sex[i]];
     }
-
     int *tmp = args->sex2ploidy; args->sex2ploidy = args->sex2ploidy_prev; args->sex2ploidy_prev = tmp;
 }
 
@@ -569,7 +576,10 @@ ploidy_t *init_ploidy(char *alias)
 
     if ( !pld->alias )
     {
-        fprintf(stderr,"Predefined ploidies:\n");
+        fprintf(stderr,"\nPRE-DEFINED PLOIDY FILES\n\n");
+        fprintf(stderr," * Columns are: CHROM,FROM,TO,SEX,PLOIDY\n");
+        fprintf(stderr," * Coordinates are 1-based inclusive.\n");
+        fprintf(stderr," * A '*' means any value not otherwise defined.\n\n");
         pld = ploidy_predefs;
         while ( pld->alias )
         {
@@ -618,6 +628,7 @@ static void usage(args_t *args)
     fprintf(stderr, "Input/output options:\n");
     fprintf(stderr, "   -A, --keep-alts                 keep all possible alternate alleles at variant sites\n");
     fprintf(stderr, "   -f, --format-fields <list>      output format fields: GQ,GP (lowercase allowed) []\n");
+    fprintf(stderr, "   -F, --prior-freqs <AN,AC>       use prior allele frequencies\n");
     fprintf(stderr, "   -g, --gvcf <int>,[...]          group non-variant sites into gVCF blocks by minimum per-sample DP\n");
     fprintf(stderr, "   -i, --insert-missed             output also sites missed by mpileup but present in -T\n");
     fprintf(stderr, "   -M, --keep-masked-ref           keep sites with masked reference allele (REF=N)\n");
@@ -630,7 +641,7 @@ static void usage(args_t *args)
     fprintf(stderr, "   -m, --multiallelic-caller       alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
     fprintf(stderr, "   -n, --novel-rate <float>,[...]  likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
     fprintf(stderr, "   -p, --pval-threshold <float>    variant if P(ref|D)<FLOAT with -c [0.5]\n");
-    fprintf(stderr, "   -P, --prior <float>             mutation rate (use bigger for greater sensitivity) [1.1e-3]\n");
+    fprintf(stderr, "   -P, --prior <float>             mutation rate (use bigger for greater sensitivity), use with -m [1.1e-3]\n");
 
     // todo (and more)
     // fprintf(stderr, "\nContrast calling and association test options:\n");
@@ -667,6 +678,7 @@ int main_vcfcall(int argc, char *argv[])
     {
         {"help",no_argument,NULL,'h'},
         {"format-fields",required_argument,NULL,'f'},
+        {"prior-freqs",required_argument,NULL,'F'},
         {"gvcf",required_argument,NULL,'g'},
         {"output",required_argument,NULL,'o'},
         {"output-type",required_argument,NULL,'O'},
@@ -698,7 +710,7 @@ int main_vcfcall(int argc, char *argv[])
     };
 
     char *tmp = NULL;
-    while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:ANMV:vcmp:C:n:P:f:ig:XY", loptions, NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:ANMV:vcmp:C:n:P:f:ig:XYF:", loptions, NULL)) >= 0)
     {
         switch (c)
         {
@@ -713,6 +725,13 @@ int main_vcfcall(int argc, char *argv[])
             case 'c': args.flag |= CF_CCALL; break;          // the original EM based calling method
             case 'i': args.flag |= CF_INS_MISSED; break;
             case 'v': args.aux.flag |= CALL_VARONLY; break;
+            case 'F':
+                args.aux.prior_AN = optarg;
+                args.aux.prior_AC = strchr(optarg,',');
+                if ( !args.aux.prior_AC ) error("Expected two tags with -F (e.g. AN,AC), got \"%s\"\n",optarg);
+                *args.aux.prior_AC = 0;
+                args.aux.prior_AC++;
+                break;
             case 'g': 
                 args.gvcf = gvcf_init(optarg);
                 if ( !args.gvcf ) error("Could not parse: --gvcf %s\n", optarg);
@@ -770,8 +789,8 @@ int main_vcfcall(int argc, char *argv[])
 
     if ( !ploidy_fname && !ploidy )
     {
-        fprintf(stderr,"Note: Neither --ploidy nor --ploidy-file given, assuming all sites are diploid\n");
-        args.ploidy = ploidy_init_string("",2);
+        if ( !args.samples_is_file ) fprintf(stderr,"Note: none of --samples-file, --ploidy or --ploidy-file given, assuming all sites are diploid\n");
+        args.ploidy = ploidy_init_string("* * * 0 0\n* * * 1 1\n* * * 2 2\n",2);
     }
 
     if ( !args.ploidy ) error("Could not initialize ploidy\n");
@@ -833,6 +852,7 @@ int main_vcfcall(int argc, char *argv[])
         else
             ret = ccall(&args.aux, bcf_rec);
         if ( ret==-1 ) error("Something is wrong\n");
+        else if ( ret==-2 ) continue;   // skip the site
 
         // Normal output
         if ( (args.aux.flag & CALL_VARONLY) && ret==0 && !args.gvcf ) continue;     // not a variant
diff --git a/bcftools/vcfcall.c.pysam.c b/bcftools/vcfcall.c.pysam.c
index 8e59fd9..8e6721b 100644
--- a/bcftools/vcfcall.c.pysam.c
+++ b/bcftools/vcfcall.c.pysam.c
@@ -26,6 +26,7 @@ THE SOFTWARE.  */
 
 #include <stdarg.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <unistd.h>
 #include <getopt.h>
@@ -148,7 +149,7 @@ static ploidy_predef_t ploidy_predefs[] =
           "*  * *     F 2\n"
     },
     { .alias  = "GRCh38",
-      .about  = "Human Genome reference assembly GRCh38 / hg38, plain chromosome naming (1,2,3,..)",
+      .about  = "Human Genome reference assembly GRCh38 / hg38",
       .ploidy =
           "X 1 9999 M 1\n"
           "X 2781480 155701381 M 1\n"
@@ -277,7 +278,7 @@ static void set_samples(args_t *args, const char *fn, int is_file)
 
     args->samples_map = (int*) malloc(sizeof(int)*bcf_hdr_nsamples(args->aux.hdr)); // for subsetting
     args->sample2sex  = (int*) malloc(sizeof(int)*bcf_hdr_nsamples(args->aux.hdr));
-    int dflt_sex_id = ploidy_add_sex(args->ploidy, "F");
+    int dflt_sex_id = ploidy_nsex(args->ploidy) - 1;
     for (i=0; i<bcf_hdr_nsamples(args->aux.hdr); i++) args->sample2sex[i] = dflt_sex_id;
 
     int *old2new = (int*) malloc(sizeof(int)*bcf_hdr_nsamples(args->aux.hdr));
@@ -296,6 +297,7 @@ static void set_samples(args_t *args, const char *fn, int is_file)
 
         int ismpl = bcf_hdr_id2int(args->aux.hdr, BCF_DT_SAMPLE, ss);
         if ( ismpl < 0 ) { fprintf(pysam_stderr,"Warning: No such sample in the VCF: %s\n",ss); continue; }
+        if ( old2new[ismpl] != -1 ) { fprintf(pysam_stderr,"Warning: The sample is listed multiple times: %s\n",ss); continue; }
 
         ss = se+1;
         while ( *ss && isspace(*ss) ) ss++;
@@ -413,18 +415,24 @@ static void init_data(args_t *args)
         {
             args->nsamples = bcf_hdr_nsamples(args->aux.hdr);
             args->sample2sex = (int*) malloc(sizeof(int)*args->nsamples);
-            for (i=0; i<args->nsamples; i++) args->sample2sex[i] = 0;
+            for (i=0; i<args->nsamples; i++) args->sample2sex[i] = args->nsex - 1;
         }
     }
     if ( args->nsamples )
     {
         args->aux.ploidy = (uint8_t*) malloc(args->nsamples);
-        for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = 2;
-        for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = 2;
+        for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = ploidy_max(args->ploidy);
+        for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = ploidy_max(args->ploidy);
+        for (i=0; i<args->nsamples; i++) 
+            if ( args->sample2sex[i] >= args->nsex ) args->sample2sex[i] = args->nsex - 1;
     }
 
-    if ( args->gvcf ) 
+    if ( args->gvcf )
+    {
+        int id = bcf_hdr_id2int(args->aux.hdr,BCF_DT_ID,"DP");
+        if ( id<0 || !bcf_hdr_idinfo_exists(args->aux.hdr,BCF_HL_FMT,id) ) error("--gvcf output mode requires FORMAT/DP tag, which is not present in the input header\n");
         gvcf_update_header(args->gvcf, args->aux.hdr);
+    }
 
     if ( args->samples_map )
     {
@@ -556,7 +564,6 @@ static void set_ploidy(args_t *args, bcf1_t *rec)
         else
             args->aux.ploidy[i] = args->sex2ploidy[args->sample2sex[i]];
     }
-
     int *tmp = args->sex2ploidy; args->sex2ploidy = args->sex2ploidy_prev; args->sex2ploidy_prev = tmp;
 }
 
@@ -571,7 +578,10 @@ ploidy_t *init_ploidy(char *alias)
 
     if ( !pld->alias )
     {
-        fprintf(pysam_stderr,"Predefined ploidies:\n");
+        fprintf(pysam_stderr,"\nPRE-DEFINED PLOIDY FILES\n\n");
+        fprintf(pysam_stderr," * Columns are: CHROM,FROM,TO,SEX,PLOIDY\n");
+        fprintf(pysam_stderr," * Coordinates are 1-based inclusive.\n");
+        fprintf(pysam_stderr," * A '*' means any value not otherwise defined.\n\n");
         pld = ploidy_predefs;
         while ( pld->alias )
         {
@@ -620,6 +630,7 @@ static void usage(args_t *args)
     fprintf(pysam_stderr, "Input/output options:\n");
     fprintf(pysam_stderr, "   -A, --keep-alts                 keep all possible alternate alleles at variant sites\n");
     fprintf(pysam_stderr, "   -f, --format-fields <list>      output format fields: GQ,GP (lowercase allowed) []\n");
+    fprintf(pysam_stderr, "   -F, --prior-freqs <AN,AC>       use prior allele frequencies\n");
     fprintf(pysam_stderr, "   -g, --gvcf <int>,[...]          group non-variant sites into gVCF blocks by minimum per-sample DP\n");
     fprintf(pysam_stderr, "   -i, --insert-missed             output also sites missed by mpileup but present in -T\n");
     fprintf(pysam_stderr, "   -M, --keep-masked-ref           keep sites with masked reference allele (REF=N)\n");
@@ -632,7 +643,7 @@ static void usage(args_t *args)
     fprintf(pysam_stderr, "   -m, --multiallelic-caller       alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
     fprintf(pysam_stderr, "   -n, --novel-rate <float>,[...]  likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
     fprintf(pysam_stderr, "   -p, --pval-threshold <float>    variant if P(ref|D)<FLOAT with -c [0.5]\n");
-    fprintf(pysam_stderr, "   -P, --prior <float>             mutation rate (use bigger for greater sensitivity) [1.1e-3]\n");
+    fprintf(pysam_stderr, "   -P, --prior <float>             mutation rate (use bigger for greater sensitivity), use with -m [1.1e-3]\n");
 
     // todo (and more)
     // fprintf(pysam_stderr, "\nContrast calling and association test options:\n");
@@ -669,6 +680,7 @@ int main_vcfcall(int argc, char *argv[])
     {
         {"help",no_argument,NULL,'h'},
         {"format-fields",required_argument,NULL,'f'},
+        {"prior-freqs",required_argument,NULL,'F'},
         {"gvcf",required_argument,NULL,'g'},
         {"output",required_argument,NULL,'o'},
         {"output-type",required_argument,NULL,'O'},
@@ -700,7 +712,7 @@ int main_vcfcall(int argc, char *argv[])
     };
 
     char *tmp = NULL;
-    while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:ANMV:vcmp:C:n:P:f:ig:XY", loptions, NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:ANMV:vcmp:C:n:P:f:ig:XYF:", loptions, NULL)) >= 0)
     {
         switch (c)
         {
@@ -715,6 +727,13 @@ int main_vcfcall(int argc, char *argv[])
             case 'c': args.flag |= CF_CCALL; break;          // the original EM based calling method
             case 'i': args.flag |= CF_INS_MISSED; break;
             case 'v': args.aux.flag |= CALL_VARONLY; break;
+            case 'F':
+                args.aux.prior_AN = optarg;
+                args.aux.prior_AC = strchr(optarg,',');
+                if ( !args.aux.prior_AC ) error("Expected two tags with -F (e.g. AN,AC), got \"%s\"\n",optarg);
+                *args.aux.prior_AC = 0;
+                args.aux.prior_AC++;
+                break;
             case 'g': 
                 args.gvcf = gvcf_init(optarg);
                 if ( !args.gvcf ) error("Could not parse: --gvcf %s\n", optarg);
@@ -772,8 +791,8 @@ int main_vcfcall(int argc, char *argv[])
 
     if ( !ploidy_fname && !ploidy )
     {
-        fprintf(pysam_stderr,"Note: Neither --ploidy nor --ploidy-file given, assuming all sites are diploid\n");
-        args.ploidy = ploidy_init_string("",2);
+        if ( !args.samples_is_file ) fprintf(pysam_stderr,"Note: none of --samples-file, --ploidy or --ploidy-file given, assuming all sites are diploid\n");
+        args.ploidy = ploidy_init_string("* * * 0 0\n* * * 1 1\n* * * 2 2\n",2);
     }
 
     if ( !args.ploidy ) error("Could not initialize ploidy\n");
@@ -835,6 +854,7 @@ int main_vcfcall(int argc, char *argv[])
         else
             ret = ccall(&args.aux, bcf_rec);
         if ( ret==-1 ) error("Something is wrong\n");
+        else if ( ret==-2 ) continue;   // skip the site
 
         // Normal output
         if ( (args.aux.flag & CALL_VARONLY) && ret==0 && !args.gvcf ) continue;     // not a variant
diff --git a/bcftools/vcfcnv.c b/bcftools/vcfcnv.c
index e4b9372..ffe71c4 100644
--- a/bcftools/vcfcnv.c
+++ b/bcftools/vcfcnv.c
@@ -266,17 +266,15 @@ static void init_data(args_t *args)
     hmm_init_states(args->hmm, args->iprobs);
 
     args->summary_fh = stdout;
-    if ( args->output_dir )
+    init_sample_files(&args->query_sample, args->output_dir);
+    if ( args->control_sample.name )
     {
-        init_sample_files(&args->query_sample, args->output_dir);
-        if ( args->control_sample.name )
-        {
-            init_sample_files(&args->control_sample, args->output_dir);
-            args->summary_fh = open_file(&args->summary_fname,"w","%s/summary.tab",args->output_dir);
-        }
-        else
-            args->summary_fh = NULL;    // one sample only, no two-file summary
+        init_sample_files(&args->control_sample, args->output_dir);
+        args->summary_fh = open_file(&args->summary_fname,"w","%s/summary.tab",args->output_dir);
     }
+    else
+        args->summary_fh = NULL;    // one sample only, no two-file summary
+        
 
     int i;
     FILE *fh = args->summary_fh ? args->summary_fh : args->query_sample.summary_fh;
@@ -295,6 +293,19 @@ static void init_data(args_t *args)
                 "# RG, Regions\t[2]Chromosome\t[3]Start\t[4]End\t[5]Copy number:%s\t[6]Quality\t[7]nSites\t[8]nHETs\n",
                 args->query_sample.name
                );
+    if ( args->optimize_frac )
+    {
+        fprintf(args->query_sample.summary_fh, "# CF, cell fraction estimate\t[2]Chromosome\t[3]Start\t[4]End\t[5]Cell fraction\t[6]BAF deviation\n");
+        if ( args->control_sample.name )
+        {
+            fprintf(args->control_sample.summary_fh, "# CF, cell fraction estimate\t[2]Chromosome\t[3]Start\t[4]End\t[5]Cell fraction\t[6]BAF deviation\n");
+            fprintf(args->summary_fh, "# CF, cell fraction estimate\t[2]Chromosome\t[3]Start\t[4]End\t"
+                "[5]Cell fraction:%s\t[6]Cell fraction:%s\t[7]BAF deviation:%s\t[8]BAF deviation:%s\n",
+                args->query_sample.name,args->control_sample.name,
+                args->query_sample.name,args->control_sample.name
+                );
+        }
+    }
 }
 
 char *msprintf(const char *fmt, ...);
@@ -556,6 +567,7 @@ static void destroy_data(args_t *args)
     free(args->sites);
     free(args->eprob);
     free(args->tprob);
+    free(args->iprobs);
     free(args->summary_fname);
     free(args->nonref_afs);
     free(args->query_sample.baf);
@@ -960,6 +972,20 @@ static void cnv_flush_viterbi(args_t *args)
         if ( args->control_sample.name )
             fprintf(stderr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
         fprintf(stderr,"\n");
+
+        fprintf(args->query_sample.summary_fh,"CF\t%s\t%d\t%d\t%.2f\t%f\n",
+            bcf_hdr_id2name(args->hdr,args->prev_rid),args->sites[0]+1,args->sites[args->nsites-1]+1,
+            args->query_sample.cell_frac,sqrt(args->query_sample.baf_dev2));
+        if ( args->control_sample.name )
+        {
+            fprintf(args->control_sample.summary_fh,"CF\t%s\t%d\t%d\t%.2f\t%f\n",
+                    bcf_hdr_id2name(args->hdr,args->prev_rid),args->sites[0]+1,args->sites[args->nsites-1]+1,
+                    args->control_sample.cell_frac,sqrt(args->control_sample.baf_dev2));
+            fprintf(args->summary_fh,"CF\t%s\t%d\t%d\t%.2f\t%.2f\t%f\t%f\n",
+                    bcf_hdr_id2name(args->hdr,args->prev_rid),args->sites[0]+1,args->sites[args->nsites-1]+1,
+                    args->query_sample.cell_frac, args->control_sample.cell_frac,
+                    sqrt(args->query_sample.baf_dev2), sqrt(args->control_sample.baf_dev2));
+        }
     }
     set_emission_probs(args);
 
@@ -1351,7 +1377,7 @@ int main_vcfcnv(int argc, char *argv[])
     else fname = argv[optind];
     if ( !fname ) usage(args);
 
-    if ( args->plot_th<=100 && !args->output_dir ) error("Expected -o option with -p\n");
+    if ( !args->output_dir ) error("Expected -o option\n");
     if ( args->regions_list )
     {
         if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
diff --git a/bcftools/vcfcnv.c.pysam.c b/bcftools/vcfcnv.c.pysam.c
index 10a00b9..1075ef1 100644
--- a/bcftools/vcfcnv.c.pysam.c
+++ b/bcftools/vcfcnv.c.pysam.c
@@ -268,17 +268,15 @@ static void init_data(args_t *args)
     hmm_init_states(args->hmm, args->iprobs);
 
     args->summary_fh = pysam_stdout;
-    if ( args->output_dir )
+    init_sample_files(&args->query_sample, args->output_dir);
+    if ( args->control_sample.name )
     {
-        init_sample_files(&args->query_sample, args->output_dir);
-        if ( args->control_sample.name )
-        {
-            init_sample_files(&args->control_sample, args->output_dir);
-            args->summary_fh = open_file(&args->summary_fname,"w","%s/summary.tab",args->output_dir);
-        }
-        else
-            args->summary_fh = NULL;    // one sample only, no two-file summary
+        init_sample_files(&args->control_sample, args->output_dir);
+        args->summary_fh = open_file(&args->summary_fname,"w","%s/summary.tab",args->output_dir);
     }
+    else
+        args->summary_fh = NULL;    // one sample only, no two-file summary
+        
 
     int i;
     FILE *fh = args->summary_fh ? args->summary_fh : args->query_sample.summary_fh;
@@ -297,6 +295,19 @@ static void init_data(args_t *args)
                 "# RG, Regions\t[2]Chromosome\t[3]Start\t[4]End\t[5]Copy number:%s\t[6]Quality\t[7]nSites\t[8]nHETs\n",
                 args->query_sample.name
                );
+    if ( args->optimize_frac )
+    {
+        fprintf(args->query_sample.summary_fh, "# CF, cell fraction estimate\t[2]Chromosome\t[3]Start\t[4]End\t[5]Cell fraction\t[6]BAF deviation\n");
+        if ( args->control_sample.name )
+        {
+            fprintf(args->control_sample.summary_fh, "# CF, cell fraction estimate\t[2]Chromosome\t[3]Start\t[4]End\t[5]Cell fraction\t[6]BAF deviation\n");
+            fprintf(args->summary_fh, "# CF, cell fraction estimate\t[2]Chromosome\t[3]Start\t[4]End\t"
+                "[5]Cell fraction:%s\t[6]Cell fraction:%s\t[7]BAF deviation:%s\t[8]BAF deviation:%s\n",
+                args->query_sample.name,args->control_sample.name,
+                args->query_sample.name,args->control_sample.name
+                );
+        }
+    }
 }
 
 char *msprintf(const char *fmt, ...);
@@ -558,6 +569,7 @@ static void destroy_data(args_t *args)
     free(args->sites);
     free(args->eprob);
     free(args->tprob);
+    free(args->iprobs);
     free(args->summary_fname);
     free(args->nonref_afs);
     free(args->query_sample.baf);
@@ -962,6 +974,20 @@ static void cnv_flush_viterbi(args_t *args)
         if ( args->control_sample.name )
             fprintf(pysam_stderr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
         fprintf(pysam_stderr,"\n");
+
+        fprintf(args->query_sample.summary_fh,"CF\t%s\t%d\t%d\t%.2f\t%f\n",
+            bcf_hdr_id2name(args->hdr,args->prev_rid),args->sites[0]+1,args->sites[args->nsites-1]+1,
+            args->query_sample.cell_frac,sqrt(args->query_sample.baf_dev2));
+        if ( args->control_sample.name )
+        {
+            fprintf(args->control_sample.summary_fh,"CF\t%s\t%d\t%d\t%.2f\t%f\n",
+                    bcf_hdr_id2name(args->hdr,args->prev_rid),args->sites[0]+1,args->sites[args->nsites-1]+1,
+                    args->control_sample.cell_frac,sqrt(args->control_sample.baf_dev2));
+            fprintf(args->summary_fh,"CF\t%s\t%d\t%d\t%.2f\t%.2f\t%f\t%f\n",
+                    bcf_hdr_id2name(args->hdr,args->prev_rid),args->sites[0]+1,args->sites[args->nsites-1]+1,
+                    args->query_sample.cell_frac, args->control_sample.cell_frac,
+                    sqrt(args->query_sample.baf_dev2), sqrt(args->control_sample.baf_dev2));
+        }
     }
     set_emission_probs(args);
 
@@ -1353,7 +1379,7 @@ int main_vcfcnv(int argc, char *argv[])
     else fname = argv[optind];
     if ( !fname ) usage(args);
 
-    if ( args->plot_th<=100 && !args->output_dir ) error("Expected -o option with -p\n");
+    if ( !args->output_dir ) error("Expected -o option\n");
     if ( args->regions_list )
     {
         if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
diff --git a/bcftools/vcfconcat.c b/bcftools/vcfconcat.c
index bd6a00a..3345c20 100644
--- a/bcftools/vcfconcat.c
+++ b/bcftools/vcfconcat.c
@@ -555,100 +555,138 @@ static void concat(args_t *args)
     }
 }
 
+int print_vcf_gz_header(BGZF *fp, BGZF *bgzf_out, int print_header, kstring_t *tmp)
+{
+    char *buffer = (char*) fp->uncompressed_block;
+
+    // Read the header and find the position of the data block
+    if ( buffer[0]!='#' ) error("Could not parse the header, expected '#', found '%c'\n", buffer[0]);
+
+    int nskip = 1;     // end of the header in the current uncompressed block
+    while (1)
+    {
+        if ( buffer[nskip]=='\n' )
+        {
+            nskip++;
+            if ( nskip>=fp->block_length )
+            {
+                kputsn(buffer,nskip,tmp);
+                if ( bgzf_read_block(fp) != 0 ) return -1;
+                if ( !fp->block_length ) break;
+                nskip = 0;
+            }
+            // The header has finished
+            if ( buffer[nskip]!='#' )
+            {
+                kputsn(buffer,nskip,tmp);
+                break;
+            }
+        }
+        nskip++;
+        if ( nskip>=fp->block_length )
+        {
+            kputsn(buffer,fp->block_length,tmp);
+            if ( bgzf_read_block(fp) != 0 ) return -1;
+            if ( !fp->block_length ) break;
+            nskip = 0;
+        }
+    }
+    if ( print_header )
+    {
+        if ( bgzf_write(bgzf_out,tmp->s,tmp->l) != tmp->l ) error("Failed to write %d bytes\n", tmp->l);
+        tmp->l = 0;
+    }
+    return nskip;
+}
+
+static inline int unpackInt16(const uint8_t *buffer)
+{
+    return buffer[0] | buffer[1] << 8;
+}
+static int check_header(const uint8_t *header)
+{
+    if ( header[0] != 31 || header[1] != 139 || header[2] != 8 ) return -2;
+    return ((header[3] & 4) != 0
+            && unpackInt16((uint8_t*)&header[10]) == 6
+            && header[12] == 'B' && header[13] == 'C'
+            && unpackInt16((uint8_t*)&header[14]) == 2) ? 0 : -1;
+}
 static void naive_concat(args_t *args)
 {
     // only compressed BCF atm
     BGZF *bgzf_out = bgzf_open(args->output_fname,"w");;
 
-    const size_t page_size = 32768;
-    char *buf = (char*) malloc(page_size);
+    const size_t page_size = BGZF_MAX_BLOCK_SIZE;
+    uint8_t *buf = (uint8_t*) malloc(page_size);
     kstring_t tmp = {0,0,0};
-    int i;
+    int i, file_types = 0;
     for (i=0; i<args->nfnames; i++)
     {
         htsFile *hts_fp = hts_open(args->fnames[i],"r");
         if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
         htsFormat type = *hts_get_format(hts_fp);
 
-        if ( type.format==vcf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
-        if ( type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
+        if ( type.compression!=bgzf )
+            error("The --naive option works only for compressed BCFs or VCFs, sorry :-/\n");
+        file_types |= type.format==vcf ? 1 : 2;
+        if ( file_types==3 )
+            error("The --naive option works only for compressed files of the same type, all BCFs or all VCFs :-/\n");
 
         BGZF *fp = hts_get_bgzfp(hts_fp);
         if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
             error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));
 
-        uint8_t magic[5];
-        if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
-        if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);
+        int nskip;
+        if ( type.format==bcf )
+        {
+            uint8_t magic[5];
+            if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+            if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);
 
-        if ( bgzf_read(fp, &tmp.l, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
-        hts_expand(char,tmp.l,tmp.m,tmp.s);
-        if ( bgzf_read(fp, tmp.s, tmp.l) != tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+            if ( bgzf_read(fp, &tmp.l, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+            hts_expand(char,tmp.l,tmp.m,tmp.s);
+            if ( bgzf_read(fp, tmp.s, tmp.l) != tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
 
-        // write only the first header
-        if ( i==0 )
+            // write only the first header
+            if ( i==0 )
+            {
+                if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
+                if ( bgzf_write(bgzf_out, &tmp.l, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
+                if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != tmp.l) error("Failed to write %d bytes to %s\n", tmp.l,args->output_fname);
+            }
+            nskip = fp->block_offset;
+        }
+        else
         {
-            if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
-            if ( bgzf_write(bgzf_out, &tmp.l, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
-            if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != tmp.l) error("Failed to write %d bytes to %s\n", tmp.l,args->output_fname);
+            nskip = print_vcf_gz_header(fp, bgzf_out, i==0?1:0, &tmp);
+            if ( nskip==-1 ) error("Error reading %s\n", args->fnames[i]);
         }
 
         // Output all non-header data that were read together with the header block
-        int nskip = fp->block_offset;
         if ( fp->block_length - nskip > 0 )
         {
-            if ( bgzf_write(bgzf_out, fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",fp->errcode);
+            if ( bgzf_write(bgzf_out, (char *)fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",fp->errcode);
         }
         if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);
 
 
         // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
-        ssize_t nread, ncached = 0, nwr;
-        const int neof = 28;
-        char cached[neof];
+        // The final bgzf eof block will be added by bgzf_close.
+        ssize_t nread, nblock, nwr;
+        const int nheader = 18, neof = 28;
+        const uint8_t *eof = (uint8_t*) "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
         while (1)
         {
-            nread = bgzf_raw_read(fp, buf, page_size);
-
-            // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
-            if ( nread<=0 ) break;
-            if ( nread<=neof )      // last block
-            {
-                if ( ncached )
-                {
-                    // flush the part of the cache that won't be needed
-                    nwr = bgzf_raw_write(bgzf_out, cached, nread);
-                    if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
-
-                    // make space in the cache so that we can append to the end
-                    if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
-                }
-
-                // fill the cache and check for eof outside this loop
-                memcpy(cached+neof-nread,buf,nread);
-                break;
-            }
-
-            // not the last block, flush the cache if full
-            if ( ncached )
-            {
-                nwr = bgzf_raw_write(bgzf_out, cached, ncached);
-                if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)ncached);
-                ncached = 0;
-            }
-
-            // fill the cache
-            nread -= neof;
-            memcpy(cached,buf+nread,neof);
-            ncached = neof;
-
+            nread = bgzf_raw_read(fp, buf, nheader);
+            if ( !nread ) break;
+            if ( nread != nheader || check_header(buf)!=0 ) error("Could not parse the header of a bgzf block: %s\n",args->fnames[i]);
+            nblock = unpackInt16(buf+16) + 1;
+            assert( nblock <= page_size && nblock >= nheader );
+            nread += bgzf_raw_read(fp, buf+nheader, nblock - nheader);
+            if ( nread!=nblock ) error("Could not read %d bytes: %s\n",nblock,args->fnames[i]);
+            if ( nread==neof && !memcmp(buf,eof,neof) ) continue;
             nwr = bgzf_raw_write(bgzf_out, buf, nread);
-            if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
-        }
-        if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
-        {
-            nwr = bgzf_raw_write(bgzf_out, cached, neof);
-            if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)neof);
+            if ( nwr != nread ) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
         }
         if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
     }
@@ -677,8 +715,8 @@ static void usage(args_t *args)
     fprintf(stderr, "   -D, --remove-duplicates        Alias for -d none\n");
     fprintf(stderr, "   -f, --file-list <file>         Read the list of files from a file.\n");
     fprintf(stderr, "   -l, --ligate                   Ligate phased VCFs by matching phase at overlapping haplotypes\n");
-    fprintf(stderr, "       --no-version               do not append version and command line to the header\n");
-    fprintf(stderr, "   -n, --naive                    Concatenate BCF files without recompression (dangerous, use with caution)\n");
+    fprintf(stderr, "       --no-version               Do not append version and command line to the header\n");
+    fprintf(stderr, "   -n, --naive                    Concatenate files without recompression (dangerous, use with caution)\n");
     fprintf(stderr, "   -o, --output <file>            Write output to a file [standard output]\n");
     fprintf(stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
     fprintf(stderr, "   -q, --min-PQ <int>             Break phase set if phasing quality is lower than <int> [30]\n");
diff --git a/bcftools/vcfconcat.c.pysam.c b/bcftools/vcfconcat.c.pysam.c
index be2d6a2..4445a51 100644
--- a/bcftools/vcfconcat.c.pysam.c
+++ b/bcftools/vcfconcat.c.pysam.c
@@ -557,100 +557,138 @@ static void concat(args_t *args)
     }
 }
 
+int print_vcf_gz_header(BGZF *fp, BGZF *bgzf_out, int print_header, kstring_t *tmp)
+{
+    char *buffer = (char*) fp->uncompressed_block;
+
+    // Read the header and find the position of the data block
+    if ( buffer[0]!='#' ) error("Could not parse the header, expected '#', found '%c'\n", buffer[0]);
+
+    int nskip = 1;     // end of the header in the current uncompressed block
+    while (1)
+    {
+        if ( buffer[nskip]=='\n' )
+        {
+            nskip++;
+            if ( nskip>=fp->block_length )
+            {
+                kputsn(buffer,nskip,tmp);
+                if ( bgzf_read_block(fp) != 0 ) return -1;
+                if ( !fp->block_length ) break;
+                nskip = 0;
+            }
+            // The header has finished
+            if ( buffer[nskip]!='#' )
+            {
+                kputsn(buffer,nskip,tmp);
+                break;
+            }
+        }
+        nskip++;
+        if ( nskip>=fp->block_length )
+        {
+            kputsn(buffer,fp->block_length,tmp);
+            if ( bgzf_read_block(fp) != 0 ) return -1;
+            if ( !fp->block_length ) break;
+            nskip = 0;
+        }
+    }
+    if ( print_header )
+    {
+        if ( bgzf_write(bgzf_out,tmp->s,tmp->l) != tmp->l ) error("Failed to write %d bytes\n", tmp->l);
+        tmp->l = 0;
+    }
+    return nskip;
+}
+
+static inline int unpackInt16(const uint8_t *buffer)
+{
+    return buffer[0] | buffer[1] << 8;
+}
+static int check_header(const uint8_t *header)
+{
+    if ( header[0] != 31 || header[1] != 139 || header[2] != 8 ) return -2;
+    return ((header[3] & 4) != 0
+            && unpackInt16((uint8_t*)&header[10]) == 6
+            && header[12] == 'B' && header[13] == 'C'
+            && unpackInt16((uint8_t*)&header[14]) == 2) ? 0 : -1;
+}
 static void naive_concat(args_t *args)
 {
     // only compressed BCF atm
     BGZF *bgzf_out = bgzf_open(args->output_fname,"w");;
 
-    const size_t page_size = 32768;
-    char *buf = (char*) malloc(page_size);
+    const size_t page_size = BGZF_MAX_BLOCK_SIZE;
+    uint8_t *buf = (uint8_t*) malloc(page_size);
     kstring_t tmp = {0,0,0};
-    int i;
+    int i, file_types = 0;
     for (i=0; i<args->nfnames; i++)
     {
         htsFile *hts_fp = hts_open(args->fnames[i],"r");
         if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
         htsFormat type = *hts_get_format(hts_fp);
 
-        if ( type.format==vcf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
-        if ( type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
+        if ( type.compression!=bgzf )
+            error("The --naive option works only for compressed BCFs or VCFs, sorry :-/\n");
+        file_types |= type.format==vcf ? 1 : 2;
+        if ( file_types==3 )
+            error("The --naive option works only for compressed files of the same type, all BCFs or all VCFs :-/\n");
 
         BGZF *fp = hts_get_bgzfp(hts_fp);
         if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
             error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));
 
-        uint8_t magic[5];
-        if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
-        if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);
+        int nskip;
+        if ( type.format==bcf )
+        {
+            uint8_t magic[5];
+            if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+            if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);
 
-        if ( bgzf_read(fp, &tmp.l, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
-        hts_expand(char,tmp.l,tmp.m,tmp.s);
-        if ( bgzf_read(fp, tmp.s, tmp.l) != tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+            if ( bgzf_read(fp, &tmp.l, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+            hts_expand(char,tmp.l,tmp.m,tmp.s);
+            if ( bgzf_read(fp, tmp.s, tmp.l) != tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
 
-        // write only the first header
-        if ( i==0 )
+            // write only the first header
+            if ( i==0 )
+            {
+                if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
+                if ( bgzf_write(bgzf_out, &tmp.l, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
+                if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != tmp.l) error("Failed to write %d bytes to %s\n", tmp.l,args->output_fname);
+            }
+            nskip = fp->block_offset;
+        }
+        else
         {
-            if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
-            if ( bgzf_write(bgzf_out, &tmp.l, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
-            if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != tmp.l) error("Failed to write %d bytes to %s\n", tmp.l,args->output_fname);
+            nskip = print_vcf_gz_header(fp, bgzf_out, i==0?1:0, &tmp);
+            if ( nskip==-1 ) error("Error reading %s\n", args->fnames[i]);
         }
 
         // Output all non-header data that were read together with the header block
-        int nskip = fp->block_offset;
         if ( fp->block_length - nskip > 0 )
         {
-            if ( bgzf_write(bgzf_out, fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",fp->errcode);
+            if ( bgzf_write(bgzf_out, (char *)fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",fp->errcode);
         }
         if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);
 
 
         // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
-        ssize_t nread, ncached = 0, nwr;
-        const int neof = 28;
-        char cached[neof];
+        // The final bgzf eof block will be added by bgzf_close.
+        ssize_t nread, nblock, nwr;
+        const int nheader = 18, neof = 28;
+        const uint8_t *eof = (uint8_t*) "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
         while (1)
         {
-            nread = bgzf_raw_read(fp, buf, page_size);
-
-            // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
-            if ( nread<=0 ) break;
-            if ( nread<=neof )      // last block
-            {
-                if ( ncached )
-                {
-                    // flush the part of the cache that won't be needed
-                    nwr = bgzf_raw_write(bgzf_out, cached, nread);
-                    if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
-
-                    // make space in the cache so that we can append to the end
-                    if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
-                }
-
-                // fill the cache and check for eof outside this loop
-                memcpy(cached+neof-nread,buf,nread);
-                break;
-            }
-
-            // not the last block, flush the cache if full
-            if ( ncached )
-            {
-                nwr = bgzf_raw_write(bgzf_out, cached, ncached);
-                if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)ncached);
-                ncached = 0;
-            }
-
-            // fill the cache
-            nread -= neof;
-            memcpy(cached,buf+nread,neof);
-            ncached = neof;
-
+            nread = bgzf_raw_read(fp, buf, nheader);
+            if ( !nread ) break;
+            if ( nread != nheader || check_header(buf)!=0 ) error("Could not parse the header of a bgzf block: %s\n",args->fnames[i]);
+            nblock = unpackInt16(buf+16) + 1;
+            assert( nblock <= page_size && nblock >= nheader );
+            nread += bgzf_raw_read(fp, buf+nheader, nblock - nheader);
+            if ( nread!=nblock ) error("Could not read %d bytes: %s\n",nblock,args->fnames[i]);
+            if ( nread==neof && !memcmp(buf,eof,neof) ) continue;
             nwr = bgzf_raw_write(bgzf_out, buf, nread);
-            if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
-        }
-        if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
-        {
-            nwr = bgzf_raw_write(bgzf_out, cached, neof);
-            if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)neof);
+            if ( nwr != nread ) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
         }
         if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
     }
@@ -679,8 +717,8 @@ static void usage(args_t *args)
     fprintf(pysam_stderr, "   -D, --remove-duplicates        Alias for -d none\n");
     fprintf(pysam_stderr, "   -f, --file-list <file>         Read the list of files from a file.\n");
     fprintf(pysam_stderr, "   -l, --ligate                   Ligate phased VCFs by matching phase at overlapping haplotypes\n");
-    fprintf(pysam_stderr, "       --no-version               do not append version and command line to the header\n");
-    fprintf(pysam_stderr, "   -n, --naive                    Concatenate BCF files without recompression (dangerous, use with caution)\n");
+    fprintf(pysam_stderr, "       --no-version               Do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "   -n, --naive                    Concatenate files without recompression (dangerous, use with caution)\n");
     fprintf(pysam_stderr, "   -o, --output <file>            Write output to a file [standard output]\n");
     fprintf(pysam_stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
     fprintf(pysam_stderr, "   -q, --min-PQ <int>             Break phase set if phasing quality is lower than <int> [30]\n");
diff --git a/bcftools/vcfconvert.c b/bcftools/vcfconvert.c
index 1e60d30..f650bea 100644
--- a/bcftools/vcfconvert.c
+++ b/bcftools/vcfconvert.c
@@ -23,6 +23,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -65,7 +66,7 @@ struct _args_t
     int rev_als, output_vcf_ids, hap2dip, output_chrom_first_col;
     int nsamples, *samples, sample_is_file, targets_is_file, regions_is_file, output_type;
     char **argv, *sample_list, *targets_list, *regions_list, *tag, *columns;
-    char *outfname, *infname, *ref_fname;
+    char *outfname, *infname, *ref_fname, *sex_fname;
     int argc, n_threads, record_cmd_line;
 };
 
@@ -81,6 +82,9 @@ static void destroy_data(args_t *args)
 static void open_vcf(args_t *args, const char *format_str)
 {
     args->files = bcf_sr_init();
+    if ( args->n_threads && bcf_sr_set_threads(args->files, args->n_threads)!=0 )
+        error("Could not initialize --threads %d\n", args->n_threads);
+
     if ( args->regions_list )
     {
         if ( bcf_sr_set_regions(args->files, args->regions_list, args->regions_is_file)<0 )
@@ -129,9 +133,6 @@ static void open_vcf(args_t *args, const char *format_str)
     }
     if ( format_str ) args->convert = convert_init(args->header, samples, nsamples, format_str);
     free(samples);
-
-    if ( args->filter_str )
-        args->filter = filter_init(args->header, args->filter_str);
 }
 
 static int tsv_setter_chrom_pos_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr)
@@ -373,6 +374,7 @@ static void gensample_to_vcf(args_t *args)
 
     int i, nsamples;
     char **samples = hts_readlist(sample_fname, 1, &nsamples);
+    if ( !samples ) error("Could not read %s\n", sample_fname);
     for (i=2; i<nsamples; i++)
     {
         se = samples[i]; while ( *se && !isspace(*se) ) se++;
@@ -493,6 +495,7 @@ static void haplegendsample_to_vcf(args_t *args)
 
     int i, nrows, nsamples;
     char **samples = hts_readlist(sample_fname, 1, &nrows);
+    if ( !samples ) error("Could not read %s\n", sample_fname);
     nsamples = nrows - 1;
 
     // sample_fname should contain a header line, so need to ignore first row
@@ -610,6 +613,7 @@ static void hapsample_to_vcf(args_t *args)
 
     int i, nsamples;
     char **samples = hts_readlist(sample_fname, 1, &nsamples);
+    if ( !samples ) error("Could not read %s\n", sample_fname);
     for (i=2; i<nsamples; i++)
     {
         se = samples[i]; while ( *se && !isspace(*se) ) se++;
@@ -654,6 +658,32 @@ static void hapsample_to_vcf(args_t *args)
     fprintf(stderr,"Number of processed rows: \t%d\n", args->n.total);
 }
 
+char *init_sample2sex(bcf_hdr_t *hdr, char *sex_fname)
+{
+    int i, nlines;
+    char *sample2sex = (char*) calloc(bcf_hdr_nsamples(hdr),1);
+    char **lines = hts_readlist(sex_fname, 1, &nlines);
+    if ( !lines ) error("Could not read %s\n", sex_fname);
+    for (i=0; i<nlines; i++)
+    {
+        char *se = lines[i]; while ( *se && !isspace(*se) ) se++;
+        char tmp = *se;
+        *se = 0;
+        int id = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, lines[i]);
+        *se = tmp;
+        if ( id<0 ) continue;
+        while ( *se && isspace(*se) ) se++;
+        if ( *se=='M' ) sample2sex[id] = '1';
+        else if ( *se=='F' ) sample2sex[id] = '2';
+        else error("Could not parse %s: %s\n", sex_fname,lines[i]);
+    }
+    for (i=0; i<nlines; i++) free(lines[i]);
+    free(lines);
+    for (i=0; i<bcf_hdr_nsamples(hdr); i++) 
+        if ( !sample2sex[i] ) error("Missing sex for sample %s in %s\n", bcf_hdr_int2id(hdr, BCF_DT_SAMPLE, i),sex_fname);
+    return sample2sex;
+}
+
 static void vcf_to_gensample(args_t *args)
 {
     kstring_t str = {0,0,0};
@@ -682,7 +712,7 @@ static void vcf_to_gensample(args_t *args)
     char *gen_fname = NULL, *sample_fname = NULL;
     str.l = 0;
     kputs(args->outfname,&str);
-    int n_files, i;
+    int n_files = 0, i;
     char **files = hts_readlist(str.s, 0, &n_files);
     if ( n_files==1 )
     {
@@ -712,22 +742,30 @@ static void vcf_to_gensample(args_t *args)
     if (sample_fname) fprintf(stderr, "Sample file: %s\n", sample_fname);
 
     // write samples file
-    if (sample_fname) {
+    if (sample_fname) 
+    {
+        char *sample2sex = NULL;
+        if ( args->sex_fname ) sample2sex = init_sample2sex(args->header,args->sex_fname);
+
         int i;
         BGZF *sout = bgzf_open(sample_fname, sample_compressed ? "wg" : "wu");
         str.l = 0;
-        kputs("ID_1 ID_2 missing\n0 0 0\n", &str);
+        kputs(sample2sex ? "ID_1 ID_2 missing sex\n0 0 0 0\n" : "ID_1 ID_2 missing\n0 0 0\n", &str);
         ret = bgzf_write(sout, str.s, str.l);
         if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         for (i=0; i<bcf_hdr_nsamples(args->header); i++)
         {
             str.l = 0;
-            ksprintf(&str, "%s %s 0\n", args->header->samples[i],args->header->samples[i]);
+            if ( sample2sex )
+                ksprintf(&str, "%s %s 0 %c\n", args->header->samples[i],args->header->samples[i],sample2sex[i]);
+            else
+                ksprintf(&str, "%s %s 0\n", args->header->samples[i],args->header->samples[i]);
             ret = bgzf_write(sout, str.s, str.l);
             if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         }
         if ( bgzf_close(sout)!=0 ) error("Error closing %s: %s\n", sample_fname, strerror(errno));
         free(sample_fname);
+        free(sample2sex);
     }
     if (!gen_fname) {
         if ( str.m ) free(str.s);
@@ -793,7 +831,7 @@ static void vcf_to_haplegendsample(args_t *args)
     char *hap_fname = NULL, *legend_fname = NULL, *sample_fname = NULL;
     str.l = 0;
     kputs(args->outfname,&str);
-    int n_files, i;
+    int n_files = 0, i;
     char **files = hts_readlist(str.s, 0, &n_files);
     if ( n_files==1 )
     {
@@ -829,7 +867,11 @@ static void vcf_to_haplegendsample(args_t *args)
     if (sample_fname) fprintf(stderr, "Sample file: %s\n", sample_fname);
 
     // write samples file
-    if (sample_fname) {
+    if (sample_fname)
+    {
+        char *sample2sex = NULL;
+        if ( args->sex_fname ) sample2sex = init_sample2sex(args->header,args->sex_fname);
+        
         int i;
         BGZF *sout = bgzf_open(sample_fname, sample_compressed ? "wg" : "wu");
         str.l = 0;
@@ -839,12 +881,13 @@ static void vcf_to_haplegendsample(args_t *args)
         for (i=0; i<bcf_hdr_nsamples(args->header); i++)
         {
             str.l = 0;
-            ksprintf(&str, "%s %s %s 2\n", args->header->samples[i], args->header->samples[i], args->header->samples[i]);
+            ksprintf(&str, "%s %s %s %c\n", args->header->samples[i], args->header->samples[i], args->header->samples[i], sample2sex ? sample2sex[i] : '2');
             ret = bgzf_write(sout, str.s, str.l);
             if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         }
         if ( bgzf_close(sout)!=0 ) error("Error closing %s: %s\n", sample_fname, strerror(errno));
         free(sample_fname);
+        free(sample2sex);
     }
     if (!hap_fname && !legend_fname) {
         if ( str.m ) free(str.s);
@@ -853,6 +896,7 @@ static void vcf_to_haplegendsample(args_t *args)
 
     // open haps and legend outputs
     BGZF *hout = hap_fname ? bgzf_open(hap_fname, hap_compressed ? "wg" : "wu") : NULL;
+    if ( hap_compressed && args->n_threads ) bgzf_thread_pool(hout, args->files->p->pool, args->files->p->qsize);
     BGZF *lout = legend_fname ? bgzf_open(legend_fname, legend_compressed ? "wg" : "wu") : NULL;
     if (legend_fname) {
         str.l = 0;
@@ -940,7 +984,7 @@ static void vcf_to_hapsample(args_t *args)
     char *hap_fname = NULL, *sample_fname = NULL;
     str.l = 0;
     kputs(args->outfname,&str);
-    int n_files, i;
+    int n_files = 0, i;
     char **files = hts_readlist(str.s, 0, &n_files);
     if ( n_files==1 )
     {
@@ -970,22 +1014,30 @@ static void vcf_to_hapsample(args_t *args)
     if (sample_fname) fprintf(stderr, "Sample file: %s\n", sample_fname);
 
     // write samples file
-    if (sample_fname) {
+    if (sample_fname)
+    {
+        char *sample2sex = NULL;
+        if ( args->sex_fname ) sample2sex = init_sample2sex(args->header,args->sex_fname);
+
         int i;
         BGZF *sout = bgzf_open(sample_fname, sample_compressed ? "wg" : "wu");
         str.l = 0;
-        kputs("ID_1 ID_2 missing\n0 0 0\n", &str);
+        kputs(sample2sex ? "ID_1 ID_2 missing sex\n0 0 0 0\n" : "ID_1 ID_2 missing\n0 0 0\n", &str);
         ret = bgzf_write(sout, str.s, str.l);
         if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         for (i=0; i<bcf_hdr_nsamples(args->header); i++)
         {
             str.l = 0;
-            ksprintf(&str, "%s %s 0\n", args->header->samples[i], args->header->samples[i]);
+            if ( sample2sex )
+                ksprintf(&str, "%s %s 0 %c\n", args->header->samples[i],args->header->samples[i],sample2sex[i]);
+            else
+                ksprintf(&str, "%s %s 0\n", args->header->samples[i],args->header->samples[i]);
             ret = bgzf_write(sout, str.s, str.l);
             if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         }
         if ( bgzf_close(sout)!=0 ) error("Error closing %s: %s\n", sample_fname, strerror(errno));
         free(sample_fname);
+        free(sample2sex);
     }
     if (!hap_fname) {
         if ( str.m ) free(str.s);
@@ -994,6 +1046,7 @@ static void vcf_to_hapsample(args_t *args)
 
     // open haps output
     BGZF *hout = hap_fname ? bgzf_open(hap_fname, hap_compressed ? "wg" : "wu") : NULL;
+    if ( hap_compressed && args->n_threads ) bgzf_thread_pool(hout, args->files->p->pool, args->files->p->qsize);
 
     int no_alt = 0, non_biallelic = 0, filtered = 0, nok = 0;
     while ( bcf_sr_next_line(args->files) )
@@ -1256,9 +1309,30 @@ static void gvcf_to_vcf(args_t *args)
             if ( !pass ) continue;
         }
 
-        if ( line->n_allele!=1 || !bcf_has_filter(hdr,line,"PASS") )
+        if (!bcf_has_filter(hdr,line,"PASS"))
+        {
+            bcf_write(out_fh,hdr,line);
+            continue;
+        }
+
+        // check if alleles compatible with being a gVCF record
+        int i, gallele = -1;
+        if (line->n_allele==1)
+            gallele = 0; // illumina/bcftools-call gvcf (if INFO/END present)
+        else
+        {
+            if ( line->d.allele[1][0]!='<' ) continue;
+            for (i=1; i<line->n_allele; i++)
+            {
+                if ( line->d.allele[i][1]=='*' && line->d.allele[i][2]=='>' && line->d.allele[i][3]=='\0' ) { gallele = i; break; } // mpileup/spec compliant gVCF
+                if ( line->d.allele[i][1]=='X' && line->d.allele[i][2]=='>' && line->d.allele[i][3]=='\0' ) { gallele = i; break; } // old mpileup gVCF
+                if ( strcmp(line->d.allele[i],"<NON_REF>")==0 ) { gallele = i; break; }               // GATK gVCF
+            }
+        }
+
+        // no gVCF compatible alleles
+        if (gallele<0)
         {
-            // Assuming that only ALT=. sites can be blocks and skipping sites which don't PASS
             bcf_write(out_fh,hdr,line);
             continue;
         }
@@ -1266,7 +1340,7 @@ static void gvcf_to_vcf(args_t *args)
         int nend = bcf_get_info_int32(hdr,line,"END",&itmp,&nitmp);
         if ( nend!=1 )
         {
-            // No END lineord
+            // No INFO/END => not gVCF record
             bcf_write(out_fh,hdr,line);
             continue;
         }
@@ -1277,10 +1351,9 @@ static void gvcf_to_vcf(args_t *args)
             line->pos = pos;
             char *ref = faidx_fetch_seq(args->ref, (char*)bcf_hdr_id2name(hdr,line->rid), line->pos, line->pos, &len);
             if ( !ref ) error("faidx_fetch_seq failed at %s:%d\n", bcf_hdr_id2name(hdr,line->rid), line->pos+1);
-            // we have already checked above that there is only one allele,
-            // so fine to just update alleles with the ref allele from the fasta
-            bcf_update_alleles_str(hdr, line, &ref[0]);
+            strncpy(line->d.allele[0],ref,len);
             bcf_write(out_fh,hdr,line);
+            free(ref);
         }
     }
     free(itmp);
@@ -1316,6 +1389,7 @@ static void usage(void)
     fprintf(stderr, "   -g, --gensample <...>       <prefix>|<gen-file>,<sample-file>\n");
     fprintf(stderr, "       --tag <string>          tag to take values for .gen file: GT,PL,GL,GP [GT]\n");
     fprintf(stderr, "       --chrom                 output chromosome in first column instead of CHROM:POS_REF_ALT\n");
+    fprintf(stderr, "       --sex <file>            output sex column in the sample-file, input format is: Sample\\t[MF]\n");
     fprintf(stderr, "       --vcf-ids               output VCF IDs in second column instead of CHROM:POS_REF_ALT\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "gVCF conversion:\n");
@@ -1326,12 +1400,14 @@ static void usage(void)
     fprintf(stderr, "       --hapsample2vcf <...>   <prefix>|<haps-file>,<sample-file>\n");
     fprintf(stderr, "       --hapsample <...>       <prefix>|<haps-file>,<sample-file>\n");
     fprintf(stderr, "       --haploid2diploid       convert haploid genotypes to diploid homozygotes\n");
+    fprintf(stderr, "       --sex <file>            output sex column in the sample-file, input format is: Sample\\t[MF]\n");
     fprintf(stderr, "       --vcf-ids               output VCF IDs instead of CHROM:POS_REF_ALT\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "HAP/LEGEND/SAMPLE conversion:\n");
     fprintf(stderr, "   -H, --haplegendsample2vcf <...>  <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
     fprintf(stderr, "   -h, --haplegendsample <...>      <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
     fprintf(stderr, "       --haploid2diploid            convert haploid genotypes to diploid homozygotes\n");
+    fprintf(stderr, "       --sex <file>                 output sex column in the sample-file, input format is: Sample\\t[MF]\n");
     fprintf(stderr, "       --vcf-ids                    output VCF IDs instead of CHROM:POS_REF_ALT\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "TSV conversion:\n");
@@ -1375,6 +1451,7 @@ int main_vcfconvert(int argc, char *argv[])
         {"targets-file",required_argument,NULL,'T'},
         {"samples",required_argument,NULL,'s'},
         {"samples-file",required_argument,NULL,'S'},
+        {"sex",required_argument,NULL,11},
         {"gensample",required_argument,NULL,'g'},
         {"gensample2vcf",required_argument,NULL,'G'},
         {"tag",required_argument,NULL,1},
@@ -1428,6 +1505,7 @@ int main_vcfconvert(int argc, char *argv[])
             case 'h': args->convert_func = vcf_to_haplegendsample; args->outfname = optarg; break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case 10 : args->record_cmd_line = 0; break;
+            case 11 : args->sex_fname = optarg; break;
             case '?': usage();
             default: error("Unknown argument: %s\n", optarg);
         }
diff --git a/bcftools/vcfconvert.c.pysam.c b/bcftools/vcfconvert.c.pysam.c
index 12333cc..4d3469c 100644
--- a/bcftools/vcfconvert.c.pysam.c
+++ b/bcftools/vcfconvert.c.pysam.c
@@ -25,6 +25,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -67,7 +68,7 @@ struct _args_t
     int rev_als, output_vcf_ids, hap2dip, output_chrom_first_col;
     int nsamples, *samples, sample_is_file, targets_is_file, regions_is_file, output_type;
     char **argv, *sample_list, *targets_list, *regions_list, *tag, *columns;
-    char *outfname, *infname, *ref_fname;
+    char *outfname, *infname, *ref_fname, *sex_fname;
     int argc, n_threads, record_cmd_line;
 };
 
@@ -83,6 +84,9 @@ static void destroy_data(args_t *args)
 static void open_vcf(args_t *args, const char *format_str)
 {
     args->files = bcf_sr_init();
+    if ( args->n_threads && bcf_sr_set_threads(args->files, args->n_threads)!=0 )
+        error("Could not initialize --threads %d\n", args->n_threads);
+
     if ( args->regions_list )
     {
         if ( bcf_sr_set_regions(args->files, args->regions_list, args->regions_is_file)<0 )
@@ -131,9 +135,6 @@ static void open_vcf(args_t *args, const char *format_str)
     }
     if ( format_str ) args->convert = convert_init(args->header, samples, nsamples, format_str);
     free(samples);
-
-    if ( args->filter_str )
-        args->filter = filter_init(args->header, args->filter_str);
 }
 
 static int tsv_setter_chrom_pos_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr)
@@ -375,6 +376,7 @@ static void gensample_to_vcf(args_t *args)
 
     int i, nsamples;
     char **samples = hts_readlist(sample_fname, 1, &nsamples);
+    if ( !samples ) error("Could not read %s\n", sample_fname);
     for (i=2; i<nsamples; i++)
     {
         se = samples[i]; while ( *se && !isspace(*se) ) se++;
@@ -495,6 +497,7 @@ static void haplegendsample_to_vcf(args_t *args)
 
     int i, nrows, nsamples;
     char **samples = hts_readlist(sample_fname, 1, &nrows);
+    if ( !samples ) error("Could not read %s\n", sample_fname);
     nsamples = nrows - 1;
 
     // sample_fname should contain a header line, so need to ignore first row
@@ -612,6 +615,7 @@ static void hapsample_to_vcf(args_t *args)
 
     int i, nsamples;
     char **samples = hts_readlist(sample_fname, 1, &nsamples);
+    if ( !samples ) error("Could not read %s\n", sample_fname);
     for (i=2; i<nsamples; i++)
     {
         se = samples[i]; while ( *se && !isspace(*se) ) se++;
@@ -656,6 +660,32 @@ static void hapsample_to_vcf(args_t *args)
     fprintf(pysam_stderr,"Number of processed rows: \t%d\n", args->n.total);
 }
 
+char *init_sample2sex(bcf_hdr_t *hdr, char *sex_fname)
+{
+    int i, nlines;
+    char *sample2sex = (char*) calloc(bcf_hdr_nsamples(hdr),1);
+    char **lines = hts_readlist(sex_fname, 1, &nlines);
+    if ( !lines ) error("Could not read %s\n", sex_fname);
+    for (i=0; i<nlines; i++)
+    {
+        char *se = lines[i]; while ( *se && !isspace(*se) ) se++;
+        char tmp = *se;
+        *se = 0;
+        int id = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, lines[i]);
+        *se = tmp;
+        if ( id<0 ) continue;
+        while ( *se && isspace(*se) ) se++;
+        if ( *se=='M' ) sample2sex[id] = '1';
+        else if ( *se=='F' ) sample2sex[id] = '2';
+        else error("Could not parse %s: %s\n", sex_fname,lines[i]);
+    }
+    for (i=0; i<nlines; i++) free(lines[i]);
+    free(lines);
+    for (i=0; i<bcf_hdr_nsamples(hdr); i++) 
+        if ( !sample2sex[i] ) error("Missing sex for sample %s in %s\n", bcf_hdr_int2id(hdr, BCF_DT_SAMPLE, i),sex_fname);
+    return sample2sex;
+}
+
 static void vcf_to_gensample(args_t *args)
 {
     kstring_t str = {0,0,0};
@@ -684,7 +714,7 @@ static void vcf_to_gensample(args_t *args)
     char *gen_fname = NULL, *sample_fname = NULL;
     str.l = 0;
     kputs(args->outfname,&str);
-    int n_files, i;
+    int n_files = 0, i;
     char **files = hts_readlist(str.s, 0, &n_files);
     if ( n_files==1 )
     {
@@ -714,22 +744,30 @@ static void vcf_to_gensample(args_t *args)
     if (sample_fname) fprintf(pysam_stderr, "Sample file: %s\n", sample_fname);
 
     // write samples file
-    if (sample_fname) {
+    if (sample_fname) 
+    {
+        char *sample2sex = NULL;
+        if ( args->sex_fname ) sample2sex = init_sample2sex(args->header,args->sex_fname);
+
         int i;
         BGZF *sout = bgzf_open(sample_fname, sample_compressed ? "wg" : "wu");
         str.l = 0;
-        kputs("ID_1 ID_2 missing\n0 0 0\n", &str);
+        kputs(sample2sex ? "ID_1 ID_2 missing sex\n0 0 0 0\n" : "ID_1 ID_2 missing\n0 0 0\n", &str);
         ret = bgzf_write(sout, str.s, str.l);
         if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         for (i=0; i<bcf_hdr_nsamples(args->header); i++)
         {
             str.l = 0;
-            ksprintf(&str, "%s %s 0\n", args->header->samples[i],args->header->samples[i]);
+            if ( sample2sex )
+                ksprintf(&str, "%s %s 0 %c\n", args->header->samples[i],args->header->samples[i],sample2sex[i]);
+            else
+                ksprintf(&str, "%s %s 0\n", args->header->samples[i],args->header->samples[i]);
             ret = bgzf_write(sout, str.s, str.l);
             if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         }
         if ( bgzf_close(sout)!=0 ) error("Error closing %s: %s\n", sample_fname, strerror(errno));
         free(sample_fname);
+        free(sample2sex);
     }
     if (!gen_fname) {
         if ( str.m ) free(str.s);
@@ -795,7 +833,7 @@ static void vcf_to_haplegendsample(args_t *args)
     char *hap_fname = NULL, *legend_fname = NULL, *sample_fname = NULL;
     str.l = 0;
     kputs(args->outfname,&str);
-    int n_files, i;
+    int n_files = 0, i;
     char **files = hts_readlist(str.s, 0, &n_files);
     if ( n_files==1 )
     {
@@ -831,7 +869,11 @@ static void vcf_to_haplegendsample(args_t *args)
     if (sample_fname) fprintf(pysam_stderr, "Sample file: %s\n", sample_fname);
 
     // write samples file
-    if (sample_fname) {
+    if (sample_fname)
+    {
+        char *sample2sex = NULL;
+        if ( args->sex_fname ) sample2sex = init_sample2sex(args->header,args->sex_fname);
+        
         int i;
         BGZF *sout = bgzf_open(sample_fname, sample_compressed ? "wg" : "wu");
         str.l = 0;
@@ -841,12 +883,13 @@ static void vcf_to_haplegendsample(args_t *args)
         for (i=0; i<bcf_hdr_nsamples(args->header); i++)
         {
             str.l = 0;
-            ksprintf(&str, "%s %s %s 2\n", args->header->samples[i], args->header->samples[i], args->header->samples[i]);
+            ksprintf(&str, "%s %s %s %c\n", args->header->samples[i], args->header->samples[i], args->header->samples[i], sample2sex ? sample2sex[i] : '2');
             ret = bgzf_write(sout, str.s, str.l);
             if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         }
         if ( bgzf_close(sout)!=0 ) error("Error closing %s: %s\n", sample_fname, strerror(errno));
         free(sample_fname);
+        free(sample2sex);
     }
     if (!hap_fname && !legend_fname) {
         if ( str.m ) free(str.s);
@@ -855,6 +898,7 @@ static void vcf_to_haplegendsample(args_t *args)
 
     // open haps and legend outputs
     BGZF *hout = hap_fname ? bgzf_open(hap_fname, hap_compressed ? "wg" : "wu") : NULL;
+    if ( hap_compressed && args->n_threads ) bgzf_thread_pool(hout, args->files->p->pool, args->files->p->qsize);
     BGZF *lout = legend_fname ? bgzf_open(legend_fname, legend_compressed ? "wg" : "wu") : NULL;
     if (legend_fname) {
         str.l = 0;
@@ -942,7 +986,7 @@ static void vcf_to_hapsample(args_t *args)
     char *hap_fname = NULL, *sample_fname = NULL;
     str.l = 0;
     kputs(args->outfname,&str);
-    int n_files, i;
+    int n_files = 0, i;
     char **files = hts_readlist(str.s, 0, &n_files);
     if ( n_files==1 )
     {
@@ -972,22 +1016,30 @@ static void vcf_to_hapsample(args_t *args)
     if (sample_fname) fprintf(pysam_stderr, "Sample file: %s\n", sample_fname);
 
     // write samples file
-    if (sample_fname) {
+    if (sample_fname)
+    {
+        char *sample2sex = NULL;
+        if ( args->sex_fname ) sample2sex = init_sample2sex(args->header,args->sex_fname);
+
         int i;
         BGZF *sout = bgzf_open(sample_fname, sample_compressed ? "wg" : "wu");
         str.l = 0;
-        kputs("ID_1 ID_2 missing\n0 0 0\n", &str);
+        kputs(sample2sex ? "ID_1 ID_2 missing sex\n0 0 0 0\n" : "ID_1 ID_2 missing\n0 0 0\n", &str);
         ret = bgzf_write(sout, str.s, str.l);
         if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         for (i=0; i<bcf_hdr_nsamples(args->header); i++)
         {
             str.l = 0;
-            ksprintf(&str, "%s %s 0\n", args->header->samples[i], args->header->samples[i]);
+            if ( sample2sex )
+                ksprintf(&str, "%s %s 0 %c\n", args->header->samples[i],args->header->samples[i],sample2sex[i]);
+            else
+                ksprintf(&str, "%s %s 0\n", args->header->samples[i],args->header->samples[i]);
             ret = bgzf_write(sout, str.s, str.l);
             if ( ret != str.l ) error("Error writing %s: %s\n", sample_fname, strerror(errno));
         }
         if ( bgzf_close(sout)!=0 ) error("Error closing %s: %s\n", sample_fname, strerror(errno));
         free(sample_fname);
+        free(sample2sex);
     }
     if (!hap_fname) {
         if ( str.m ) free(str.s);
@@ -996,6 +1048,7 @@ static void vcf_to_hapsample(args_t *args)
 
     // open haps output
     BGZF *hout = hap_fname ? bgzf_open(hap_fname, hap_compressed ? "wg" : "wu") : NULL;
+    if ( hap_compressed && args->n_threads ) bgzf_thread_pool(hout, args->files->p->pool, args->files->p->qsize);
 
     int no_alt = 0, non_biallelic = 0, filtered = 0, nok = 0;
     while ( bcf_sr_next_line(args->files) )
@@ -1258,9 +1311,30 @@ static void gvcf_to_vcf(args_t *args)
             if ( !pass ) continue;
         }
 
-        if ( line->n_allele!=1 || !bcf_has_filter(hdr,line,"PASS") )
+        if (!bcf_has_filter(hdr,line,"PASS"))
+        {
+            bcf_write(out_fh,hdr,line);
+            continue;
+        }
+
+        // check if alleles compatible with being a gVCF record
+        int i, gallele = -1;
+        if (line->n_allele==1)
+            gallele = 0; // illumina/bcftools-call gvcf (if INFO/END present)
+        else
+        {
+            if ( line->d.allele[1][0]!='<' ) continue;
+            for (i=1; i<line->n_allele; i++)
+            {
+                if ( line->d.allele[i][1]=='*' && line->d.allele[i][2]=='>' && line->d.allele[i][3]=='\0' ) { gallele = i; break; } // mpileup/spec compliant gVCF
+                if ( line->d.allele[i][1]=='X' && line->d.allele[i][2]=='>' && line->d.allele[i][3]=='\0' ) { gallele = i; break; } // old mpileup gVCF
+                if ( strcmp(line->d.allele[i],"<NON_REF>")==0 ) { gallele = i; break; }               // GATK gVCF
+            }
+        }
+
+        // no gVCF compatible alleles
+        if (gallele<0)
         {
-            // Assuming that only ALT=. sites can be blocks and skipping sites which don't PASS
             bcf_write(out_fh,hdr,line);
             continue;
         }
@@ -1268,7 +1342,7 @@ static void gvcf_to_vcf(args_t *args)
         int nend = bcf_get_info_int32(hdr,line,"END",&itmp,&nitmp);
         if ( nend!=1 )
         {
-            // No END lineord
+            // No INFO/END => not gVCF record
             bcf_write(out_fh,hdr,line);
             continue;
         }
@@ -1279,10 +1353,9 @@ static void gvcf_to_vcf(args_t *args)
             line->pos = pos;
             char *ref = faidx_fetch_seq(args->ref, (char*)bcf_hdr_id2name(hdr,line->rid), line->pos, line->pos, &len);
             if ( !ref ) error("faidx_fetch_seq failed at %s:%d\n", bcf_hdr_id2name(hdr,line->rid), line->pos+1);
-            // we have already checked above that there is only one allele,
-            // so fine to just update alleles with the ref allele from the fasta
-            bcf_update_alleles_str(hdr, line, &ref[0]);
+            strncpy(line->d.allele[0],ref,len);
             bcf_write(out_fh,hdr,line);
+            free(ref);
         }
     }
     free(itmp);
@@ -1318,6 +1391,7 @@ static void usage(void)
     fprintf(pysam_stderr, "   -g, --gensample <...>       <prefix>|<gen-file>,<sample-file>\n");
     fprintf(pysam_stderr, "       --tag <string>          tag to take values for .gen file: GT,PL,GL,GP [GT]\n");
     fprintf(pysam_stderr, "       --chrom                 output chromosome in first column instead of CHROM:POS_REF_ALT\n");
+    fprintf(pysam_stderr, "       --sex <file>            output sex column in the sample-file, input format is: Sample\\t[MF]\n");
     fprintf(pysam_stderr, "       --vcf-ids               output VCF IDs in second column instead of CHROM:POS_REF_ALT\n");
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "gVCF conversion:\n");
@@ -1328,12 +1402,14 @@ static void usage(void)
     fprintf(pysam_stderr, "       --hapsample2vcf <...>   <prefix>|<haps-file>,<sample-file>\n");
     fprintf(pysam_stderr, "       --hapsample <...>       <prefix>|<haps-file>,<sample-file>\n");
     fprintf(pysam_stderr, "       --haploid2diploid       convert haploid genotypes to diploid homozygotes\n");
+    fprintf(pysam_stderr, "       --sex <file>            output sex column in the sample-file, input format is: Sample\\t[MF]\n");
     fprintf(pysam_stderr, "       --vcf-ids               output VCF IDs instead of CHROM:POS_REF_ALT\n");
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "HAP/LEGEND/SAMPLE conversion:\n");
     fprintf(pysam_stderr, "   -H, --haplegendsample2vcf <...>  <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
     fprintf(pysam_stderr, "   -h, --haplegendsample <...>      <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
     fprintf(pysam_stderr, "       --haploid2diploid            convert haploid genotypes to diploid homozygotes\n");
+    fprintf(pysam_stderr, "       --sex <file>                 output sex column in the sample-file, input format is: Sample\\t[MF]\n");
     fprintf(pysam_stderr, "       --vcf-ids                    output VCF IDs instead of CHROM:POS_REF_ALT\n");
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "TSV conversion:\n");
@@ -1377,6 +1453,7 @@ int main_vcfconvert(int argc, char *argv[])
         {"targets-file",required_argument,NULL,'T'},
         {"samples",required_argument,NULL,'s'},
         {"samples-file",required_argument,NULL,'S'},
+        {"sex",required_argument,NULL,11},
         {"gensample",required_argument,NULL,'g'},
         {"gensample2vcf",required_argument,NULL,'G'},
         {"tag",required_argument,NULL,1},
@@ -1430,6 +1507,7 @@ int main_vcfconvert(int argc, char *argv[])
             case 'h': args->convert_func = vcf_to_haplegendsample; args->outfname = optarg; break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case 10 : args->record_cmd_line = 0; break;
+            case 11 : args->sex_fname = optarg; break;
             case '?': usage();
             default: error("Unknown argument: %s\n", optarg);
         }
diff --git a/bcftools/vcffilter.c b/bcftools/vcffilter.c
index f979d77..c1b41f2 100644
--- a/bcftools/vcffilter.c
+++ b/bcftools/vcffilter.c
@@ -129,7 +129,8 @@ static void init_data(args_t *args)
                 if ( tmp.s ) kputs(" and ", &tmp);
                 kputs("\"IndelGap\"", &tmp);
             }
-            fprintf(stderr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
+            if ( strncmp(tmp.s+1,args->soft_filter,tmp.l-2) )
+                fprintf(stderr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
             free(tmp.s);
         }
 
diff --git a/bcftools/vcffilter.c.pysam.c b/bcftools/vcffilter.c.pysam.c
index 58193da..e603bde 100644
--- a/bcftools/vcffilter.c.pysam.c
+++ b/bcftools/vcffilter.c.pysam.c
@@ -131,7 +131,8 @@ static void init_data(args_t *args)
                 if ( tmp.s ) kputs(" and ", &tmp);
                 kputs("\"IndelGap\"", &tmp);
             }
-            fprintf(pysam_stderr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
+            if ( strncmp(tmp.s+1,args->soft_filter,tmp.l-2) )
+                fprintf(pysam_stderr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
             free(tmp.s);
         }
 
diff --git a/bcftools/vcfgtcheck.c b/bcftools/vcfgtcheck.c
index b741ef6..8835db3 100644
--- a/bcftools/vcfgtcheck.c
+++ b/bcftools/vcfgtcheck.c
@@ -35,7 +35,9 @@ THE SOFTWARE.  */
 #include <htslib/vcf.h>
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/vcfutils.h>
+#include <inttypes.h>
 #include "bcftools.h"
+#include "hclust.h"
 
 typedef struct
 {
@@ -43,10 +45,10 @@ typedef struct
     bcf_hdr_t *gt_hdr, *sm_hdr; // VCF with genotypes to compare against and the query VCF
     int ntmp_arr, npl_arr;
     int32_t *tmp_arr, *pl_arr;
-    double *lks, *sites;
+    double *lks, *sites, min_inter_err, max_intra_err;
     int *cnts, *dps, hom_only, cross_check, all_sites;
     char *cwd, **argv, *gt_fname, *plot, *query_sample, *target_sample;
-    int argc, no_PLs;
+    int argc, no_PLs, narr, nsmpl;
 }
 args_t;
 
@@ -133,6 +135,7 @@ static void plot_check(args_t *args, char *target_sample, char *query_sample)
     free(fname);
 }
 
+#if 0
 static void plot_cross_check(args_t *args)
 {
     char *fname;
@@ -214,6 +217,7 @@ static void plot_cross_check(args_t *args)
     py_plot(fname);
     free(fname);
 }
+#endif
 
 static void init_data(args_t *args)
 {
@@ -230,14 +234,6 @@ static void init_data(args_t *args)
         args->sites = (double*) calloc(nsamples,sizeof(double));
         args->dps   = (int*) calloc(nsamples,sizeof(int));
     }
-    else
-    {
-        int nsamples = bcf_hdr_nsamples(args->sm_hdr);
-        int narr = (nsamples-1)*nsamples/2;
-        args->lks  = (double*) calloc(narr,sizeof(double));
-        args->cnts = (int*) calloc(narr,sizeof(int));
-        args->dps  = (int*) calloc(narr,sizeof(int));
-    }
 }
 
 static void destroy_data(args_t *args)
@@ -524,177 +520,181 @@ static void check_gt(args_t *args)
     }
 }
 
-static inline int is_hom_most_likely(int nals, int *pls)
+// static inline int is_hom_most_likely(int nals, int *pls)
+// {
+//     int ia, ib, idx = 1, min_is_hom = 1, min_pl = pls[0];
+//     for (ia=1; ia<nals; ia++)
+//     {
+//         for (ib=0; ib<ia; ib++)
+//         {
+//             if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 0; }
+//             idx++;
+//         }
+//         if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 1; }
+//         idx++;
+//     }
+//     return min_is_hom;
+// }
+
+int process_GT(args_t *args, bcf1_t *line, uint32_t *ntot, uint32_t *ndif)
+{
+    int ngt = bcf_get_genotypes(args->sm_hdr, line, &args->tmp_arr, &args->ntmp_arr);
+
+    if ( ngt<=0 ) return 1;                 // GT not present
+    if ( ngt!=args->nsmpl*2 ) return 2;     // not diploid
+    ngt /= args->nsmpl;
+    
+    int i,j, idx = 0;
+    for (i=1; i<args->nsmpl; i++)
+    {
+        int32_t *a = args->tmp_arr + i*ngt;
+        if ( bcf_gt_is_missing(a[0]) || bcf_gt_is_missing(a[1]) || a[1]==bcf_int32_vector_end ) { idx+=i; continue; }
+        int agt = 1<<bcf_gt_allele(a[0]) | 1<<bcf_gt_allele(a[1]);
+
+        for (j=0; j<i; j++)
+        {
+            int32_t *b = args->tmp_arr + j*ngt;
+            if ( bcf_gt_is_missing(b[0]) || bcf_gt_is_missing(b[1]) || b[1]==bcf_int32_vector_end ) { idx++; continue; }
+            int bgt = 1<<bcf_gt_allele(b[0]) | 1<<bcf_gt_allele(b[1]);
+
+            ntot[idx]++;
+            if ( agt!=bgt ) ndif[idx]++;
+            idx++;
+        }
+    }
+    return 0;
+}
+int process_PL(args_t *args, bcf1_t *line, uint32_t *ntot, uint32_t *ndif)
 {
-    int ia, ib, idx = 1, min_is_hom = 1, min_pl = pls[0];
-    for (ia=1; ia<nals; ia++)
+    int npl = bcf_get_format_int32(args->sm_hdr, line, "PL", &args->tmp_arr, &args->ntmp_arr);
+
+    if ( npl<=0 ) return 1;                 // PL not present
+    npl /= args->nsmpl;
+    
+    int i,j,k, idx = 0;
+    for (i=1; i<args->nsmpl; i++)
     {
-        for (ib=0; ib<ia; ib++)
+        int32_t *a = args->tmp_arr + i*npl;
+        int imin = -1;
+        for (k=0; k<npl; k++)
+        {
+            if ( a[k]==bcf_int32_vector_end ) break;
+            if ( a[k]==bcf_int32_missing ) continue;
+            if ( imin==-1 || a[imin] > a[k] ) imin = k;
+        }
+        if ( imin<0 ) { idx+=i; continue; }
+
+        for (j=0; j<i; j++)
         {
-            if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 0; }
+            int32_t *b = args->tmp_arr + j*npl;
+            int jmin = -1;
+            for (k=0; k<npl; k++)
+            {
+                if ( b[k]==bcf_int32_vector_end ) break;
+                if ( b[k]==bcf_int32_missing ) continue;
+                if ( jmin==-1 || b[jmin] > b[k] ) jmin = k;
+            }
+            if ( jmin<0 ) { idx++; continue; }
+
+            ntot[idx]++;
+            if ( imin!=jmin ) ndif[idx]++;
             idx++;
         }
-        if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 1; }
-        idx++;
     }
-    return min_is_hom;
+    return 0;
 }
 
 static void cross_check_gts(args_t *args)
 {
-    int nsamples = bcf_hdr_nsamples(args->sm_hdr), ndp_arr = 0;
-    unsigned int *dp = (unsigned int*) calloc(nsamples,sizeof(unsigned int)), *ndp = (unsigned int*) calloc(nsamples,sizeof(unsigned int)); // this will overflow one day...
-    int fake_pls = args->no_PLs, ignore_dp = 0;
-
-    int i,j,k,idx, pl_warned = 0, dp_warned = 0;
-    int32_t *dp_arr = NULL;
-    int *is_hom = args->hom_only ? (int*) malloc(sizeof(int)*nsamples) : NULL;
+    // Initialize things: check which tags are defined in the header, sample names etc.
     if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "PL")<0 )
     {
         if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
             error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
-        if ( !args->no_PLs )
+        if ( !args->no_PLs ) {
             fprintf(stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
-        fake_pls = 1;
+            args->no_PLs = 99;
+        }
     }
-    if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "DP")<0 ) ignore_dp = 1;
 
-    FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : stdout;
-    print_header(args, fp);
-    if ( args->all_sites ) fprintf(fp,"# [1]SD, Average Site Discordance\t[2]Chromosome\t[3]Position\t[4]Number of available pairs\t[5]Average discordance\n");
+    args->nsmpl = bcf_hdr_nsamples(args->sm_hdr);
+    args->narr  = (args->nsmpl-1)*args->nsmpl/2;
+
+    uint32_t *ndif = (uint32_t*) calloc(args->narr,4);
+    uint32_t *ntot = (uint32_t*) calloc(args->narr,4);
 
     while ( bcf_sr_next_line(args->files) )
     {
-        bcf1_t *line = args->files->readers[0].buffer[0];
-        bcf_unpack(line, BCF_UN_FMT);
-
-        int npl;
-        if ( !fake_pls )
-        {
-            npl = bcf_get_format_int32(args->sm_hdr, line, "PL", &args->pl_arr, &args->npl_arr);
-            if ( npl<=0 ) { pl_warned++; continue; }
-            npl /= nsamples;
-        }
-        else
-            npl = fake_PLs(args, args->sm_hdr, line);
-        int mdp = 0;
-        if ( !ignore_dp && (mdp=bcf_get_format_int32(args->sm_hdr, line, "DP", &dp_arr, &ndp_arr)) <= 0 ) dp_warned++;
+        bcf1_t *line = bcf_sr_get_line(args->files,0);
 
-        if ( args->hom_only )
+        // use PLs unless no_PLs is set and GT exists
+        if ( args->no_PLs )
         {
-            for (i=0; i<nsamples; i++)
-                is_hom[i] = is_hom_most_likely(line->n_allele, args->pl_arr+i*npl);
+            if ( process_GT(args,line,ntot,ndif)==0 ) continue;
         }
-
-        double sum = 0; int nsum = 0;
-        idx = 0;
-        for (i=0; i<nsamples; i++)
-        {
-            int *ipl = &args->pl_arr[i*npl];
-            if ( *ipl==-1 ) { idx += i; continue; } // missing genotype
-            if ( mdp>0 && (dp_arr[i]==bcf_int32_missing || !dp_arr[i]) ) { idx += i; continue; }
-            if ( args->hom_only && !is_hom[i] ) { idx += i; continue; }
-
-            for (j=0; j<i; j++)
-            {
-                int *jpl = &args->pl_arr[j*npl];
-                if ( *jpl==-1 ) { idx++; continue; } // missing genotype
-                if ( mdp>0 && (dp_arr[j]==bcf_int32_missing || !dp_arr[j]) ) { idx++; continue; }
-                if ( args->hom_only && !is_hom[j] ) { idx++; continue; }
-
-                int min_pl = INT_MAX;
-                for (k=0; k<npl; k++)
-                {
-                    if ( ipl[k]==bcf_int32_missing || jpl[k]==bcf_int32_missing ) break;
-                    if ( ipl[k]==bcf_int32_vector_end || jpl[k]==bcf_int32_vector_end ) { k = npl; break; }
-                    if ( min_pl > ipl[k]+jpl[k] ) min_pl = ipl[k]+jpl[k];
-                }
-                if ( k!=npl ) { idx++; continue; }
-
-                if ( args->all_sites ) { sum += min_pl; nsum++; }
-                args->lks[idx] += min_pl;
-                args->cnts[idx]++;
-
-                if ( mdp>0 )
-                {
-                    args->dps[idx] += dp_arr[i] < dp_arr[j] ? dp_arr[i] : dp_arr[j];
-                    dp[i] += dp_arr[i]; ndp[i]++;
-                    dp[j] += dp_arr[j]; ndp[j]++;
-                }
-                else
-                {
-                    args->dps[idx]++;
-                    dp[i]++; ndp[i]++;
-                    dp[j]++; ndp[j]++;
-                }
-                idx++;
-            }
-        }
-        if ( args->all_sites )
-            fprintf(fp,"SD\t%s\t%d\t%d\t%.0f\n", args->sm_hdr->id[BCF_DT_CTG][line->rid].key, line->pos+1, nsum, nsum?sum/nsum:0);
+        process_PL(args,line,ntot,ndif);
     }
-    if ( dp_arr ) free(dp_arr);
-    if ( args->pl_arr ) free(args->pl_arr);
-    if ( args->tmp_arr ) free(args->tmp_arr);
-    if ( is_hom ) free(is_hom);
+    
+    FILE *fp = stdout;
+    print_header(args, fp);
 
-    if ( pl_warned ) fprintf(stderr, "[W::%s] PL was not found at %d site(s)\n", __func__, pl_warned);
-    if ( dp_warned ) fprintf(stderr, "[W::%s] DP was not found at %d site(s)\n", __func__, dp_warned);
+    float *tmp = (float*)malloc(sizeof(float)*args->nsmpl*(args->nsmpl-1)/2);
 
-    // Output samples sorted by average discordance
-    double *score  = (double*) calloc(nsamples,sizeof(double));
-    args->sites = (double*) calloc(nsamples,sizeof(double));
-    idx = 0;
-    for (i=0; i<nsamples; i++)
+    // Output pairwise distances
+    fprintf(fp, "# ERR, error rate\t[2]Pairwise error rate\t[3]Number of sites compared\t[4]Sample i\t[5]Sample j\n");
+    int i,j, idx = 0;
+    for (i=0; i<args->nsmpl; i++)
     {
         for (j=0; j<i; j++)
         {
-            score[i] += args->lks[idx];
-            score[j] += args->lks[idx];
-            args->sites[i] += args->cnts[idx];
-            args->sites[j] += args->cnts[idx];
+            float err = ntot[idx] ? (float)ndif[idx]/ntot[idx] : 1e-10;
+            fprintf(fp, "ERR\t%f\t%"PRId32"\t%s\t%s\n", err, ntot[idx],args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
+            PDIST(tmp,i,j) = err;
             idx++;
         }
     }
-    for (i=0; i<nsamples; i++)
-        if ( args->sites[i] ) score[i] /= args->sites[i];
-    double **p = (double**) malloc(sizeof(double*)*nsamples), avg_score = 0;
-    for (i=0; i<nsamples; i++) p[i] = &score[i];
-    qsort(p, nsamples, sizeof(int*), cmp_doubleptr);
-    // The average discordance gives the number of differing sites in % with -G1
-    fprintf(fp, "# [1]SM\t[2]Average Discordance\t[3]Average depth\t[4]Average number of sites\t[5]Sample\t[6]Sample ID\n");
-    for (i=0; i<nsamples; i++)
+
+    // Cluster samples
+    int nlist;
+    float clust_max_err = args->max_intra_err;
+    hclust_t *clust = hclust_init(args->nsmpl,tmp);
+    cluster_t *list = hclust_create_list(clust,args->min_inter_err,&clust_max_err,&nlist);
+    fprintf(fp, "# CLUSTER\t[2]Maximum inter-cluster ERR\t[3-]List of samples\n");
+    for (i=0; i<nlist; i++)
     {
-        idx = p[i] - score;
-        double adp = ndp[idx] ? (double)dp[idx]/ndp[idx] : 0;
-        double nsites = args->sites[idx]/(nsamples-1);
-        avg_score += score[idx];
-        fprintf(fp, "SM\t%f\t%.2lf\t%.0lf\t%s\t%d\n", score[idx]*100., adp, nsites, args->sm_hdr->samples[idx],i);
+        fprintf(fp,"CLUSTER\t%f", list[i].dist);
+        for (j=0; j<list[i].nmemb; j++)
+            fprintf(fp,"\t%s",args->sm_hdr->samples[list[i].memb[j]]);
+        fprintf(fp,"\n");
     }
-
-    //  // Overall score: maximum absolute deviation from the average score
-    //  fprintf(fp, "# [1] MD\t[2]Maximum deviation\t[3]The culprit\n");
-    //  fprintf(fp, "MD\t%f\t%s\n", (score[idx] - avg_score/nsamples)*100., args->sm_hdr->samples[idx]);    // idx still set
-    free(p);
-    free(score);
-    free(dp);
-    free(ndp);
-
-    // Pairwise discordances
+    hclust_destroy_list(list,nlist);
+    // Debugging output: the cluster graph and data used for deciding
+    char **dbg = hclust_explain(clust,&nlist);
+    for (i=0; i<nlist; i++)
+        fprintf(fp,"DBG\t%s\n", dbg[i]);
+    fprintf(fp, "# TH, clustering threshold\t[2]Value\nTH\t%f\n",clust_max_err);
+    fprintf(fp, "# DOT\t[2]Cluster graph, visualize e.g. as \"this-output.txt | grep ^DOT | cut -f2- | dot -Tsvg -o graph.svg\"\n");
+    fprintf(fp, "DOT\t%s\n", hclust_create_dot(clust,args->sm_hdr->samples,clust_max_err));
+    hclust_destroy(clust);
+    free(tmp);
+
+
+    // Deprecated output for temporary backward compatibility
+    fprintf(fp, "# Warning: The CN block is deprecated and will be removed in future releases. Use ERR instead.\n");
     fprintf(fp, "# [1]CN\t[2]Discordance\t[3]Number of sites\t[4]Average minimum depth\t[5]Sample i\t[6]Sample j\n");
     idx = 0;
-    for (i=0; i<nsamples; i++)
+    for (i=0; i<args->nsmpl; i++)
     {
         for (j=0; j<i; j++)
         {
-            fprintf(fp, "CN\t%.0f\t%d\t%.2f\t%s\t%s\n", args->lks[idx], args->cnts[idx], args->cnts[idx]?(double)args->dps[idx]/args->cnts[idx]:0.0,
-                    args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
+            fprintf(fp, "CN\t%"PRId32"\t%"PRId32"\t0\t%s\t%s\n", ndif[idx], ntot[idx],args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
             idx++;
         }
     }
-    fclose(fp);
-    if ( args->plot )
-        plot_cross_check(args);
+
+    free(ndif);
+    free(ntot);
+    free(args->tmp_arr);
 }
 
 static char *init_prefix(char *prefix)
@@ -713,6 +713,7 @@ static void usage(void)
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
     fprintf(stderr, "    -a, --all-sites                 output comparison for all sites\n");
+    fprintf(stderr, "    -c, --cluster <min,max>         min inter- and max intra-sample error [0.23,-0.3]\n");
     fprintf(stderr, "    -g, --genotypes <file>          genotypes to compare against\n");
     fprintf(stderr, "    -G, --GTs-only <int>            use GTs, ignore PLs, using <int> for unseen genotypes [99]\n");
     fprintf(stderr, "    -H, --homs-only                 homozygous genotypes only (useful for low coverage data)\n");
@@ -736,8 +737,16 @@ int main_vcfgtcheck(int argc, char *argv[])
     char *regions = NULL, *targets = NULL;
     int regions_is_file = 0, targets_is_file = 0;
 
+    // In simulated sample swaps the minimum error was 0.3 and maximum intra-sample error was 0.23
+    //    - min_inter: pairs with smaller err value will be considered identical 
+    //    - max_intra: pairs with err value bigger than abs(max_intra_err) will be considered
+    //                  different. If negative, the cutoff may be heuristically lowered
+    args->min_inter_err =  0.23;
+    args->max_intra_err = -0.3;
+
     static struct option loptions[] =
     {
+        {"cluster",1,0,'c'},
         {"GTs-only",1,0,'G'},
         {"all-sites",0,0,'a'},
         {"homs-only",0,0,'H'},
@@ -753,8 +762,17 @@ int main_vcfgtcheck(int argc, char *argv[])
         {0,0,0,0}
     };
     char *tmp;
-    while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:R:at:T:G:",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:R:at:T:G:c:",loptions,NULL)) >= 0) {
         switch (c) {
+            case 'c':
+                args->min_inter_err = strtod(optarg,&tmp);
+                if ( *tmp )
+                {
+                    if ( *tmp!=',') error("Could not parse: -c %s\n", optarg);
+                    args->max_intra_err = strtod(tmp+1,&tmp);
+                    if ( *tmp ) error("Could not parse: -c %s\n", optarg);
+                }
+                break;
             case 'G':
                 args->no_PLs = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --GTs-only %s\n", optarg);
diff --git a/bcftools/vcfgtcheck.c.pysam.c b/bcftools/vcfgtcheck.c.pysam.c
index 2f0a288..0bd6071 100644
--- a/bcftools/vcfgtcheck.c.pysam.c
+++ b/bcftools/vcfgtcheck.c.pysam.c
@@ -37,7 +37,9 @@ THE SOFTWARE.  */
 #include <htslib/vcf.h>
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/vcfutils.h>
+#include <inttypes.h>
 #include "bcftools.h"
+#include "hclust.h"
 
 typedef struct
 {
@@ -45,10 +47,10 @@ typedef struct
     bcf_hdr_t *gt_hdr, *sm_hdr; // VCF with genotypes to compare against and the query VCF
     int ntmp_arr, npl_arr;
     int32_t *tmp_arr, *pl_arr;
-    double *lks, *sites;
+    double *lks, *sites, min_inter_err, max_intra_err;
     int *cnts, *dps, hom_only, cross_check, all_sites;
     char *cwd, **argv, *gt_fname, *plot, *query_sample, *target_sample;
-    int argc, no_PLs;
+    int argc, no_PLs, narr, nsmpl;
 }
 args_t;
 
@@ -135,6 +137,7 @@ static void plot_check(args_t *args, char *target_sample, char *query_sample)
     free(fname);
 }
 
+#if 0
 static void plot_cross_check(args_t *args)
 {
     char *fname;
@@ -216,6 +219,7 @@ static void plot_cross_check(args_t *args)
     py_plot(fname);
     free(fname);
 }
+#endif
 
 static void init_data(args_t *args)
 {
@@ -232,14 +236,6 @@ static void init_data(args_t *args)
         args->sites = (double*) calloc(nsamples,sizeof(double));
         args->dps   = (int*) calloc(nsamples,sizeof(int));
     }
-    else
-    {
-        int nsamples = bcf_hdr_nsamples(args->sm_hdr);
-        int narr = (nsamples-1)*nsamples/2;
-        args->lks  = (double*) calloc(narr,sizeof(double));
-        args->cnts = (int*) calloc(narr,sizeof(int));
-        args->dps  = (int*) calloc(narr,sizeof(int));
-    }
 }
 
 static void destroy_data(args_t *args)
@@ -526,177 +522,181 @@ static void check_gt(args_t *args)
     }
 }
 
-static inline int is_hom_most_likely(int nals, int *pls)
+// static inline int is_hom_most_likely(int nals, int *pls)
+// {
+//     int ia, ib, idx = 1, min_is_hom = 1, min_pl = pls[0];
+//     for (ia=1; ia<nals; ia++)
+//     {
+//         for (ib=0; ib<ia; ib++)
+//         {
+//             if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 0; }
+//             idx++;
+//         }
+//         if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 1; }
+//         idx++;
+//     }
+//     return min_is_hom;
+// }
+
+int process_GT(args_t *args, bcf1_t *line, uint32_t *ntot, uint32_t *ndif)
+{
+    int ngt = bcf_get_genotypes(args->sm_hdr, line, &args->tmp_arr, &args->ntmp_arr);
+
+    if ( ngt<=0 ) return 1;                 // GT not present
+    if ( ngt!=args->nsmpl*2 ) return 2;     // not diploid
+    ngt /= args->nsmpl;
+    
+    int i,j, idx = 0;
+    for (i=1; i<args->nsmpl; i++)
+    {
+        int32_t *a = args->tmp_arr + i*ngt;
+        if ( bcf_gt_is_missing(a[0]) || bcf_gt_is_missing(a[1]) || a[1]==bcf_int32_vector_end ) { idx+=i; continue; }
+        int agt = 1<<bcf_gt_allele(a[0]) | 1<<bcf_gt_allele(a[1]);
+
+        for (j=0; j<i; j++)
+        {
+            int32_t *b = args->tmp_arr + j*ngt;
+            if ( bcf_gt_is_missing(b[0]) || bcf_gt_is_missing(b[1]) || b[1]==bcf_int32_vector_end ) { idx++; continue; }
+            int bgt = 1<<bcf_gt_allele(b[0]) | 1<<bcf_gt_allele(b[1]);
+
+            ntot[idx]++;
+            if ( agt!=bgt ) ndif[idx]++;
+            idx++;
+        }
+    }
+    return 0;
+}
+int process_PL(args_t *args, bcf1_t *line, uint32_t *ntot, uint32_t *ndif)
 {
-    int ia, ib, idx = 1, min_is_hom = 1, min_pl = pls[0];
-    for (ia=1; ia<nals; ia++)
+    int npl = bcf_get_format_int32(args->sm_hdr, line, "PL", &args->tmp_arr, &args->ntmp_arr);
+
+    if ( npl<=0 ) return 1;                 // PL not present
+    npl /= args->nsmpl;
+    
+    int i,j,k, idx = 0;
+    for (i=1; i<args->nsmpl; i++)
     {
-        for (ib=0; ib<ia; ib++)
+        int32_t *a = args->tmp_arr + i*npl;
+        int imin = -1;
+        for (k=0; k<npl; k++)
+        {
+            if ( a[k]==bcf_int32_vector_end ) break;
+            if ( a[k]==bcf_int32_missing ) continue;
+            if ( imin==-1 || a[imin] > a[k] ) imin = k;
+        }
+        if ( imin<0 ) { idx+=i; continue; }
+
+        for (j=0; j<i; j++)
         {
-            if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 0; }
+            int32_t *b = args->tmp_arr + j*npl;
+            int jmin = -1;
+            for (k=0; k<npl; k++)
+            {
+                if ( b[k]==bcf_int32_vector_end ) break;
+                if ( b[k]==bcf_int32_missing ) continue;
+                if ( jmin==-1 || b[jmin] > b[k] ) jmin = k;
+            }
+            if ( jmin<0 ) { idx++; continue; }
+
+            ntot[idx]++;
+            if ( imin!=jmin ) ndif[idx]++;
             idx++;
         }
-        if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 1; }
-        idx++;
     }
-    return min_is_hom;
+    return 0;
 }
 
 static void cross_check_gts(args_t *args)
 {
-    int nsamples = bcf_hdr_nsamples(args->sm_hdr), ndp_arr = 0;
-    unsigned int *dp = (unsigned int*) calloc(nsamples,sizeof(unsigned int)), *ndp = (unsigned int*) calloc(nsamples,sizeof(unsigned int)); // this will overflow one day...
-    int fake_pls = args->no_PLs, ignore_dp = 0;
-
-    int i,j,k,idx, pl_warned = 0, dp_warned = 0;
-    int32_t *dp_arr = NULL;
-    int *is_hom = args->hom_only ? (int*) malloc(sizeof(int)*nsamples) : NULL;
+    // Initialize things: check which tags are defined in the header, sample names etc.
     if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "PL")<0 )
     {
         if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
             error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
-        if ( !args->no_PLs )
+        if ( !args->no_PLs ) {
             fprintf(pysam_stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
-        fake_pls = 1;
+            args->no_PLs = 99;
+        }
     }
-    if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "DP")<0 ) ignore_dp = 1;
 
-    FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : pysam_stdout;
-    print_header(args, fp);
-    if ( args->all_sites ) fprintf(fp,"# [1]SD, Average Site Discordance\t[2]Chromosome\t[3]Position\t[4]Number of available pairs\t[5]Average discordance\n");
+    args->nsmpl = bcf_hdr_nsamples(args->sm_hdr);
+    args->narr  = (args->nsmpl-1)*args->nsmpl/2;
+
+    uint32_t *ndif = (uint32_t*) calloc(args->narr,4);
+    uint32_t *ntot = (uint32_t*) calloc(args->narr,4);
 
     while ( bcf_sr_next_line(args->files) )
     {
-        bcf1_t *line = args->files->readers[0].buffer[0];
-        bcf_unpack(line, BCF_UN_FMT);
-
-        int npl;
-        if ( !fake_pls )
-        {
-            npl = bcf_get_format_int32(args->sm_hdr, line, "PL", &args->pl_arr, &args->npl_arr);
-            if ( npl<=0 ) { pl_warned++; continue; }
-            npl /= nsamples;
-        }
-        else
-            npl = fake_PLs(args, args->sm_hdr, line);
-        int mdp = 0;
-        if ( !ignore_dp && (mdp=bcf_get_format_int32(args->sm_hdr, line, "DP", &dp_arr, &ndp_arr)) <= 0 ) dp_warned++;
+        bcf1_t *line = bcf_sr_get_line(args->files,0);
 
-        if ( args->hom_only )
+        // use PLs unless no_PLs is set and GT exists
+        if ( args->no_PLs )
         {
-            for (i=0; i<nsamples; i++)
-                is_hom[i] = is_hom_most_likely(line->n_allele, args->pl_arr+i*npl);
+            if ( process_GT(args,line,ntot,ndif)==0 ) continue;
         }
-
-        double sum = 0; int nsum = 0;
-        idx = 0;
-        for (i=0; i<nsamples; i++)
-        {
-            int *ipl = &args->pl_arr[i*npl];
-            if ( *ipl==-1 ) { idx += i; continue; } // missing genotype
-            if ( mdp>0 && (dp_arr[i]==bcf_int32_missing || !dp_arr[i]) ) { idx += i; continue; }
-            if ( args->hom_only && !is_hom[i] ) { idx += i; continue; }
-
-            for (j=0; j<i; j++)
-            {
-                int *jpl = &args->pl_arr[j*npl];
-                if ( *jpl==-1 ) { idx++; continue; } // missing genotype
-                if ( mdp>0 && (dp_arr[j]==bcf_int32_missing || !dp_arr[j]) ) { idx++; continue; }
-                if ( args->hom_only && !is_hom[j] ) { idx++; continue; }
-
-                int min_pl = INT_MAX;
-                for (k=0; k<npl; k++)
-                {
-                    if ( ipl[k]==bcf_int32_missing || jpl[k]==bcf_int32_missing ) break;
-                    if ( ipl[k]==bcf_int32_vector_end || jpl[k]==bcf_int32_vector_end ) { k = npl; break; }
-                    if ( min_pl > ipl[k]+jpl[k] ) min_pl = ipl[k]+jpl[k];
-                }
-                if ( k!=npl ) { idx++; continue; }
-
-                if ( args->all_sites ) { sum += min_pl; nsum++; }
-                args->lks[idx] += min_pl;
-                args->cnts[idx]++;
-
-                if ( mdp>0 )
-                {
-                    args->dps[idx] += dp_arr[i] < dp_arr[j] ? dp_arr[i] : dp_arr[j];
-                    dp[i] += dp_arr[i]; ndp[i]++;
-                    dp[j] += dp_arr[j]; ndp[j]++;
-                }
-                else
-                {
-                    args->dps[idx]++;
-                    dp[i]++; ndp[i]++;
-                    dp[j]++; ndp[j]++;
-                }
-                idx++;
-            }
-        }
-        if ( args->all_sites )
-            fprintf(fp,"SD\t%s\t%d\t%d\t%.0f\n", args->sm_hdr->id[BCF_DT_CTG][line->rid].key, line->pos+1, nsum, nsum?sum/nsum:0);
+        process_PL(args,line,ntot,ndif);
     }
-    if ( dp_arr ) free(dp_arr);
-    if ( args->pl_arr ) free(args->pl_arr);
-    if ( args->tmp_arr ) free(args->tmp_arr);
-    if ( is_hom ) free(is_hom);
+    
+    FILE *fp = pysam_stdout;
+    print_header(args, fp);
 
-    if ( pl_warned ) fprintf(pysam_stderr, "[W::%s] PL was not found at %d site(s)\n", __func__, pl_warned);
-    if ( dp_warned ) fprintf(pysam_stderr, "[W::%s] DP was not found at %d site(s)\n", __func__, dp_warned);
+    float *tmp = (float*)malloc(sizeof(float)*args->nsmpl*(args->nsmpl-1)/2);
 
-    // Output samples sorted by average discordance
-    double *score  = (double*) calloc(nsamples,sizeof(double));
-    args->sites = (double*) calloc(nsamples,sizeof(double));
-    idx = 0;
-    for (i=0; i<nsamples; i++)
+    // Output pairwise distances
+    fprintf(fp, "# ERR, error rate\t[2]Pairwise error rate\t[3]Number of sites compared\t[4]Sample i\t[5]Sample j\n");
+    int i,j, idx = 0;
+    for (i=0; i<args->nsmpl; i++)
     {
         for (j=0; j<i; j++)
         {
-            score[i] += args->lks[idx];
-            score[j] += args->lks[idx];
-            args->sites[i] += args->cnts[idx];
-            args->sites[j] += args->cnts[idx];
+            float err = ntot[idx] ? (float)ndif[idx]/ntot[idx] : 1e-10;
+            fprintf(fp, "ERR\t%f\t%"PRId32"\t%s\t%s\n", err, ntot[idx],args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
+            PDIST(tmp,i,j) = err;
             idx++;
         }
     }
-    for (i=0; i<nsamples; i++)
-        if ( args->sites[i] ) score[i] /= args->sites[i];
-    double **p = (double**) malloc(sizeof(double*)*nsamples), avg_score = 0;
-    for (i=0; i<nsamples; i++) p[i] = &score[i];
-    qsort(p, nsamples, sizeof(int*), cmp_doubleptr);
-    // The average discordance gives the number of differing sites in % with -G1
-    fprintf(fp, "# [1]SM\t[2]Average Discordance\t[3]Average depth\t[4]Average number of sites\t[5]Sample\t[6]Sample ID\n");
-    for (i=0; i<nsamples; i++)
+
+    // Cluster samples
+    int nlist;
+    float clust_max_err = args->max_intra_err;
+    hclust_t *clust = hclust_init(args->nsmpl,tmp);
+    cluster_t *list = hclust_create_list(clust,args->min_inter_err,&clust_max_err,&nlist);
+    fprintf(fp, "# CLUSTER\t[2]Maximum inter-cluster ERR\t[3-]List of samples\n");
+    for (i=0; i<nlist; i++)
     {
-        idx = p[i] - score;
-        double adp = ndp[idx] ? (double)dp[idx]/ndp[idx] : 0;
-        double nsites = args->sites[idx]/(nsamples-1);
-        avg_score += score[idx];
-        fprintf(fp, "SM\t%f\t%.2lf\t%.0lf\t%s\t%d\n", score[idx]*100., adp, nsites, args->sm_hdr->samples[idx],i);
+        fprintf(fp,"CLUSTER\t%f", list[i].dist);
+        for (j=0; j<list[i].nmemb; j++)
+            fprintf(fp,"\t%s",args->sm_hdr->samples[list[i].memb[j]]);
+        fprintf(fp,"\n");
     }
-
-    //  // Overall score: maximum absolute deviation from the average score
-    //  fprintf(fp, "# [1] MD\t[2]Maximum deviation\t[3]The culprit\n");
-    //  fprintf(fp, "MD\t%f\t%s\n", (score[idx] - avg_score/nsamples)*100., args->sm_hdr->samples[idx]);    // idx still set
-    free(p);
-    free(score);
-    free(dp);
-    free(ndp);
-
-    // Pairwise discordances
+    hclust_destroy_list(list,nlist);
+    // Debugging output: the cluster graph and data used for deciding
+    char **dbg = hclust_explain(clust,&nlist);
+    for (i=0; i<nlist; i++)
+        fprintf(fp,"DBG\t%s\n", dbg[i]);
+    fprintf(fp, "# TH, clustering threshold\t[2]Value\nTH\t%f\n",clust_max_err);
+    fprintf(fp, "# DOT\t[2]Cluster graph, visualize e.g. as \"this-output.txt | grep ^DOT | cut -f2- | dot -Tsvg -o graph.svg\"\n");
+    fprintf(fp, "DOT\t%s\n", hclust_create_dot(clust,args->sm_hdr->samples,clust_max_err));
+    hclust_destroy(clust);
+    free(tmp);
+
+
+    // Deprecated output for temporary backward compatibility
+    fprintf(fp, "# Warning: The CN block is deprecated and will be removed in future releases. Use ERR instead.\n");
     fprintf(fp, "# [1]CN\t[2]Discordance\t[3]Number of sites\t[4]Average minimum depth\t[5]Sample i\t[6]Sample j\n");
     idx = 0;
-    for (i=0; i<nsamples; i++)
+    for (i=0; i<args->nsmpl; i++)
     {
         for (j=0; j<i; j++)
         {
-            fprintf(fp, "CN\t%.0f\t%d\t%.2f\t%s\t%s\n", args->lks[idx], args->cnts[idx], args->cnts[idx]?(double)args->dps[idx]/args->cnts[idx]:0.0,
-                    args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
+            fprintf(fp, "CN\t%"PRId32"\t%"PRId32"\t0\t%s\t%s\n", ndif[idx], ntot[idx],args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
             idx++;
         }
     }
-    fclose(fp);
-    if ( args->plot )
-        plot_cross_check(args);
+
+    free(ndif);
+    free(ntot);
+    free(args->tmp_arr);
 }
 
 static char *init_prefix(char *prefix)
@@ -715,6 +715,7 @@ static void usage(void)
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "Options:\n");
     fprintf(pysam_stderr, "    -a, --all-sites                 output comparison for all sites\n");
+    fprintf(pysam_stderr, "    -c, --cluster <min,max>         min inter- and max intra-sample error [0.23,-0.3]\n");
     fprintf(pysam_stderr, "    -g, --genotypes <file>          genotypes to compare against\n");
     fprintf(pysam_stderr, "    -G, --GTs-only <int>            use GTs, ignore PLs, using <int> for unseen genotypes [99]\n");
     fprintf(pysam_stderr, "    -H, --homs-only                 homozygous genotypes only (useful for low coverage data)\n");
@@ -738,8 +739,16 @@ int main_vcfgtcheck(int argc, char *argv[])
     char *regions = NULL, *targets = NULL;
     int regions_is_file = 0, targets_is_file = 0;
 
+    // In simulated sample swaps the minimum error was 0.3 and maximum intra-sample error was 0.23
+    //    - min_inter: pairs with smaller err value will be considered identical 
+    //    - max_intra: pairs with err value bigger than abs(max_intra_err) will be considered
+    //                  different. If negative, the cutoff may be heuristically lowered
+    args->min_inter_err =  0.23;
+    args->max_intra_err = -0.3;
+
     static struct option loptions[] =
     {
+        {"cluster",1,0,'c'},
         {"GTs-only",1,0,'G'},
         {"all-sites",0,0,'a'},
         {"homs-only",0,0,'H'},
@@ -755,8 +764,17 @@ int main_vcfgtcheck(int argc, char *argv[])
         {0,0,0,0}
     };
     char *tmp;
-    while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:R:at:T:G:",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:R:at:T:G:c:",loptions,NULL)) >= 0) {
         switch (c) {
+            case 'c':
+                args->min_inter_err = strtod(optarg,&tmp);
+                if ( *tmp )
+                {
+                    if ( *tmp!=',') error("Could not parse: -c %s\n", optarg);
+                    args->max_intra_err = strtod(tmp+1,&tmp);
+                    if ( *tmp ) error("Could not parse: -c %s\n", optarg);
+                }
+                break;
             case 'G':
                 args->no_PLs = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --GTs-only %s\n", optarg);
diff --git a/bcftools/vcfindex.c b/bcftools/vcfindex.c
index d1e9179..aa60fb2 100644
--- a/bcftools/vcfindex.c
+++ b/bcftools/vcfindex.c
@@ -1,4 +1,3 @@
-
 /*  vcfindex.c -- Index bgzip compressed VCF/BCF files for random access.
 
     Copyright (C) 2014-2016 Genome Research Ltd.
@@ -32,6 +31,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <sys/stat.h>
 #define __STDC_FORMAT_MACROS
 #include <inttypes.h>
+#include <htslib/kstring.h>
 #include "bcftools.h"
 
 #define BCF_LIDX_SHIFT    14
@@ -43,24 +43,22 @@ static void usage(void)
     fprintf(stderr, "Usage:   bcftools index [options] <in.bcf>|<in.vcf.gz>\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Indexing options:\n");
-    fprintf(stderr, "    -c, --csi            generate CSI-format index for VCF/BCF files [default]\n");
-    fprintf(stderr, "    -f, --force          overwrite index if it already exists\n");
-    fprintf(stderr, "    -m, --min-shift INT  set minimal interval size for CSI indices to 2^INT [14]\n");
-    fprintf(stderr, "    -t, --tbi            generate TBI-format index for VCF files\n");
+    fprintf(stderr, "    -c, --csi                generate CSI-format index for VCF/BCF files [default]\n");
+    fprintf(stderr, "    -f, --force              overwrite index if it already exists\n");
+    fprintf(stderr, "    -m, --min-shift INT      set minimal interval size for CSI indices to 2^INT [14]\n");
+    fprintf(stderr, "    -o, --output-file FILE   optional output index file name\n");
+    fprintf(stderr, "    -t, --tbi                generate TBI-format index for VCF files\n");
+    fprintf(stderr, "        --threads            sets the number of threads [0]\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Stats options:\n");
     fprintf(stderr, "    -n, --nrecords       print number of records based on existing index file\n");
-    fprintf(stderr, "    -s, --stats   print per contig stats based on existing index file\n");
+    fprintf(stderr, "    -s, --stats          print per contig stats based on existing index file\n");
     fprintf(stderr, "\n");
     exit(1);
 }
 
 int vcf_index_stats(char *fname, int stats)
 {
-    char *fn_out = NULL;
-    FILE *out;
-    out = fn_out ? fopen(fn_out, "w") : stdout;
-
     const char **seq;
     int i, nseq;
     tbx_t *tbx = NULL;
@@ -74,12 +72,12 @@ int vcf_index_stats(char *fname, int stats)
     if ( hts_get_format(fp)->format==vcf )
     {
         tbx = tbx_index_load(fname);
-        if ( !tbx ) { fprintf(stderr,"Could not load TBI index: %s\n", fname); return 1; }
+        if ( !tbx ) { fprintf(stderr,"Could not load index for VCF: %s\n", fname); return 1; }
     }
     else if ( hts_get_format(fp)->format==bcf )
     {
         idx = bcf_index_load(fname);
-        if ( !idx ) { fprintf(stderr,"Could not load CSI index: %s\n", fname); return 1; }
+        if ( !idx ) { fprintf(stderr,"Could not load index for BCF file: %s\n", fname); return 1; }
     }
     else
     {
@@ -97,7 +95,7 @@ int vcf_index_stats(char *fname, int stats)
         if (stats&2 || !records) continue;
         bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", seq[i], NULL);
         int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1;
-        fprintf(out,"%s\t%s\t%" PRIu64 "\n", seq[i], hkey<0?".":hrec->vals[hkey], records);
+        printf("%s\t%s\t%" PRIu64 "\n", seq[i], hkey<0?".":hrec->vals[hkey], records);
     }
     if (!sum)
     {
@@ -106,14 +104,13 @@ int vcf_index_stats(char *fname, int stats)
         bcf1_t *rec = bcf_init1();
         if (bcf_read1(fp, hdr, rec) >= 0)
         {
-            fprintf(stderr,"%s index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", tbx ? "TBI" : "CSI", fname);
+            fprintf(stderr,"index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", fname);
             return 1;
         }
         bcf_destroy1(rec);
     }
-    if (stats&2) fprintf(out, "%" PRIu64 "\n", sum);
+    if (stats&2) printf("%" PRIu64 "\n", sum);
     free(seq);
-    fclose(out);
     hts_close(fp);
     bcf_hdr_destroy(hdr);
     if (tbx)
@@ -125,8 +122,9 @@ int vcf_index_stats(char *fname, int stats)
 
 int main_vcfindex(int argc, char *argv[])
 {
-    int c, force = 0, tbi = 0, stats = 0;
+    int c, force = 0, tbi = 0, stats = 0, n_threads = 0;
     int min_shift = BCF_LIDX_SHIFT;
+    char *outfn = NULL;
 
     static struct option loptions[] =
     {
@@ -136,27 +134,33 @@ int main_vcfindex(int argc, char *argv[])
         {"min-shift",required_argument,NULL,'m'},
         {"stats",no_argument,NULL,'s'},
         {"nrecords",no_argument,NULL,'n'},
+        {"threads",required_argument,NULL,9},
+        {"output-file",required_argument,NULL,'o'},
         {NULL, 0, NULL, 0}
     };
 
     char *tmp;
-    while ((c = getopt_long(argc, argv, "ctfm:sn", loptions, NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "ctfm:sno:", loptions, NULL)) >= 0)
     {
         switch (c)
         {
             case 'c': tbi = 0; break;
             case 't': tbi = 1; min_shift = 0; break;
             case 'f': force = 1; break;
-            case 'm': 
+            case 'm':
                 min_shift = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --min-shift %s\n", optarg);
                 break;
             case 's': stats |= 1; break;
             case 'n': stats |= 2; break;
+            case 9:
+                n_threads = strtol(optarg,&tmp,10);
+                if ( *tmp ) error("Could not parse argument: --threads %s\n", optarg);
+                break;
+            case 'o': outfn = optarg; break;
             default: usage();
         }
     }
-    if ( optind==argc ) usage();
     if (stats>2)
     {
         fprintf(stderr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__);
@@ -173,69 +177,48 @@ int main_vcfindex(int argc, char *argv[])
         return 1;
     }
 
-    char *fname = argv[optind];
-    if (stats) return vcf_index_stats(fname, stats);
-
-    htsFile *fp = hts_open(fname,"r"); 
-    if ( !fp ) error("Failed to read %s\n", fname);
-    htsFormat type = *hts_get_format(fp);
-    hts_close(fp);
-
-    if ( (type.format!=bcf && type.format!=vcf) || type.compression!=bgzf )
+    char *fname = NULL;
+    if ( optind>=argc )
     {
-        fprintf(stderr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__);
-        if ( type.compression!=bgzf )
-            fprintf(stderr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__);
-        return 1;
-    }
-    if (tbi && type.format==bcf)
-    {
-        fprintf(stderr, "[Warning] TBI-index does not work for BCF files. Generating CSI instead.\n");
-        tbi = 0; min_shift = BCF_LIDX_SHIFT;
+        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        else usage();
     }
-    if (min_shift == 0 && type.format==bcf)
-    {
-        fprintf(stderr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__);
-        return 1;
-    }
-    if (!tbi && type.format==vcf && min_shift == 0)
+    else fname = argv[optind];
+    if (stats) return vcf_index_stats(fname, stats);
+
+    kstring_t idx_fname = {0,0,0};
+    if (outfn)
+        kputs(outfn,&idx_fname);
+    else
     {
-        fprintf(stderr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n");
-        tbi = 1;
+        if (!strcmp(fname, "-")) { fprintf(stderr, "[E::%s] must specify an output path for index file when reading VCF/BCF from stdin\n", __func__); return 1; }
+        ksprintf(&idx_fname, "%s.%s", fname, tbi ? "tbi" : "csi");
     }
-
     if (!force)
     {
         // Before complaining about existing index, check if the VCF file isn't newer.
-        char *idx_fname = (char*)alloca(strlen(fname) + 5);
-        strcat(strcpy(idx_fname, fname), tbi ? ".tbi" : ".csi");
         struct stat stat_tbi, stat_file;
-        if ( stat(idx_fname, &stat_tbi)==0 )
+        if ( stat(idx_fname.s, &stat_tbi)==0 )
         {
             stat(fname, &stat_file);
             if ( stat_file.st_mtime <= stat_tbi.st_mtime )
             {
-                fprintf(stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__);
+                fprintf(stderr,"[E::%s] the index file exists. Please use '-f' to overwrite %s\n", __func__, idx_fname.s);
+                free(idx_fname.s);
                 return 1;
             }
         }
     }
 
-    if (type.format==bcf)
-    {
-        if ( bcf_index_build(fname, min_shift) != 0 )
-        {
-            fprintf(stderr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname);
-            return 1;
-        }
-    }
-    else
-    {
-        if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 )
-        {
-            fprintf(stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname);
-            return 1;
-        }
+    int ret = bcf_index_build3(fname, idx_fname.s, min_shift, n_threads);
+    free(idx_fname.s);
+    if (ret != 0) {
+        if (ret == -2)
+            error("index: failed to open \"%s\"\n", fname);
+        else if (ret == -3)
+            error("index: \"%s\" is in a format that cannot be usefully indexed\n", fname);
+        else
+            error("index: failed to create index for \"%s\"\n", fname);
     }
     return 0;
 }
diff --git a/bcftools/vcfindex.c.pysam.c b/bcftools/vcfindex.c.pysam.c
index 479fc57..ff960b9 100644
--- a/bcftools/vcfindex.c.pysam.c
+++ b/bcftools/vcfindex.c.pysam.c
@@ -1,6 +1,5 @@
 #include "pysam.h"
 
-
 /*  vcfindex.c -- Index bgzip compressed VCF/BCF files for random access.
 
     Copyright (C) 2014-2016 Genome Research Ltd.
@@ -34,6 +33,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <sys/stat.h>
 #define __STDC_FORMAT_MACROS
 #include <inttypes.h>
+#include <htslib/kstring.h>
 #include "bcftools.h"
 
 #define BCF_LIDX_SHIFT    14
@@ -45,24 +45,22 @@ static void usage(void)
     fprintf(pysam_stderr, "Usage:   bcftools index [options] <in.bcf>|<in.vcf.gz>\n");
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "Indexing options:\n");
-    fprintf(pysam_stderr, "    -c, --csi            generate CSI-format index for VCF/BCF files [default]\n");
-    fprintf(pysam_stderr, "    -f, --force          overwrite index if it already exists\n");
-    fprintf(pysam_stderr, "    -m, --min-shift INT  set minimal interval size for CSI indices to 2^INT [14]\n");
-    fprintf(pysam_stderr, "    -t, --tbi            generate TBI-format index for VCF files\n");
+    fprintf(pysam_stderr, "    -c, --csi                generate CSI-format index for VCF/BCF files [default]\n");
+    fprintf(pysam_stderr, "    -f, --force              overwrite index if it already exists\n");
+    fprintf(pysam_stderr, "    -m, --min-shift INT      set minimal interval size for CSI indices to 2^INT [14]\n");
+    fprintf(pysam_stderr, "    -o, --output-file FILE   optional output index file name\n");
+    fprintf(pysam_stderr, "    -t, --tbi                generate TBI-format index for VCF files\n");
+    fprintf(pysam_stderr, "        --threads            sets the number of threads [0]\n");
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "Stats options:\n");
     fprintf(pysam_stderr, "    -n, --nrecords       print number of records based on existing index file\n");
-    fprintf(pysam_stderr, "    -s, --stats   print per contig stats based on existing index file\n");
+    fprintf(pysam_stderr, "    -s, --stats          print per contig stats based on existing index file\n");
     fprintf(pysam_stderr, "\n");
     exit(1);
 }
 
 int vcf_index_stats(char *fname, int stats)
 {
-    char *fn_out = NULL;
-    FILE *out;
-    out = fn_out ? fopen(fn_out, "w") : pysam_stdout;
-
     const char **seq;
     int i, nseq;
     tbx_t *tbx = NULL;
@@ -76,12 +74,12 @@ int vcf_index_stats(char *fname, int stats)
     if ( hts_get_format(fp)->format==vcf )
     {
         tbx = tbx_index_load(fname);
-        if ( !tbx ) { fprintf(pysam_stderr,"Could not load TBI index: %s\n", fname); return 1; }
+        if ( !tbx ) { fprintf(pysam_stderr,"Could not load index for VCF: %s\n", fname); return 1; }
     }
     else if ( hts_get_format(fp)->format==bcf )
     {
         idx = bcf_index_load(fname);
-        if ( !idx ) { fprintf(pysam_stderr,"Could not load CSI index: %s\n", fname); return 1; }
+        if ( !idx ) { fprintf(pysam_stderr,"Could not load index for BCF file: %s\n", fname); return 1; }
     }
     else
     {
@@ -99,7 +97,7 @@ int vcf_index_stats(char *fname, int stats)
         if (stats&2 || !records) continue;
         bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", seq[i], NULL);
         int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1;
-        fprintf(out,"%s\t%s\t%" PRIu64 "\n", seq[i], hkey<0?".":hrec->vals[hkey], records);
+        fprintf(pysam_stdout, "%s\t%s\t%" PRIu64 "\n", seq[i], hkey<0?".":hrec->vals[hkey], records);
     }
     if (!sum)
     {
@@ -108,14 +106,13 @@ int vcf_index_stats(char *fname, int stats)
         bcf1_t *rec = bcf_init1();
         if (bcf_read1(fp, hdr, rec) >= 0)
         {
-            fprintf(pysam_stderr,"%s index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", tbx ? "TBI" : "CSI", fname);
+            fprintf(pysam_stderr,"index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", fname);
             return 1;
         }
         bcf_destroy1(rec);
     }
-    if (stats&2) fprintf(out, "%" PRIu64 "\n", sum);
+    if (stats&2) fprintf(pysam_stdout, "%" PRIu64 "\n", sum);
     free(seq);
-    fclose(out);
     hts_close(fp);
     bcf_hdr_destroy(hdr);
     if (tbx)
@@ -127,8 +124,9 @@ int vcf_index_stats(char *fname, int stats)
 
 int main_vcfindex(int argc, char *argv[])
 {
-    int c, force = 0, tbi = 0, stats = 0;
+    int c, force = 0, tbi = 0, stats = 0, n_threads = 0;
     int min_shift = BCF_LIDX_SHIFT;
+    char *outfn = NULL;
 
     static struct option loptions[] =
     {
@@ -138,27 +136,33 @@ int main_vcfindex(int argc, char *argv[])
         {"min-shift",required_argument,NULL,'m'},
         {"stats",no_argument,NULL,'s'},
         {"nrecords",no_argument,NULL,'n'},
+        {"threads",required_argument,NULL,9},
+        {"output-file",required_argument,NULL,'o'},
         {NULL, 0, NULL, 0}
     };
 
     char *tmp;
-    while ((c = getopt_long(argc, argv, "ctfm:sn", loptions, NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "ctfm:sno:", loptions, NULL)) >= 0)
     {
         switch (c)
         {
             case 'c': tbi = 0; break;
             case 't': tbi = 1; min_shift = 0; break;
             case 'f': force = 1; break;
-            case 'm': 
+            case 'm':
                 min_shift = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --min-shift %s\n", optarg);
                 break;
             case 's': stats |= 1; break;
             case 'n': stats |= 2; break;
+            case 9:
+                n_threads = strtol(optarg,&tmp,10);
+                if ( *tmp ) error("Could not parse argument: --threads %s\n", optarg);
+                break;
+            case 'o': outfn = optarg; break;
             default: usage();
         }
     }
-    if ( optind==argc ) usage();
     if (stats>2)
     {
         fprintf(pysam_stderr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__);
@@ -175,69 +179,48 @@ int main_vcfindex(int argc, char *argv[])
         return 1;
     }
 
-    char *fname = argv[optind];
-    if (stats) return vcf_index_stats(fname, stats);
-
-    htsFile *fp = hts_open(fname,"r"); 
-    if ( !fp ) error("Failed to read %s\n", fname);
-    htsFormat type = *hts_get_format(fp);
-    hts_close(fp);
-
-    if ( (type.format!=bcf && type.format!=vcf) || type.compression!=bgzf )
+    char *fname = NULL;
+    if ( optind>=argc )
     {
-        fprintf(pysam_stderr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__);
-        if ( type.compression!=bgzf )
-            fprintf(pysam_stderr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__);
-        return 1;
-    }
-    if (tbi && type.format==bcf)
-    {
-        fprintf(pysam_stderr, "[Warning] TBI-index does not work for BCF files. Generating CSI instead.\n");
-        tbi = 0; min_shift = BCF_LIDX_SHIFT;
+        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        else usage();
     }
-    if (min_shift == 0 && type.format==bcf)
-    {
-        fprintf(pysam_stderr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__);
-        return 1;
-    }
-    if (!tbi && type.format==vcf && min_shift == 0)
+    else fname = argv[optind];
+    if (stats) return vcf_index_stats(fname, stats);
+
+    kstring_t idx_fname = {0,0,0};
+    if (outfn)
+        kputs(outfn,&idx_fname);
+    else
     {
-        fprintf(pysam_stderr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n");
-        tbi = 1;
+        if (!strcmp(fname, "-")) { fprintf(pysam_stderr, "[E::%s] must specify an output path for index file when reading VCF/BCF from stdin\n", __func__); return 1; }
+        ksprintf(&idx_fname, "%s.%s", fname, tbi ? "tbi" : "csi");
     }
-
     if (!force)
     {
         // Before complaining about existing index, check if the VCF file isn't newer.
-        char *idx_fname = (char*)alloca(strlen(fname) + 5);
-        strcat(strcpy(idx_fname, fname), tbi ? ".tbi" : ".csi");
         struct stat stat_tbi, stat_file;
-        if ( stat(idx_fname, &stat_tbi)==0 )
+        if ( stat(idx_fname.s, &stat_tbi)==0 )
         {
             stat(fname, &stat_file);
             if ( stat_file.st_mtime <= stat_tbi.st_mtime )
             {
-                fprintf(pysam_stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__);
+                fprintf(pysam_stderr,"[E::%s] the index file exists. Please use '-f' to overwrite %s\n", __func__, idx_fname.s);
+                free(idx_fname.s);
                 return 1;
             }
         }
     }
 
-    if (type.format==bcf)
-    {
-        if ( bcf_index_build(fname, min_shift) != 0 )
-        {
-            fprintf(pysam_stderr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname);
-            return 1;
-        }
-    }
-    else
-    {
-        if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 )
-        {
-            fprintf(pysam_stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname);
-            return 1;
-        }
+    int ret = bcf_index_build3(fname, idx_fname.s, min_shift, n_threads);
+    free(idx_fname.s);
+    if (ret != 0) {
+        if (ret == -2)
+            error("index: failed to open \"%s\"\n", fname);
+        else if (ret == -3)
+            error("index: \"%s\" is in a format that cannot be usefully indexed\n", fname);
+        else
+            error("index: failed to create index for \"%s\"\n", fname);
     }
     return 0;
 }
diff --git a/bcftools/vcfmerge.c b/bcftools/vcfmerge.c
index 02fac6b..1aeb739 100644
--- a/bcftools/vcfmerge.c
+++ b/bcftools/vcfmerge.c
@@ -1,6 +1,6 @@
 /*  vcfmerge.c -- Merge multiple VCF/BCF files to create one multi-sample file.
 
-    Copyright (C) 2012-2014 Genome Research Ltd.
+    Copyright (C) 2012-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -24,28 +24,39 @@ THE SOFTWARE.  */
 
 #include <stdio.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <htslib/vcf.h>
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/vcfutils.h>
+#include <htslib/faidx.h>
 #include <math.h>
 #include <ctype.h>
+#include <time.h>
 #include "bcftools.h"
+#include "regidx.h"
 #include "vcmp.h"
 
+#define DBG 0
+
 #include <htslib/khash.h>
 KHASH_MAP_INIT_STR(strdict, int)
 typedef khash_t(strdict) strdict_t;
 
-#define SKIP_DONE 1
-#define SKIP_DIFF 2
+#define FLT_LOGIC_ADD    0
+#define FLT_LOGIC_REMOVE 1
+
+#define SKIP_DONE 1     // the record was processed
+#define SKIP_DIFF 2     // not compatible, merge later
 
 #define IS_VL_G(hdr,id) (bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_G)
 #define IS_VL_A(hdr,id) (bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_A)
 #define IS_VL_R(hdr,id) (bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_R)
 
+#define SWAP(type_t,a,b) { type_t tmp = (a); (a) = (b); (b) = tmp; }
+
 // For merging INFO Number=A,G,R tags
 typedef struct
 {
@@ -63,43 +74,61 @@ typedef struct _info_rule_t
     void (*merger)(bcf_hdr_t *hdr, bcf1_t *line, struct _info_rule_t *rule);
     int type;           // one of BCF_HT_*
     int block_size;     // number of values in a block
+    int type_size;      // size of the corresponding BCF_HT_* type
     int nblocks;        // number of blocks in nvals (the number of merged files)
     int nvals, mvals;   // used and total size of vals array
     void *vals;         // the info tag values
 }
 info_rule_t;
 
+typedef struct
+{
+    bcf1_t *line;
+    int end, active;
+}
+gvcf_aux_t;
+
 // Auxiliary merge data for selecting the right combination
 //  of buffered records across multiple readers. maux1_t
 //  corresponds to one buffered line.
 typedef struct
 {
     int skip;
-    int *map;   // mapping from input alleles to the output array
+    int *map;   // mapping from input alleles to the array of output alleles (set by merge_alleles)
     int mmap;   // size of map array (only buffer[i].n_allele is actually used)
     int als_differ;
 }
 maux1_t;
 typedef struct
 {
-    int n;  // number of readers
+    int rid;        // current rid
+    int beg,end;    // valid ranges in reader's buffer [beg,end). Maintained by maux_reset and gvcf_flush.
+    int cur;        // current line or -1 if none
+    int npos;       // number of unprocessed lines at this position
+    int mrec;       // allocated size of buf
+    maux1_t *rec;   // buffer to keep reader's lines
+    bcf1_t **lines; // source buffer: either gvcf or readers' buffer
+}
+buffer_t;
+typedef struct
+{
+    int n, pos, var_types;  // number of readers, current position, currently available variant types
+    char *chr;              // current chromosome
     char **als, **out_als;  // merged alleles (temp, may contain empty records) and merged alleles ready for output
     int nals, mals, nout_als, mout_als; // size of the output array
     int *cnt, ncnt; // number of records that refer to the alleles
-    int *nbuf;      // readers have buffers of varying lengths
     int *smpl_ploidy, *smpl_nGsize; // ploidy and derived number of values in Number=G tags, updated for each line (todo: cache for missing cases)
-    int *flt, mflt, minf;
-    bcf_info_t *inf;// out_line's INFO fields
     bcf_fmt_t **fmt_map; // i-th output FORMAT field corresponds in j-th reader to i*nreader+j, first row is reserved for GT
     int nfmt_map;        // number of rows in the fmt_map array
     int *agr_map, nagr_map, magr_map;   // mapping between Number=AGR element indexes
     void *tmp_arr;
     int ntmp_arr;
-    maux1_t **d;    // d[i][j] i-th reader, j-th buffer line
+    buffer_t *buf;
     AGR_info_t *AGR_info;
     int nAGR_info, mAGR_info;
     bcf_srs_t *files;
-    int *has_line;  // which files are being merged
+    int gvcf_min, gvcf_break;   // min buffered gvcf END position (NB: gvcf_min is 1-based) or 0 if no active lines are present
+    gvcf_aux_t *gvcf;           // buffer of gVCF lines
 }
 maux_t;
 
@@ -107,8 +136,11 @@ typedef struct
 {
     vcmp_t *vcmp;
     maux_t *maux;
-    int header_only, collapse, output_type, force_samples, merge_by_id;
+    regidx_t *regs;    // apply regions only after the blocks are expanded
+    regitr_t *regs_itr;
+    int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref;
     char *header_fname, *output_fname, *regions_list, *info_rules, *file_list;
+    faidx_t *gvcf_fai;
     info_rule_t *rules;
     int nrules;
     strdict_t *tmph;
@@ -122,6 +154,14 @@ typedef struct
 }
 args_t;
 
+static bcf1_t *maux_get_line(args_t *args, int i)
+{
+    maux_t *ma = args->maux;
+    int ibuf = ma->buf[i].cur;
+    if ( ibuf >= 0 ) return ma->buf[i].lines[ibuf];
+    return NULL;
+}
+
 static void info_rules_merge_sum(bcf_hdr_t *hdr, bcf1_t *line, info_rule_t *rule)
 {
     if ( !rule->nvals ) return;
@@ -247,6 +287,32 @@ static void info_rules_init(args_t *args)
             if ( str.l ) kputc(',',&str);
             kputs("DP4:sum",&str);
         }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "QS")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("QS:sum",&str);
+        }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "MinDP")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("MinDP:min",&str);
+        }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "I16")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("I16:sum",&str);
+        }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "IDV")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("IDV:max",&str);
+        }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "IMF")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("IMF:max",&str);
+        }
+
         if ( !str.l ) return;
         args->info_rules = str.s;
     }
@@ -272,9 +338,12 @@ static void info_rules_init(args_t *args)
         int id = bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, rule->hdr_tag);
         if ( !bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,id) ) error("The tag is not defined in the header: \"%s\"\n", rule->hdr_tag);
         rule->type = bcf_hdr_id2type(args->out_hdr,BCF_HL_INFO,id);
-        if ( rule->type!=BCF_HT_INT && rule->type!=BCF_HT_REAL && rule->type!=BCF_HT_STR ) error("The type is not supported: \"%s\"\n", rule->hdr_tag);
+        if ( rule->type==BCF_HT_INT ) rule->type_size = sizeof(int32_t);
+        else if ( rule->type==BCF_HT_REAL ) rule->type_size = sizeof(float);
+        else if ( rule->type==BCF_HT_STR ) rule->type_size = sizeof(char); 
+        else error("The type is not supported: \"%s\"\n", rule->hdr_tag);
 
-        while ( *ss ) ss++; ss++;
+        ss = strchr(ss, '\0'); ss++;
         if ( !*ss ) error("Could not parse INFO rules, missing logic of \"%s\"\n", rule->hdr_tag);
 
         int is_join = 0;
@@ -300,7 +369,8 @@ static void info_rules_init(args_t *args)
                 error("Only fixed-length vectors are supported with -i %s:%s\n", ss, rule->hdr_tag);
         }
 
-        while ( *ss ) ss++; ss++; n++;
+        ss = strchr(ss, '\0'); ss++;
+        n++;
     }
     free(str.s);
     free(tmp);
@@ -326,8 +396,10 @@ static void info_rules_reset(args_t *args)
 }
 static int info_rules_add_values(args_t *args, bcf_hdr_t *hdr, bcf1_t *line, info_rule_t *rule, maux1_t *als, int var_len)
 {
-    int ret = bcf_get_info_values(hdr, line, rule->hdr_tag, &args->maux->tmp_arr, &args->maux->ntmp_arr, rule->type);
+    int msize = args->maux->ntmp_arr / rule->type_size;
+    int ret = bcf_get_info_values(hdr, line, rule->hdr_tag, &args->maux->tmp_arr, &msize, rule->type);
     if ( ret<=0 ) error("FIXME: error parsing %s at %s:%d .. %d\n", rule->hdr_tag,bcf_seqname(hdr,line),line->pos+1,ret);
+    args->maux->ntmp_arr = msize * rule->type_size;
 
     rule->nblocks++;
 
@@ -345,7 +417,7 @@ static int info_rules_add_values(args_t *args, bcf_hdr_t *hdr, bcf1_t *line, inf
     int i, j;
     if ( var_len==BCF_VL_A )
     {
-        assert( ret==line->n_allele-1 );
+        if ( ret!=line->n_allele-1 ) error("Wrong number of %s fields at %s:%d\n",rule->hdr_tag,bcf_seqname(hdr,line),line->pos+1);
         args->maux->nagr_map = ret;
         hts_expand(int,args->maux->nagr_map,args->maux->magr_map,args->maux->agr_map);
         // create mapping from source file ALT indexes to dst file indexes
@@ -354,7 +426,7 @@ static int info_rules_add_values(args_t *args, bcf_hdr_t *hdr, bcf1_t *line, inf
     }
     else if ( var_len==BCF_VL_R )
     {
-        assert( ret==line->n_allele );
+        if ( ret!=line->n_allele ) error("Wrong number of %s fields at %s:%d\n",rule->hdr_tag,bcf_seqname(hdr,line),line->pos+1);
         args->maux->nagr_map = ret;
         hts_expand(int,args->maux->nagr_map,args->maux->magr_map,args->maux->agr_map);
         for (i=0; i<ret; i++) args->maux->agr_map[i] = als->map[i];
@@ -556,6 +628,8 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb)
     {
         for (i=0; i<*nb; i++)
         {
+            if ( b[i][0]=='<' ) continue;   // symbolic allele, do not modify
+            if ( b[i][0]=='*' ) continue;   // overlapping deletion (*), do not modify
             int l = strlen(b[i]);
             b[i] = (char*) realloc(b[i],l+rla-rlb+1);
             memcpy(b[i]+l,a[0]+rlb,rla-rlb+1);
@@ -565,13 +639,15 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb)
     // now check if the $a alleles are present and if not add them
     for (i=1; i<na; i++)
     {
+        int const_ai = 1;
         char *ai;
-        if ( rlb>rla )  // $a alleles need expanding
+        if ( rlb>rla && a[i][0]!='<' && a[i][0]!='*' )  // $a alleles need expanding and not a symbolic allele or *
         {
             int l = strlen(a[i]);
             ai = (char*) malloc(l+rlb-rla+1);
             memcpy(ai,a[i],l);
             memcpy(ai+l,b[0]+rla,rlb-rla+1);
+            const_ai = 0;
         }
         else
             ai = a[i];
@@ -582,42 +658,59 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb)
         if ( j<*nb ) // $b already has the same allele
         {
             map[i] = j;
-            if ( rlb>rla ) free(ai);
+            if ( !const_ai ) free(ai);
             continue;
         }
         // new allele
         map[i] = *nb;
-        b[*nb] = rlb>rla ? ai : strdup(ai);
+        if ( b[*nb] ) free(b[*nb]);
+        b[*nb] = const_ai ? strdup(ai) : ai;
         (*nb)++;
     }
     return b;
 }
 
-maux_t *maux_init(bcf_srs_t *files)
+maux_t *maux_init(args_t *args)
 {
+    bcf_srs_t *files = args->files;
     maux_t *ma = (maux_t*) calloc(1,sizeof(maux_t));
     ma->n      = files->nreaders;
-    ma->nbuf   = (int *) calloc(ma->n,sizeof(int));
-    ma->d      = (maux1_t**) calloc(ma->n,sizeof(maux1_t*));
     ma->files  = files;
     int i, n_smpl = 0;
     for (i=0; i<ma->n; i++)
         n_smpl += bcf_hdr_nsamples(files->readers[i].header);
+    if ( args->do_gvcf )
+    {
+        ma->gvcf = (gvcf_aux_t*) calloc(ma->n,sizeof(gvcf_aux_t));
+        for (i=0; i<ma->n; i++)
+            ma->gvcf[i].line = bcf_init1();
+    }
     ma->smpl_ploidy = (int*) calloc(n_smpl,sizeof(int));
     ma->smpl_nGsize = (int*) malloc(n_smpl*sizeof(int));
-    ma->has_line = (int*) malloc(ma->n*sizeof(int));
+    ma->buf = (buffer_t*) calloc(ma->n,sizeof(buffer_t));
+    for (i=0; i<ma->n; i++)
+        ma->buf[i].rid = -1;
     return ma;
 }
 void maux_destroy(maux_t *ma)
 {
-    int i;
+    int i,j;
+    for (i=0; i<ma->mals; i++)
+    {
+        free(ma->als[i]);
+        ma->als[i] = NULL;
+    }
     for (i=0; i<ma->n; i++) // for each reader
     {
-        if ( !ma->d[i] ) continue;
-        int j;
-        for (j=0; j<ma->nbuf[i]; j++)  // for each buffered line
-            if ( ma->d[i][j].map ) free(ma->d[i][j].map);
-        free(ma->d[i]);
+        for (j=0; j<ma->buf[i].mrec; j++)  // for each buffered line
+            free(ma->buf[i].rec[j].map);
+        free(ma->buf[i].rec);
+    }
+    free(ma->buf);
+    if ( ma->gvcf )
+    {
+        for (i=0; i<ma->n; i++) bcf_destroy(ma->gvcf[i].line);
+        free(ma->gvcf);
     }
     for (i=0; i<ma->mAGR_info; i++)
         free(ma->AGR_info[i].buf);
@@ -626,32 +719,69 @@ void maux_destroy(maux_t *ma)
     if (ma->ntmp_arr) free(ma->tmp_arr);
     if (ma->nfmt_map) free(ma->fmt_map);
     // ma->inf freed in bcf_destroy1
-    free(ma->d);
-    free(ma->nbuf);
     for (i=0; i<ma->mals; i++) free(ma->als[i]);
     if (ma->mout_als) free(ma->out_als);
     free(ma->als);
     free(ma->cnt);
     free(ma->smpl_ploidy);
     free(ma->smpl_nGsize);
-    free(ma->has_line);
+    free(ma->chr);
     free(ma);
 }
-void maux_expand1(maux_t *ma, int i)
+void maux_expand1(buffer_t *buf, int size)
 {
-    if ( ma->nbuf[i] <= ma->files->readers[i].nbuffer )
+    if ( buf->mrec < size )
     {
-        int n = ma->files->readers[i].nbuffer + 1;
-        ma->d[i] = (maux1_t*) realloc(ma->d[i], sizeof(maux1_t)*n);
-        memset(ma->d[i]+ma->nbuf[i],0,sizeof(maux1_t)*(n-ma->nbuf[i]));
-        ma->nbuf[i] = n;
+        hts_expand0(maux1_t,size,buf->mrec,buf->rec);
+        buf->mrec = size;
     }
 }
 void maux_reset(maux_t *ma)
 {
-    int i;
-    for (i=0; i<ma->n; i++) maux_expand1(ma, i);
-    for (i=1; i<ma->ncnt; i++) ma->cnt[i] = 0;
+    int i,j;
+    for (i=0; i<ma->n; i++) maux_expand1(&ma->buf[i],ma->files->readers[i].nbuffer+1);
+    for (i=0; i<ma->ncnt; i++) ma->cnt[i] = 0;
+    for (i=0; i<ma->mals; i++)
+    {
+        free(ma->als[i]);
+        ma->als[i] = NULL;
+    }
+    const char *chr = NULL;
+    ma->nals  = 0;
+    ma->pos   = -1;
+    for (i=0; i<ma->n; i++)
+    {
+        if ( !bcf_sr_has_line(ma->files,i) ) continue;
+        bcf1_t *line = bcf_sr_get_line(ma->files,i);
+        bcf_hdr_t *hdr = bcf_sr_get_header(ma->files,i);
+        chr = bcf_seqname(hdr,line);
+        ma->pos = line->pos;
+        break;
+    }
+    if ( chr )
+    {
+        free(ma->chr);
+        ma->chr = strdup(chr);
+    }
+    for (i=0; i<ma->n; i++)
+    {
+        bcf_hdr_t *hdr = bcf_sr_get_header(ma->files,i);
+        ma->buf[i].rid = bcf_hdr_name2id(hdr,chr);
+        ma->buf[i].beg = bcf_sr_has_line(ma->files,i) ? 0 : 1;
+        for (j=ma->buf[i].beg; j<=ma->files->readers[i].nbuffer; j++)
+        {
+            ma->buf[i].rec[j].skip = 0;
+            bcf1_t *line = ma->files->readers[i].buffer[j];
+            if ( line->rid!=ma->buf[i].rid || line->pos!=ma->pos ) break;
+        }
+        ma->buf[i].end = j;
+        ma->buf[i].cur = -1;
+        if ( ma->buf[i].beg < ma->buf[i].end ) 
+        {
+            ma->buf[i].lines = ma->files->readers[i].buffer;
+            if ( ma->gvcf ) ma->gvcf[i].active = 0;     // gvcf block cannot overlap with the next record
+        }
+    }
 }
 void maux_debug(maux_t *ma, int ir, int ib)
 {
@@ -684,16 +814,20 @@ void merge_chrom2qual(args_t *args, bcf1_t *out)
     out->pos = -1;
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i] ) continue;
+        bcf1_t *line = maux_get_line(args, i);
+        if ( !line ) continue;
+        bcf_unpack(line, BCF_UN_ALL);
 
         bcf_sr_t *reader = &files->readers[i];
-        bcf1_t *line = reader->buffer[0];
         bcf_hdr_t *hdr = reader->header;
 
-        // alleles
+        // not all maux alleles are always used, mark the ones we'll need
         int j;
         for (j=1; j<line->n_allele; j++)
-            al_idxs[ ma->d[i][0].map[j] ] = 1;
+        {
+            int irec = ma->buf[i].cur;
+            al_idxs[ ma->buf[i].rec[irec].map[j] ] = 1;
+        }
 
         // position
         if ( out->pos==-1 )
@@ -717,16 +851,15 @@ void merge_chrom2qual(args_t *args, bcf1_t *out)
         }
 
         // set QUAL to the max qual value. Not exactly correct, but good enough for now
-        if ( !bcf_float_is_missing(files->readers[i].buffer[0]->qual) )
+        if ( !bcf_float_is_missing(line->qual) )
         {
-            if ( bcf_float_is_missing(out->qual) || out->qual < files->readers[i].buffer[0]->qual ) out->qual = files->readers[i].buffer[0]->qual;
+            if ( bcf_float_is_missing(out->qual) || out->qual < line->qual ) out->qual = line->qual;
         }
     }
 
     // set ID
     if ( !tmps->l ) kputs(".", tmps);
-    if ( out->d.id ) free(out->d.id);
-    out->d.id = strdup(tmps->s);
+    bcf_update_id(out_hdr, out, tmps->s);
 
     // set alleles
     ma->nout_als = 0;
@@ -740,10 +873,13 @@ void merge_chrom2qual(args_t *args, bcf1_t *out)
         int ir, j;
         for (ir=0; ir<files->nreaders; ir++)
         {
-            if ( !ma->has_line[ir] ) continue;
-            bcf1_t *line = files->readers[ir].buffer[0];
+            bcf1_t *line = maux_get_line(args,ir);
+            if ( !line ) continue;
             for (j=1; j<line->n_allele; j++)
-                if ( ma->d[ir][0].map[j]==i ) ma->d[ir][0].map[j] = ma->nout_als;
+            {
+                int irec = ma->buf[ir].cur;
+                if ( ma->buf[ir].rec[irec].map[j]==i ) ma->buf[ir].rec[irec].map[j] = ma->nout_als;
+            }
         }
     }
     // Expand the arrays and realloc the alleles string. Note that all alleles are in a single allocated block.
@@ -765,20 +901,36 @@ void merge_filter(args_t *args, bcf1_t *out)
     bcf_hdr_t *out_hdr = args->out_hdr;
 
     int i, ret;
+    if ( args->filter_logic == FLT_LOGIC_REMOVE )
+    {
+        for (i=0; i<files->nreaders; i++)
+        {
+            bcf1_t *line = maux_get_line(args, i);
+            if ( !line ) continue;
+            bcf_sr_t *reader = &files->readers[i];
+            bcf_hdr_t *hdr = reader->header;
+            if ( bcf_has_filter(hdr, line, "PASS") ) break;
+        }
+        if ( i<files->nreaders )
+        {
+            int flt_id = bcf_hdr_id2int(out_hdr, BCF_DT_ID, "PASS");
+            bcf_add_filter(out_hdr, out, flt_id);
+            return;
+        }
+    }
+
     khiter_t kitr;
     strdict_t *tmph = args->tmph;
     kh_clear(strdict, tmph);
 
-    maux_t *ma = args->maux;
     out->d.n_flt = 0;
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i]) continue;
+        bcf1_t *line = maux_get_line(args, i);
+        if ( !line ) continue;
 
         bcf_sr_t *reader = &files->readers[i];
-        bcf1_t *line = reader->buffer[0];
         bcf_hdr_t *hdr = reader->header;
-        bcf_unpack(line, BCF_UN_ALL);
 
         int k;
         for (k=0; k<line->d.n_flt; k++)
@@ -789,8 +941,8 @@ void merge_filter(args_t *args, bcf1_t *out)
             {
                 int id = bcf_hdr_id2int(out_hdr, BCF_DT_ID, flt);
                 if ( id==-1 ) error("Error: The filter is not defined in the header: %s\n", flt);
-                hts_expand(int,out->d.n_flt+1,ma->mflt,ma->flt);
-                ma->flt[out->d.n_flt] = id;
+                hts_expand(int,out->d.n_flt+1,out->d.m_flt,out->d.flt);
+                out->d.flt[out->d.n_flt] = id;
                 out->d.n_flt++;
                 kh_put(strdict, tmph, flt, &ret);
             }
@@ -801,20 +953,17 @@ void merge_filter(args_t *args, bcf1_t *out)
     {
         int id = bcf_hdr_id2int(out_hdr, BCF_DT_ID, "PASS");
         for (i=0; i<out->d.n_flt; i++)
-            if ( ma->flt[i]==id ) break;
+            if ( out->d.flt[i]==id ) break;
         if ( i<out->d.n_flt )
         {
             out->d.n_flt--;
-            for (; i<out->d.n_flt; i++) ma->flt[i] = ma->flt[i+1];
+            for (; i<out->d.n_flt; i++) out->d.flt[i] = out->d.flt[i+1];
         }
     }
-    out->d.flt = ma->flt;
 }
 
 static void bcf_info_set_id(bcf1_t *line, bcf_info_t *info, int id, kstring_t *tmp_str)
 {
-    assert( !info->vptr_free );
-
     uint8_t *ptr = info->vptr - info->vptr_off;
     bcf_dec_typed_int1(ptr, &ptr);
 
@@ -833,8 +982,6 @@ static void bcf_info_set_id(bcf1_t *line, bcf_info_t *info, int id, kstring_t *t
     kputsn_(info->vptr, info->len << bcf_type_shift[info->type], tmp_str);
 
     info->vptr = (uint8_t*) tmp_str->s + info->vptr_off;
-    info->vptr_free = 1;
-    line->d.shared_dirty |= BCF1_DIRTY_INF;
     tmp_str->s = NULL;
     tmp_str->m = 0;
     tmp_str->l = 0;
@@ -1029,9 +1176,10 @@ void merge_info(args_t *args, bcf1_t *out)
     info_rules_reset(args);
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i] ) continue;
+        bcf1_t *line = maux_get_line(args,i);
+        if ( !line ) continue;
+        int irec = ma->buf[i].cur;
         bcf_sr_t *reader = &files->readers[i];
-        bcf1_t *line = reader->buffer[0];
         bcf_hdr_t *hdr = reader->header;
         for (j=0; j<line->n_info; j++)
         {
@@ -1050,7 +1198,7 @@ void merge_info(args_t *args, bcf1_t *out)
                 info_rule_t *rule = (info_rule_t*) bsearch(key, args->rules, args->nrules, sizeof(*args->rules), info_rules_comp_key);
                 if ( rule )
                 {
-                    maux1_t *als = ( len==BCF_VL_A || len==BCF_VL_G || len==BCF_VL_R ) ? &ma->d[i][0] : NULL;
+                    maux1_t *als = ( len==BCF_VL_A || len==BCF_VL_G || len==BCF_VL_R ) ? &ma->buf[i].rec[irec] : NULL;
                     if ( info_rules_add_values(args, hdr, line, rule, als, len) ) continue;
                 }
             }
@@ -1061,7 +1209,7 @@ void merge_info(args_t *args, bcf1_t *out)
             {
                 if ( kitr == kh_end(tmph) )
                 {
-                    // first occurance in this reader, alloc arrays
+                    // seeing this key for the first time
                     ma->nAGR_info++;
                     hts_expand0(AGR_info_t,ma->nAGR_info,ma->mAGR_info,ma->AGR_info);
                     kitr = kh_put(strdict, tmph, key, &ret);
@@ -1079,37 +1227,36 @@ void merge_info(args_t *args, bcf1_t *out)
                 kitr = kh_get(strdict, tmph, key);
                 int idx = kh_val(tmph, kitr);
                 if ( idx<0 ) error("Error occurred while processing INFO tag \"%s\" at %s:%d\n", key,bcf_seqname(hdr,line),line->pos+1);
-                merge_AGR_info_tag(hdr, line,inf,len,&ma->d[i][0],&ma->AGR_info[idx]);
+                merge_AGR_info_tag(hdr, line,inf,len,&ma->buf[i].rec[irec],&ma->AGR_info[idx]);
                 continue;
             }
 
             if ( kitr == kh_end(tmph) )
             {
-                hts_expand0(bcf_info_t,out->n_info+1,ma->minf,ma->inf);
-                ma->inf[out->n_info].key  = id;
-                ma->inf[out->n_info].type = inf->type;
-                ma->inf[out->n_info].len  = inf->len;
-                ma->inf[out->n_info].vptr = inf->vptr;
-                ma->inf[out->n_info].v1.i = inf->v1.i;
-                ma->inf[out->n_info].v1.f = inf->v1.f;
-                ma->inf[out->n_info].vptr_off  = inf->vptr_off;
-                ma->inf[out->n_info].vptr_len  = inf->vptr_len;
-                ma->inf[out->n_info].vptr_free = inf->vptr_free;
+                // Seeing this key for the first time.  Although quite hacky,
+                // this is faster than anything else given the data structures..
+
+                hts_expand0(bcf_info_t,out->n_info+1,out->d.m_info,out->d.info);
+                out->d.info[out->n_info].key  = id;
+                out->d.info[out->n_info].type = inf->type;
+                out->d.info[out->n_info].len  = inf->len;
+                out->d.info[out->n_info].v1.i = inf->v1.i;
+                out->d.info[out->n_info].v1.f = inf->v1.f;
+                out->d.info[out->n_info].vptr_off  = inf->vptr_off;
+                out->d.info[out->n_info].vptr_len  = inf->vptr_len;
+                out->d.info[out->n_info].vptr_free = 1;
+                out->d.info[out->n_info].vptr = (uint8_t*) malloc(inf->vptr_len+inf->vptr_off); 
+                memcpy(out->d.info[out->n_info].vptr,inf->vptr-inf->vptr_off, inf->vptr_len+inf->vptr_off);
+                out->d.info[out->n_info].vptr += inf->vptr_off;
                 if ( (args->output_type & FT_BCF) && id!=bcf_hdr_id2int(hdr, BCF_DT_ID, key) )
-                {
-                    // The existing packed info cannot be reused. Change the id.
-                    // Although quite hacky, it's faster than anything else given
-                    // the data structures
-                    bcf_info_set_id(out, &ma->inf[out->n_info], id, &args->tmps);
-                }
+                    bcf_info_set_id(out, &out->d.info[out->n_info], id, &args->tmps);
+                out->d.shared_dirty |= BCF1_DIRTY_INF;
                 out->n_info++;
                 kitr = kh_put(strdict, tmph, key, &ret);
                 kh_val(tmph,kitr) = -(out->n_info-1);   // arbitrary negative value
             }
         }
     }
-    out->d.info = ma->inf;
-    out->d.m_info = ma->minf;
     for (i=0; i<args->nrules; i++)
         args->rules[i].merger(args->out_hdr, out, &args->rules[i]);
     for (i=0; i<ma->nAGR_info; i++)
@@ -1154,12 +1301,14 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     }
     memset(ma->smpl_ploidy,0,nsamples*sizeof(int));
 
+    int default_gt = args->missing_to_ref ? bcf_gt_unphased(0) : bcf_gt_missing;
     for (i=0; i<files->nreaders; i++)
     {
         bcf_sr_t *reader = &files->readers[i];
         bcf_hdr_t *hdr = reader->header;
         bcf_fmt_t *fmt_ori = fmt_map[i];
         int32_t *tmp  = (int32_t *) ma->tmp_arr + ismpl*nsize;
+        int irec = ma->buf[i].cur;
 
         int j, k;
         if ( !fmt_ori )
@@ -1167,7 +1316,7 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
             // missing values: assume maximum ploidy
             for (j=0; j<bcf_hdr_nsamples(hdr); j++)
             {
-                for (k=0; k<nsize; k++) { tmp[k] = 0; ma->smpl_ploidy[ismpl+j]++; }
+                for (k=0; k<nsize; k++) { tmp[k] = default_gt; ma->smpl_ploidy[ismpl+j]++; }
                 tmp += nsize;
             }
             ismpl += bcf_hdr_nsamples(hdr);
@@ -1176,7 +1325,7 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
 
         #define BRANCH(type_t, vector_end) { \
             type_t *p_ori  = (type_t*) fmt_ori->p; \
-            if ( !ma->d[i][0].als_differ ) \
+            if ( !ma->buf[i].rec[irec].als_differ ) \
             { \
                 /* the allele numbering is unchanged */ \
                 for (j=0; j<bcf_hdr_nsamples(hdr); j++) \
@@ -1206,7 +1355,7 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                     else \
                     { \
                         int al = (p_ori[k]>>1) - 1; \
-                        al = al<=0 ? al + 1 : ma->d[i][0].map[al] + 1; \
+                        al = al<=0 ? al + 1 : ma->buf[i].rec[irec].map[al] + 1; \
                         tmp[k] = (al << 1) | ((p_ori[k])&1); \
                     } \
                 } \
@@ -1239,7 +1388,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     int nsize = 0, length = BCF_VL_FIXED, type = -1;
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i] ) continue;
+        if ( !maux_get_line(args,i) ) continue;
         if ( !fmt_map[i] ) continue;
         if ( !key ) key = files->readers[i].header->id[BCF_DT_ID][fmt_map[i]->id].key;
         type = fmt_map[i]->type;
@@ -1277,10 +1426,12 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
         bcf_sr_t *reader = &files->readers[i];
         bcf_hdr_t *hdr = reader->header;
         bcf_fmt_t *fmt_ori = fmt_map[i];
+        bcf1_t *line = maux_get_line(args, i);
+        int irec = ma->buf[i].cur;
         if ( fmt_ori )
         {
             type = fmt_ori->type;
-            int nals_ori = reader->buffer[0]->n_allele;
+            int nals_ori = line->n_allele;
             if ( length==BCF_VL_G )
             {
                 // if all fields are missing then n==1 is valid
@@ -1313,10 +1464,8 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                 ismpl += bcf_hdr_nsamples(hdr); \
                 continue; \
             } \
-            assert( ma->has_line[i] ); \
-            bcf1_t *line    = reader->buffer[0]; \
             src_type_t *src = (src_type_t*) fmt_ori->p; \
-            if ( (length!=BCF_VL_G && length!=BCF_VL_A && length!=BCF_VL_R) || (line->n_allele==out->n_allele && !ma->d[i][0].als_differ) ) \
+            if ( (length!=BCF_VL_G && length!=BCF_VL_A && length!=BCF_VL_R) || (line->n_allele==out->n_allele && !ma->buf[i].rec[irec].als_differ) ) \
             { \
                 /* alleles unchanged, copy over */ \
                 for (j=0; j<bcf_hdr_nsamples(hdr); j++) \
@@ -1358,7 +1507,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                         int iori, inew; \
                         for (iori=0; iori<line->n_allele; iori++) \
                         { \
-                            inew = ma->d[i][0].map[iori]; \
+                            inew = ma->buf[i].rec[irec].map[iori]; \
                             src = (src_type_t*) fmt_ori->p + j*fmt_ori->n + iori; \
                             tgt = (tgt_type_t *) ma->tmp_arr + (ismpl+j)*nsize + inew; \
                             if ( src_is_vector_end ) break; \
@@ -1372,10 +1521,10 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                         int iori,jori, inew,jnew; \
                         for (iori=0; iori<line->n_allele; iori++) \
                         { \
-                            inew = ma->d[i][0].map[iori]; \
+                            inew = ma->buf[i].rec[irec].map[iori]; \
                             for (jori=0; jori<=iori; jori++) \
                             { \
-                                jnew = ma->d[i][0].map[jori]; \
+                                jnew = ma->buf[i].rec[irec].map[jori]; \
                                 int kori = iori*(iori+1)/2 + jori; \
                                 int knew = inew>jnew ? inew*(inew+1)/2 + jnew : jnew*(jnew+1)/2 + inew; \
                                 src = (src_type_t*) fmt_ori->p + j*fmt_ori->n + kori; \
@@ -1412,7 +1561,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                     int iori,inew; \
                     for (iori=ifrom; iori<line->n_allele; iori++) \
                     { \
-                        inew = ma->d[i][0].map[iori] - ifrom; \
+                        inew = ma->buf[i].rec[irec].map[iori] - ifrom; \
                         tgt = (tgt_type_t *) ma->tmp_arr + (ismpl+j)*nsize + inew; \
                         if ( src_is_vector_end ) break; \
                         if ( src_is_missing ) tgt_set_missing; \
@@ -1461,9 +1610,9 @@ void merge_format(args_t *args, bcf1_t *out)
     int i, j, ret, has_GT = 0, max_ifmt = 0; // max fmt index
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i] ) continue;
+        bcf1_t *line = maux_get_line(args,i);
+        if ( !line ) continue;
         bcf_sr_t *reader = &files->readers[i];
-        bcf1_t *line = reader->buffer[0];
         bcf_hdr_t *hdr = reader->header;
         for (j=0; j<line->n_fmt; j++)
         {
@@ -1495,9 +1644,10 @@ void merge_format(args_t *args, bcf1_t *out)
             ma->fmt_map[ifmt*files->nreaders+i] = fmt;
         }
         // Check if the allele numbering must be changed
-        for (j=1; j<reader->buffer[0]->n_allele; j++)
-            if ( ma->d[i][0].map[j]!=j ) break;
-        ma->d[i][0].als_differ = j==reader->buffer[0]->n_allele ? 0 : 1;
+        int irec = ma->buf[i].cur;
+        for (j=1; j<line->n_allele; j++)
+            if ( ma->buf[i].rec[irec].map[j]!=j ) break;
+        ma->buf[i].rec[irec].als_differ = j==line->n_allele ? 0 : 1;
     }
 
     out->n_sample = bcf_hdr_nsamples(out_hdr);
@@ -1505,203 +1655,383 @@ void merge_format(args_t *args, bcf1_t *out)
         merge_GT(args, ma->fmt_map, out);
     update_AN_AC(out_hdr, out);
 
-    if ( out->d.info!=ma->inf )
-    {
-        // hacky, we rely on htslib internals: bcf_update_info() reallocated the info
-        ma->inf  = out->d.info;
-        ma->minf = out->d.m_info;
-    }
-
     for (i=1; i<=max_ifmt; i++)
         merge_format_field(args, &ma->fmt_map[i*files->nreaders], out);
     out->d.indiv_dirty = 1;
 }
 
-// The core merging function, one or none line from each reader
-void merge_line(args_t *args)
+void gvcf_set_alleles(args_t *args)
+{
+    int i,k;
+    bcf_srs_t *files = args->files;
+    maux_t *maux = args->maux;
+    gvcf_aux_t *gaux = maux->gvcf;
+    maux->nals = 0;
+
+    for (i=0; i<files->nreaders; i++)
+    {
+        if ( !gaux[i].active ) continue;
+        bcf1_t *line = maux_get_line(args, i);
+        int irec = maux->buf[i].cur;
+
+        hts_expand(int, line->n_allele, maux->buf[i].rec[irec].mmap, maux->buf[i].rec[irec].map);
+        if ( !maux->nals )    // first record, copy the alleles to the output
+        {
+            maux->nals = line->n_allele;
+            hts_expand0(char*, maux->nals, maux->mals, maux->als);
+            hts_expand0(int, maux->nals, maux->ncnt, maux->cnt);
+            for (k=0; k<maux->nals; k++)
+            {
+                if ( maux->als[k] ) free(maux->als[k]);
+                maux->als[k] = strdup(line->d.allele[k]);
+                maux->buf[i].rec[irec].map[k] = k;
+            }
+        }
+        else
+        {
+            maux->als = merge_alleles(line->d.allele, line->n_allele, maux->buf[i].rec[irec].map, maux->als, &maux->nals, &maux->mals);
+            if ( !maux->als )
+            {
+                bcf_hdr_t *hdr = bcf_sr_get_header(args->files,i);
+                error("Failed to merge alleles at %s:%d\n",bcf_seqname(hdr,line),line->pos+1);
+            }
+        }
+    }
+}
+
+/*
+    Output staged gVCF blocks, end is the last position of the block. Assuming
+    gaux[i].active flags are set and maux_get_line returns correct lines.
+*/
+void gvcf_write_block(args_t *args, int start, int end)
 {
+    int i;
+    maux_t *maux = args->maux;
+    gvcf_aux_t *gaux = maux->gvcf;
+    assert(gaux);
+
+    // Update POS
+    int min = INT_MAX;
+    char ref = 'N';
+    for (i=0; i<args->files->nreaders; i++)
+    {
+        if ( !gaux[i].active ) continue;
+        if ( ref=='N' && gaux[i].line->pos==start ) ref = gaux[i].line->d.allele[0][0];
+        gaux[i].line->pos = start;
+    }
+    for (i=0; i<args->files->nreaders; i++)
+    {
+        if ( !gaux[i].active ) continue;
+        if ( gaux[i].end < start ) 
+        { 
+            gaux[i].active = 0; 
+            maux->buf[i].cur = -1;
+            continue; 
+        }
+        gaux[i].line->d.allele[0][0] = ref;
+        if ( min > gaux[i].end ) min = gaux[i].end;
+    }
+    // Check for valid gVCF blocks in this region
+    if ( min==INT_MAX )
+    {
+    assert(0);
+        maux->gvcf_min = 0;
+        return;
+    }
+
     bcf1_t *out = args->out_line;
-    bcf_clear1(out);
-    out->unpacked = BCF_UN_ALL;
 
+    gvcf_set_alleles(args);
+
+    // Merge the staged lines
     merge_chrom2qual(args, out);
     merge_filter(args, out);
     merge_info(args, out);
     merge_format(args, out);
 
-    bcf_write1(args->out_fh, args->out_hdr, out);
-}
+    if ( args->gvcf_fai && out->d.allele[0][0]=='N' )
+    {
+        int slen  = 0;
+        char *seq = faidx_fetch_seq(args->gvcf_fai,maux->chr,out->pos,out->pos,&slen);
+        if (slen)
+        {
+            out->d.allele[0][0] = seq[0];
+            free(seq);
+        }
+    }
 
+    // Update END boundary
+    if ( end > start )
+    {
+        end++;
+        bcf_update_info_int32(args->out_hdr, out, "END", &end, 1);
+    }
+    else
+        bcf_update_info_int32(args->out_hdr, out, "END", NULL, 0);
+    bcf_write1(args->out_fh, args->out_hdr, out);
+    bcf_clear1(out);
 
-void debug_buffers(FILE *fp, bcf_srs_t *files);
-void debug_buffer(FILE *fp, bcf_sr_t *reader);
 
-#define SWAP(type_t,a,b) { type_t tmp = (a); (a) = (b); (b) = tmp; }
+    // Inactivate blocks which do not extend beyond END and find new gvcf_min
+    min = INT_MAX;
+    for (i=0; i<args->files->nreaders; i++)
+    {
+        if ( !gaux[i].active ) continue;
+        if ( gaux[i].end < end )
+        {
+            gaux[i].active = 0; 
+            maux->buf[i].cur = -1;
+            continue; 
+        }
+        // next min END position bigger than the current one
+        if ( maux->gvcf_min < gaux[i].end+1 && min > gaux[i].end+1 ) min = gaux[i].end + 1;
+    }
+    maux->gvcf_min = min==INT_MAX ? 0 : min;
+}
 
-// Clean the reader's buffer to and make it ready for the next next_line() call.
-// Moves finished records (SKIP_DONE flag set) at the end of the buffer and put
-// the rest to the beggining. Then shorten the buffer so that the last element
-// points to the last unfinished record. There are two special cases: the last
-// line of the buffer typically has a different position and must stay at the
-// end; next, the first record of the buffer must be one of those already
-// printed, as it will be discarded by next_line().
-//
-void shake_buffer(maux_t *maux, int ir, int pos)
+/*
+    Flush staged gVCF blocks. Flush everything if there are no more lines
+    (done=1) or if there is a new chromosome. If still on the same chromosome,
+    all hanging blocks must be ended by creating new records:
+        A
+            1 END=10
+        B
+            3 END=7
+        C
+            3 END=5
+        out
+            1 END=2  A . .
+            3 END=5  A B C
+            6 END=7  A B .
+            8 END=10 A . .
+    
+*/
+void gvcf_flush(args_t *args, int done)
 {
-    bcf_sr_t *reader = &maux->files->readers[ir];
-    maux1_t *m = maux->d[ir];
-
-    if ( !reader->buffer ) return;
-
     int i;
-    // FILE *fp = stdout;
-    // fprintf(fp,"<going to shake> nbuf=%d\t", reader->nbuffer); for (i=0; i<reader->nbuffer; i++) fprintf(fp," %d", skip[i]); fprintf(fp,"\n");
-    // debug_buffer(fp,reader);
-    // fprintf(fp,"--\n");
+    maux_t *maux = args->maux;
 
-    int a = 1, b = reader->nbuffer;
-    if ( reader->buffer[b]->pos != pos ) b--;   // move the last line separately afterwards
+    if ( !maux->chr ) return;   // first time here, nothing to flush
 
-    while ( a<b )
+    int flush_until = INT_MAX;
+    if ( !done )
     {
-        if ( !(m[a].skip&SKIP_DONE) ) { a++; continue; }
-        if ( m[b].skip&SKIP_DONE ) { b--; continue; }
-        SWAP(bcf1_t*, reader->buffer[a], reader->buffer[b]);
-        SWAP(maux1_t, m[a], m[b]);
-        a++;
-        b--;
-    }
+        // Get current position and chromosome
+        for (i=0; i<maux->n; i++)
+            if ( bcf_sr_has_line(maux->files,i) ) break;
+        bcf1_t *line = bcf_sr_get_line(maux->files,i);
+        bcf_hdr_t *hdr = bcf_sr_get_header(maux->files,i);
 
-    // position $a to the after the first unfinished record
-    while ( a<=reader->nbuffer && !(m[a].skip&SKIP_DONE) ) a++;
+        if ( !strcmp(maux->chr,bcf_seqname(hdr,line)) ) flush_until = line->pos;    // still on the same chr
+    }
 
-    if ( a<reader->nbuffer )
+    // When called on a region, trim the blocks accordingly
+    int start = maux->gvcf_break>=0 ? maux->gvcf_break + 1 : maux->pos;
+    if ( args->regs )
     {
-        // there is a gap between the unfinished lines at the beggining and the
-        // last line. The last line must be brought forward to fill the gap
-        if ( reader->buffer[reader->nbuffer]->pos != pos )
+        int rstart = -1, rend = -1;
+        if ( regidx_overlap(args->regs,maux->chr,start,flush_until,args->regs_itr) )
         {
-            SWAP(bcf1_t*, reader->buffer[a], reader->buffer[reader->nbuffer]);
-            SWAP(maux1_t, m[a], m[reader->nbuffer]);
-            reader->nbuffer = a;
+            // In case there are multiple regions, we treat them as one
+            rstart = args->regs_itr->beg;
+            while ( regitr_overlap(args->regs_itr) ) rend = args->regs_itr->end;
         }
+        if ( rstart > start ) start = rstart;
+        if ( rend < flush_until ) flush_until = rend+1;
     }
 
-    if ( !(m[0].skip&SKIP_DONE) && reader->buffer[0]->pos==pos )
+    // output all finished blocks
+    while ( maux->gvcf_min && start < flush_until )
     {
-        // the first record is unfinished, replace it with an empty line
-        // from the end of the buffer or else next_line will remove it
-        if ( reader->nbuffer + 1 >= maux->nbuf[ir] )
+        // does the block end before the new line or is it interrupted?
+        int tmp = maux->gvcf_min < flush_until ? maux->gvcf_min : flush_until;
+        if ( start > tmp-1 ) break;
+        gvcf_write_block(args,start,tmp-1); // gvcf_min is 1-based
+        start = tmp;
+    }
+}
+
+/*
+    Check incoming lines for new gVCF blocks, set pointer to the current source
+    buffer (gvcf or readers).  In contrast to gvcf_flush, this function can be
+    called only after maux_reset as it relies on updated maux buffers.
+*/
+void gvcf_stage(args_t *args, int pos)
+{
+    maux_t *maux = args->maux;
+    gvcf_aux_t *gaux = maux->gvcf;
+    bcf_srs_t *files = args->files;
+    int32_t *end = (int32_t*) maux->tmp_arr;
+    int i, nend = maux->ntmp_arr / sizeof(int32_t);
+
+    maux->gvcf_break = -1;
+    maux->gvcf_min = INT_MAX;
+    for (i=0; i<files->nreaders; i++)
+    {
+        if ( gaux[i].active )
         {
-            reader->nbuffer++;
-            maux_expand1(maux, ir);
-            reader->nbuffer--;
-            m = maux->d[ir];
+            // gvcf block should not overlap with another record
+            if ( maux->gvcf_min > gaux[i].end+1 ) maux->gvcf_min = gaux[i].end + 1;
+            maux->buf[i].beg = 0;
+            maux->buf[i].end = 1;
+            maux->buf[i].cur = 0;
+            continue;
         }
-        if ( reader->nbuffer+1 >= reader->mbuffer )
-            error("Uh, did not expect this: %d vs %d\n", reader->nbuffer,reader->mbuffer);
 
-        if ( reader->buffer[reader->nbuffer]->pos!=pos )
+        // Does any of the lines have END set? It is enough to check only the
+        // first line, there should be no duplicate records with END in gVCF
+
+        if ( maux->buf[i].beg==maux->buf[i].end ) continue; // no new record
+
+        int irec = maux->buf[i].beg;
+        bcf_hdr_t *hdr = bcf_sr_get_header(files, i);
+        bcf1_t *line = args->files->readers[i].buffer[irec];
+        int ret = bcf_get_info_int32(hdr,line,"END",&end,&nend);
+        if ( ret==1 )
         {
-            // 4way swap
-            bcf1_t *tmp = reader->buffer[0];
-            reader->buffer[0] = reader->buffer[reader->nbuffer+1];
-            reader->buffer[reader->nbuffer+1] = reader->buffer[reader->nbuffer];
-            reader->buffer[reader->nbuffer] = tmp;
-            m[reader->nbuffer].skip   = m[0].skip;
-            m[reader->nbuffer+1].skip = SKIP_DIFF;
-            reader->nbuffer++;
+            // END is set, this is a new gVCF block. Cache this line in gaux[i] and swap with
+            // an empty record: the gaux line must be kept until we reach its END.
+            gaux[i].active = 1;
+            gaux[i].end = end[0] - 1;
+            SWAP(bcf1_t*,args->files->readers[i].buffer[irec],gaux[i].line);
+            gaux[i].line->pos = pos;
+
+            maux->buf[i].lines = &gaux[i].line;
+            maux->buf[i].beg = 0;
+            maux->buf[i].end = 1;
+            maux->buf[i].cur = 0;
+
+            // Set the rid,pos of the swapped line in the buffer or else the
+            // synced reader will have a problem with the next line
+            //
+            args->files->readers[i].buffer[irec]->rid = maux->buf[i].rid;
+            args->files->readers[i].buffer[irec]->pos = maux->pos;
+
+            // Update block offsets
+            if ( maux->gvcf_min > gaux[i].end+1 ) maux->gvcf_min = gaux[i].end + 1;
         }
         else
-        {
-            SWAP(bcf1_t*, reader->buffer[0], reader->buffer[reader->nbuffer+1]);
-            SWAP(maux1_t, m[0], m[reader->nbuffer+1]);
-        }
+            maux->gvcf_break = line->pos;   // must break the gvcf block 
     }
+    maux->ntmp_arr = nend * sizeof(int32_t);
+    maux->tmp_arr  = end;
+    if ( maux->gvcf_min==INT_MAX ) maux->gvcf_min = 0;
+}
+
+
+void debug_buffers(FILE *fp, bcf_srs_t *files);
+void debug_buffer(FILE *fp, bcf_srs_t *files, int reader);
+
+/*
+    Flush all buffered and processed records with the same coordinate.
+    Note that synced reader discards buffer[0], so that needs to stay
+    untouched.
+*/
+void clean_buffer(args_t *args)
+{
+    maux_t *ma = args->maux;
+
+    int ir;
+    for (ir=0; ir<ma->n; ir++)
+    {
+        // Invalidate pointer to reader's buffer or else gvcf_flush will attempt
+        // to use the old lines via maux_get_line()
+        if ( ma->gvcf && !ma->gvcf[ir].active ) ma->buf[ir].cur = -1;
 
-    // debug_buffer(fp,reader);
-    // fprintf(fp,"<shaken>\t"); for (i=0; i<reader->nbuffer; i++) fprintf(fp," %d", skip[i]);
-    // fprintf(fp,"\n\n");
+        bcf_sr_t *reader = bcf_sr_get_reader(args->files,ir);
+        if ( !reader->nbuffer ) continue;   // nothing to clean
 
-    // set position of finished buffer[0] line to -1, otherwise swapping may
-    // bring it back after next_line()
-    reader->buffer[0]->pos = -1;
+        bcf1_t **buf = reader->buffer;
+        if ( buf[1]->rid!=ma->buf[ir].rid || buf[1]->pos!=ma->pos ) continue;    // nothing to flush
 
-    // trim the buffer, remove finished lines from the end
-    i = reader->nbuffer;
-    while ( i>=1 && m[i--].skip&SKIP_DONE )
-        reader->nbuffer--;
+        int a = 1, b = 2;
+        while ( b<=reader->nbuffer && buf[b]->rid==ma->buf[ir].rid && buf[b]->pos==ma->pos ) b++;
+        // b now points to the first line we want to preserve
+        while ( b<=reader->nbuffer )
+        {
+            SWAP(bcf1_t*, buf[a], buf[b]);
+            a++; b++;
+        }
+        reader->nbuffer -= b-a;
+    }
 }
 
-void debug_maux(args_t *args, int pos, int var_type)
+void debug_maux(args_t *args)
 {
     bcf_srs_t *files = args->files;
     maux_t *maux = args->maux;
     int j,k,l;
 
-    fprintf(stderr,"Alleles to merge at %d\n", pos+1);
+    fprintf(stderr,"Alleles to merge at %d, nals=%d\n", maux->pos+1,maux->nals);
     for (j=0; j<files->nreaders; j++)
     {
         bcf_sr_t *reader = &files->readers[j];
+        buffer_t *buf = &maux->buf[j];
         fprintf(stderr," reader %d: ", j);
-        for (k=0; k<=reader->nbuffer; k++)
+        for (k=buf->beg; k<buf->end; k++)
         {
-            if ( maux->d[j][k].skip==SKIP_DONE ) continue;
+            if ( buf->rec[k].skip & SKIP_DONE ) continue;
             bcf1_t *line = reader->buffer[k];
-            if ( line->pos!=pos ) continue;
             fprintf(stderr,"\t");
-            if ( maux->d[j][k].skip ) fprintf(stderr,"[");  // this record will not be merged in this round
+            if ( buf->rec[k].skip ) fprintf(stderr,"[");  // this record will not be merged in this round
             for (l=0; l<line->n_allele; l++)
                 fprintf(stderr,"%s%s", l==0?"":",", line->d.allele[l]);
-            if ( maux->d[j][k].skip ) fprintf(stderr,"]");
+            if ( buf->rec[k].skip ) fprintf(stderr,"]");
         }
         fprintf(stderr,"\n");
     }
     fprintf(stderr," counts: ");
-    for (j=0; j<maux->nals; j++) fprintf(stderr,"%s   %dx %s", j==0?"":",",maux->cnt[j], maux->als[j]); fprintf(stderr,"\n");
-    for (j=0; j<files->nreaders; j++)
-    {
-        bcf_sr_t *reader = &files->readers[j];
-        fprintf(stderr," out %d: ", j);
-        for (k=0; k<=reader->nbuffer; k++)
-        {
-            if ( maux->d[j][k].skip==SKIP_DONE ) continue;
-            bcf1_t *line = reader->buffer[k];
-            if ( line->pos!=pos ) continue;
-            if ( maux->d[j][k].skip ) continue;
-            fprintf(stderr,"\t");
-            for (l=0; l<line->n_allele; l++)
-                fprintf(stderr,"%s%s", l==0?"":",", maux->als[maux->d[j][k].map[l]]);
-        }
-        fprintf(stderr,"\n");
-    }
-    fprintf(stderr,"\n");
+    for (j=0; j<maux->nals; j++) fprintf(stderr,"%s   %dx %s", j==0?"":",",maux->cnt[j], maux->als[j]);
+    fprintf(stderr,"\n\n");
 }
 
-// Determine which line should be merged from which reader: go through all
-// readers and all buffered lines, expand REF,ALT and try to match lines with
-// the same ALTs. A step towards output independent on input ordering of the
-// lines.
-void merge_buffer(args_t *args)
+
+/*
+   Determine which line should be merged from which reader: go through all
+   readers and all buffered lines, expand REF,ALT and try to match lines with
+   the same ALTs.
+ */
+int can_merge(args_t *args)
 {
     bcf_srs_t *files = args->files;
-    int i, pos = -1, var_type = 0;
-    char *id = NULL;
+    int snp_mask = (VCF_SNP<<1)|(VCF_MNP<<1), indel_mask = VCF_INDEL<<1, ref_mask = 1;
     maux_t *maux = args->maux;
-    maux_reset(maux);
+    gvcf_aux_t *gaux = maux->gvcf;
+    char *id = NULL, ref = 'N';
+    maux->var_types = maux->nals = 0;
 
-    // set the current position
+    int i,j,k, ntodo = 0;
     for (i=0; i<files->nreaders; i++)
     {
-        if ( bcf_sr_has_line(files,i) )
+        buffer_t *buf = &maux->buf[i];
+
+        if ( gaux && gaux[i].active )
         {
-            bcf1_t *line = bcf_sr_get_line(files,i);
-            pos = line->pos;
-            var_type = bcf_get_variant_types(line);
-            id = line->d.id;
-            break;
+            // skip readers with active gvcf blocks
+            buf->rec[buf->beg].skip = SKIP_DIFF;
+            continue;
+        }
+        for (j=buf->beg; j<buf->end; j++)
+        {
+            if ( buf->rec[j].skip & SKIP_DONE ) continue;
+
+            buf->rec[j].skip = SKIP_DIFF;
+            ntodo++;
+
+            if ( args->merge_by_id )
+                id = buf->lines[j]->d.id;
+            else
+            {
+                int var_type = bcf_get_variant_types(buf->lines[j]);
+                maux->var_types |= var_type ? var_type<<1 : 1;
+            }
         }
+
+        // for gvcf: find out REF at this position
+        if ( buf->beg < buf->end && ref=='N' )
+            ref = buf->lines[buf->beg]->d.allele[0][0];
     }
+    if ( !ntodo ) return 0;
 
     // In this loop we select from each reader compatible candidate lines.
     // (i.e. SNPs or indels). Go through all files and all lines at this
@@ -1710,19 +2040,24 @@ void merge_buffer(args_t *args)
     for (i=0; i<files->nreaders; i++)
     {
         bcf_sr_t *reader = &files->readers[i];
-        if ( !reader->buffer ) continue;
-        int j, k;
-        for (j=0; j<=reader->nbuffer; j++)
+        buffer_t *buf = &maux->buf[i];
+
+        if ( gaux && gaux[i].active )
         {
-            bcf1_t *line = reader->buffer[j];
+            gaux[i].line->d.allele[0][0] = ref;
+            gaux[i].line->pos = maux->pos;
+        }
+
+        for (j=buf->beg; j<buf->end; j++)
+        {
+            if ( buf->rec[j].skip & SKIP_DONE ) continue;
+
+            bcf1_t *line = buf->lines[j]; // ptr to reader's buffer or gvcf buffer
+
             int line_type = bcf_get_variant_types(line);
+            line_type = line_type ? line_type<<1 : 1;
+
             // select relevant lines
-            maux->d[i][j].skip = SKIP_DIFF;
-            if ( pos!=line->pos )
-            {
-                if ( j==0 ) maux->d[i][j].skip |= SKIP_DONE; // left from previous run, force to ignore
-                continue;
-            }
             if ( args->merge_by_id )
             {
                 if ( strcmp(id,line->d.id) ) continue;
@@ -1733,30 +2068,30 @@ void merge_buffer(args_t *args)
                 {
                     // All alleles of the tested record must be present in the
                     // selected maux record plus variant types must be the same
-                    if ( var_type!=line->d.var_type ) continue;
+                    if ( (maux->var_types & line_type) != line_type ) continue;
                     if ( vcmp_set_ref(args->vcmp,maux->als[0],line->d.allele[0]) < 0 ) continue;   // refs not compatible
                     for (k=1; k<line->n_allele; k++)
                     {
                         if ( vcmp_find_allele(args->vcmp,maux->als+1,maux->nals-1,line->d.allele[k])>=0 ) break;
                     }
-                    if ( k==line->n_allele ) continue;  // no matching allele
+                    if ( !(line_type&ref_mask) && k==line->n_allele ) continue;  // not a REF-only site and there is no matching allele
                 }
                 if ( !(args->collapse&COLLAPSE_ANY) )
                 {
-                    int compatible = 0;
-                    if ( line_type==var_type ) compatible = 1;
-                    else if ( line_type==VCF_REF ) compatible = 1;   // REF can go with anything
-                    else if ( var_type&VCF_SNP && line_type&VCF_SNP ) compatible = 1;
-                    else if ( var_type&VCF_INDEL && line_type&VCF_INDEL ) compatible = 1;
-                    else if ( var_type&VCF_MNP && line_type&VCF_MNP ) compatible = 1;
-                    else if ( var_type&VCF_SNP && line_type&VCF_MNP ) compatible = 1;
-                    else if ( var_type&VCF_MNP && line_type&VCF_SNP ) compatible = 1;
-                    if ( !compatible ) continue;
+                    // Merge:
+                    //  - SNPs+SNPs+MNPs+REF if -m both,snps
+                    //  - indels+indels+REF  if -m both,indels, REF only if SNPs are not present
+                    //  - SNPs come first
+                    if ( line_type & indel_mask )
+                    {
+                        if ( !(line_type&snp_mask) && maux->var_types&snp_mask ) continue;  // SNPs come first
+                        if ( args->do_gvcf && maux->var_types&ref_mask ) continue;  // never merge indels with gVCF blocks
+                    }
                 }
             }
-            maux->d[i][j].skip = 0;
+            buf->rec[j].skip = 0;
 
-            hts_expand(int, line->n_allele, maux->d[i][j].mmap, maux->d[i][j].map);
+            hts_expand(int, line->n_allele, buf->rec[j].mmap, buf->rec[j].map);
             if ( !maux->nals )    // first record, copy the alleles to the output
             {
                 maux->nals = line->n_allele;
@@ -1764,111 +2099,118 @@ void merge_buffer(args_t *args)
                 hts_expand0(int, maux->nals, maux->ncnt, maux->cnt);
                 for (k=0; k<maux->nals; k++)
                 {
+                    free(maux->als[k]);
                     maux->als[k] = strdup(line->d.allele[k]);
-                    maux->d[i][j].map[k] = k;
+                    buf->rec[j].map[k] = k;
                     maux->cnt[k] = 1;
                 }
-                pos = line->pos;
                 continue;
             }
-
             // normalize alleles
-            maux->als = merge_alleles(line->d.allele, line->n_allele, maux->d[i][j].map, maux->als, &maux->nals, &maux->mals);
-            if ( !maux->als ) error("Failed to merge alleles at %s:%d in %s\n",bcf_seqname(bcf_sr_get_header(args->files,j),line),line->pos+1,reader->fname);
+            maux->als = merge_alleles(line->d.allele, line->n_allele, buf->rec[j].map, maux->als, &maux->nals, &maux->mals);
+            if ( !maux->als ) error("Failed to merge alleles at %s:%d in %s\n",bcf_seqname(args->out_hdr,line),line->pos+1,reader->fname);
             hts_expand0(int, maux->nals, maux->ncnt, maux->cnt);
             for (k=1; k<line->n_allele; k++)
-                maux->cnt[ maux->d[i][j].map[k] ]++;    // how many times an allele appears in the files
+                maux->cnt[ buf->rec[j].map[k] ]++;    // how many times an allele appears in the files
             maux->cnt[0]++;
         }
     }
+    return 1;
+}
 
-    // debug_maux(args, pos, var_type);
+/*
+   Select records that have the same alleles; the input ordering of indels
+   must not matter. Multiple VCF lines can be emitted from this loop.
+   We expect only very few alleles and not many records with the same
+   position in the buffers, therefore the nested loops should not slow us
+   much.
+*/
+void stage_line(args_t *args)
+{
+    int snp_mask = (VCF_SNP<<1)|(VCF_MNP<<1), indel_mask = VCF_INDEL<<1, ref_mask = 1;
+    bcf_srs_t *files = args->files;
+    maux_t *maux = args->maux;
 
-    // Select records that have the same alleles; the input ordering of indels
-    // must not matter. Multiple VCF lines can be emitted from this loop.
-    // We expect only very few alleles and not many records with the same
-    // position in the buffers, therefore the nested loops should not slow us
-    // much.
-    while (1)
+    // debug_maux(args);
+
+    // take the most frequent allele present in multiple files, REF is skipped
+    int i,j,k,icnt = 1;
+    for (i=2; i<maux->nals; i++)
+        if ( maux->cnt[i] > maux->cnt[icnt] ) icnt = i;
+
+    int nout = 0;
+    for (i=0; i<files->nreaders; i++)
     {
-        // take the most frequent allele present in multiple files
-        int icnt = 0;
-        for (i=1; i<maux->nals; i++)
-            if ( maux->cnt[i] > maux->cnt[icnt] ) icnt = i;
-        if ( maux->cnt[icnt]<0 ) break;
+        buffer_t *buf = &maux->buf[i];
+        buf->cur = -1;
+        if ( buf->beg >= buf->end ) continue;   // no lines in the buffer
 
-        int nmask = 0;
-        for (i=0; i<files->nreaders; i++)
+        // find lines with the same allele
+        for (j=buf->beg; j<buf->end; j++)
         {
-            maux->has_line[i] = 0;
+            if ( buf->rec[j].skip ) continue;   // done or not compatible
+            if ( args->merge_by_id ) break;
+            if ( maux->nals==1 && buf->lines[j]->n_allele==1 ) break;   // REF-only record
 
-            bcf_sr_t *reader = &files->readers[i];
-            if ( !reader->buffer ) continue;
+            for (k=0; k<buf->lines[j]->n_allele; k++)
+                if ( icnt==buf->rec[j].map[k] ) break;
 
-            // find lines with the same allele
-            int j;
-            for (j=0; j<=reader->nbuffer; j++)
-            {
-                if ( maux->d[i][j].skip ) continue;
-                int k;
-                for (k=0; k<reader->buffer[j]->n_allele; k++)
-                    if ( icnt==maux->d[i][j].map[k] ) break;
-                if ( k<reader->buffer[j]->n_allele ) break;
-            }
-            if ( j>reader->nbuffer )
-            {
-                // no matching allele found in this file
-                if ( args->collapse==COLLAPSE_NONE ) continue;
+            if ( k<buf->lines[j]->n_allele ) break;
+        }
+        if ( j>=buf->end )
+        {
+            // no matching allele found in this file
+            if ( args->collapse==COLLAPSE_NONE ) continue;
 
-                for (j=0; j<=reader->nbuffer; j++)
+            for (j=buf->beg; j<buf->end; j++)
+            {
+                if ( buf->rec[j].skip ) continue;   // done or not compatible
+                if ( args->collapse&COLLAPSE_ANY ) break;   // anything can be merged
+                int line_type = bcf_get_variant_types(buf->lines[j]);
+                if ( maux->var_types&snp_mask && line_type&VCF_SNP && (args->collapse&COLLAPSE_SNPS) ) break;
+                if ( maux->var_types&indel_mask && line_type&VCF_INDEL && (args->collapse&COLLAPSE_INDELS) ) break;
+                if ( line_type==VCF_REF )
                 {
-                    if ( maux->d[i][j].skip ) continue;
-                    if ( args->collapse&COLLAPSE_ANY ) break;
-                    int line_type = bcf_get_variant_types(reader->buffer[j]);
-                    if ( var_type&VCF_SNP && line_type&VCF_SNP && (args->collapse&COLLAPSE_SNPS) ) break;
-                    if ( var_type&VCF_INDEL && line_type&VCF_INDEL && (args->collapse&COLLAPSE_INDELS) ) break;
-                    if ( line_type==VCF_REF )
-                    {
-                        if ( var_type&VCF_SNP && (args->collapse&COLLAPSE_SNPS) ) break;
-                        if ( var_type&VCF_INDEL && (args->collapse&COLLAPSE_INDELS) ) break;
-                    }
-                    else if ( var_type==VCF_REF )
-                    {
-                        if ( line_type&VCF_SNP && (args->collapse&COLLAPSE_SNPS) ) break;
-                        if ( line_type&VCF_INDEL && (args->collapse&COLLAPSE_INDELS) ) break;
-                    }
+                    if ( maux->var_types&snp_mask && (args->collapse&COLLAPSE_SNPS) ) break;
+                    if ( maux->var_types&indel_mask && (args->collapse&COLLAPSE_INDELS) ) break;
+                    if ( maux->var_types&ref_mask ) break;
                 }
-            }
-            if ( j<=reader->nbuffer )
-            {
-                // found a suitable line for merging, place it at the beggining
-                if ( j>0 )
+                else if ( maux->var_types&ref_mask )
                 {
-                    SWAP(bcf1_t*, reader->buffer[0], reader->buffer[j]);
-                    SWAP(maux1_t, maux->d[i][0], maux->d[i][j]);
+                    if ( line_type&snp_mask && (args->collapse&COLLAPSE_SNPS) ) break;
+                    if ( line_type&indel_mask && (args->collapse&COLLAPSE_INDELS) ) break;
                 }
-                // mark as finished so that it's ignored next time
-                maux->d[i][0].skip |= SKIP_DONE;
-                maux->has_line[i] = 1;
-                nmask++;
             }
         }
-        if ( !nmask ) break;    // done, no more lines suitable for merging found
-        merge_line(args);       // merge and output the line
-        maux->cnt[icnt] = -1;   // do not pick this allele again, mark it as finished
+        if ( j<buf->end )
+        {
+            // found a suitable line for merging
+            buf->cur = j;
+
+            // mark as finished so that it's ignored next time
+            buf->rec[j].skip  = SKIP_DONE;
+            nout++;
+        }
     }
+    assert( nout );
+}
 
-    // clean the alleles
-    for (i=0; i<maux->nals; i++)
+void merge_line(args_t *args)
+{
+    if ( args->regs )
     {
-        free(maux->als[i]);
-        maux->als[i] = 0;
+        if ( !regidx_overlap(args->regs,args->maux->chr,args->maux->pos,args->maux->pos,NULL) ) return;
     }
-    maux->nals = 0;
 
-    // get the buffers ready for the next next_line() call
-    for (i=0; i<files->nreaders; i++)
-        shake_buffer(maux, i, pos);
+    bcf1_t *out = args->out_line;
+    merge_chrom2qual(args, out);
+    merge_filter(args, out);
+    merge_info(args, out);
+    if ( args->do_gvcf )
+        bcf_update_info_int32(args->out_hdr, out, "END", NULL, 0);
+    merge_format(args, out);
+    bcf_write1(args->out_fh, args->out_hdr, out);
+    bcf_clear1(out);
 }
 
 void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd)
@@ -1887,6 +2229,8 @@ void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *c
         else
             ksprintf(&str, " %s", argv[i]);
     }
+    kputs("; Date=", &str);
+    time_t tm; time(&tm); kputs(ctime(&tm), &str);
     kputc('\n', &str);
     bcf_hdr_append(hdr,str.s);
     free(str.s);
@@ -1898,7 +2242,7 @@ void merge_vcf(args_t *args)
 {
     args->out_fh  = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
     if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
-    if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
+    if ( args->n_threads ) hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->files->p); //hts_set_threads(args->out_fh, args->n_threads);
     args->out_hdr = bcf_hdr_init("w");
 
     if ( args->header_fname )
@@ -1928,14 +2272,32 @@ void merge_vcf(args_t *args)
     }
 
     if ( args->collapse==COLLAPSE_NONE ) args->vcmp = vcmp_init();
-    args->maux = maux_init(args->files);
+    args->maux = maux_init(args);
     args->out_line = bcf_init1();
     args->tmph = kh_init(strdict);
-    int ret;
-    while ( (ret=bcf_sr_next_line(args->files)) )
+
+    while ( bcf_sr_next_line(args->files) )
     {
-        merge_buffer(args);
+        // output cached gVCF blocks which end before the new record
+        if ( args->do_gvcf )
+            gvcf_flush(args,0);
+
+        maux_reset(args->maux);
+
+        // determine which of the new records are gvcf blocks
+        if ( args->do_gvcf )
+            gvcf_stage(args, args->maux->pos);
+
+        while ( can_merge(args) )
+        {
+            stage_line(args);
+            merge_line(args);
+        }
+        clean_buffer(args);
     }
+    if ( args->do_gvcf )
+        gvcf_flush(args,1);
+
     info_rules_destroy(args);
     maux_destroy(args->maux);
     bcf_hdr_destroy(args->out_hdr);
@@ -1958,7 +2320,10 @@ static void usage(void)
     fprintf(stderr, "        --force-samples                resolve duplicate sample names\n");
     fprintf(stderr, "        --print-header                 print only the merged header and exit\n");
     fprintf(stderr, "        --use-header <file>            use the provided header\n");
+    fprintf(stderr, "    -0  --missing-to-ref               assume genotypes at missing sites are 0/0\n");
     fprintf(stderr, "    -f, --apply-filters <list>         require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+    fprintf(stderr, "    -F, --filter-logic <x|+>           remove filters if some input is PASS (\"x\"), or apply all filters (\"+\") [+]\n");
+    fprintf(stderr, "    -g, --gvcf <-|ref.fa>              merge gVCF blocks, INFO/END tag is expected. Implies -i QS:sum,MinDP:min,I16:sum,IDV:max,IMF:max\n");
     fprintf(stderr, "    -i, --info-rules <tag:method,..>   rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
     fprintf(stderr, "    -l, --file-list <file>             read file names from the file\n");
     fprintf(stderr, "    -m, --merge <string>               allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
@@ -1989,7 +2354,9 @@ int main_vcfmerge(int argc, char *argv[])
     {
         {"help",no_argument,NULL,'h'},
         {"merge",required_argument,NULL,'m'},
+        {"gvcf",required_argument,NULL,'g'},
         {"file-list",required_argument,NULL,'l'},
+        {"missing-to-ref",no_argument,NULL,'0'},
         {"apply-filters",required_argument,NULL,'f'},
         {"use-header",required_argument,NULL,1},
         {"print-header",no_argument,NULL,2},
@@ -2001,10 +2368,25 @@ int main_vcfmerge(int argc, char *argv[])
         {"regions-file",required_argument,NULL,'R'},
         {"info-rules",required_argument,NULL,'i'},
         {"no-version",no_argument,NULL,8},
+        {"filter-logic",required_argument,NULL,'F'},
         {NULL,0,NULL,0}
     };
-    while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0",loptions,NULL)) >= 0) {
         switch (c) {
+            case 'F': 
+                if ( !strcmp(optarg,"+") ) args->filter_logic = FLT_LOGIC_ADD;
+                else if ( !strcmp(optarg,"x") ) args->filter_logic = FLT_LOGIC_REMOVE;
+                else error("Filter logic not recognised: %s\n", optarg);
+                break;
+            case '0': args->missing_to_ref = 1; break;
+            case 'g':
+                args->do_gvcf = 1;
+                if ( strcmp("-",optarg) )
+                {
+                    args->gvcf_fai = fai_load(optarg);
+                    if ( !args->gvcf_fai ) error("Failed to load the fai index: %s\n", optarg);
+                }
+                break;
             case 'l': args->file_list = optarg; break;
             case 'i': args->info_rules = optarg; break;
             case 'o': args->output_fname = optarg; break;
@@ -2045,9 +2427,23 @@ int main_vcfmerge(int argc, char *argv[])
     if ( argc-optind<2 && !args->file_list ) usage();
 
     args->files->require_index = 1;
-    if ( args->regions_list && bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
-        error("Failed to read the regions: %s\n", args->regions_list);
+    if ( args->regions_list )
+    {
+        if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
+            error("Failed to read the regions: %s\n", args->regions_list);
+        if ( regions_is_file )
+            args->regs = regidx_init(args->regions_list,NULL,NULL,sizeof(char*),NULL);
+        else
+        {
+            args->regs = regidx_init(NULL,regidx_parse_reg,NULL,sizeof(char*),NULL);
+            if ( regidx_insert_list(args->regs,args->regions_list,',') !=0 ) error("Could not parse the regions: %s\n", args->regions_list);
+            regidx_insert(args->regs,NULL);
+        }
+        if ( !args->regs ) error("Could not parse the regions: %s\n", args->regions_list);
+        args->regs_itr = regitr_init(args->regs);
+    }
 
+    if ( bcf_sr_set_threads(args->files, args->n_threads)<0 ) error("Failed to create threads\n");
     while (optind<argc)
     {
         if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open %s: %s\n", argv[optind],bcf_sr_strerror(args->files->errnum));
@@ -2065,6 +2461,9 @@ int main_vcfmerge(int argc, char *argv[])
     }
     merge_vcf(args);
     bcf_sr_destroy(args->files);
+    if ( args->regs ) regidx_destroy(args->regs);
+    if ( args->regs_itr ) regitr_destroy(args->regs_itr);
+    if ( args->gvcf_fai ) fai_destroy(args->gvcf_fai);
     free(args);
     return 0;
 }
diff --git a/bcftools/vcfmerge.c.pysam.c b/bcftools/vcfmerge.c.pysam.c
index daac458..db9aff5 100644
--- a/bcftools/vcfmerge.c.pysam.c
+++ b/bcftools/vcfmerge.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfmerge.c -- Merge multiple VCF/BCF files to create one multi-sample file.
 
-    Copyright (C) 2012-2014 Genome Research Ltd.
+    Copyright (C) 2012-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -26,28 +26,39 @@ THE SOFTWARE.  */
 
 #include <stdio.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <htslib/vcf.h>
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/vcfutils.h>
+#include <htslib/faidx.h>
 #include <math.h>
 #include <ctype.h>
+#include <time.h>
 #include "bcftools.h"
+#include "regidx.h"
 #include "vcmp.h"
 
+#define DBG 0
+
 #include <htslib/khash.h>
 KHASH_MAP_INIT_STR(strdict, int)
 typedef khash_t(strdict) strdict_t;
 
-#define SKIP_DONE 1
-#define SKIP_DIFF 2
+#define FLT_LOGIC_ADD    0
+#define FLT_LOGIC_REMOVE 1
+
+#define SKIP_DONE 1     // the record was processed
+#define SKIP_DIFF 2     // not compatible, merge later
 
 #define IS_VL_G(hdr,id) (bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_G)
 #define IS_VL_A(hdr,id) (bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_A)
 #define IS_VL_R(hdr,id) (bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_R)
 
+#define SWAP(type_t,a,b) { type_t tmp = (a); (a) = (b); (b) = tmp; }
+
 // For merging INFO Number=A,G,R tags
 typedef struct
 {
@@ -65,43 +76,61 @@ typedef struct _info_rule_t
     void (*merger)(bcf_hdr_t *hdr, bcf1_t *line, struct _info_rule_t *rule);
     int type;           // one of BCF_HT_*
     int block_size;     // number of values in a block
+    int type_size;      // size of the corresponding BCF_HT_* type
     int nblocks;        // number of blocks in nvals (the number of merged files)
     int nvals, mvals;   // used and total size of vals array
     void *vals;         // the info tag values
 }
 info_rule_t;
 
+typedef struct
+{
+    bcf1_t *line;
+    int end, active;
+}
+gvcf_aux_t;
+
 // Auxiliary merge data for selecting the right combination
 //  of buffered records across multiple readers. maux1_t
 //  corresponds to one buffered line.
 typedef struct
 {
     int skip;
-    int *map;   // mapping from input alleles to the output array
+    int *map;   // mapping from input alleles to the array of output alleles (set by merge_alleles)
     int mmap;   // size of map array (only buffer[i].n_allele is actually used)
     int als_differ;
 }
 maux1_t;
 typedef struct
 {
-    int n;  // number of readers
+    int rid;        // current rid
+    int beg,end;    // valid ranges in reader's buffer [beg,end). Maintained by maux_reset and gvcf_flush.
+    int cur;        // current line or -1 if none
+    int npos;       // number of unprocessed lines at this position
+    int mrec;       // allocated size of buf
+    maux1_t *rec;   // buffer to keep reader's lines
+    bcf1_t **lines; // source buffer: either gvcf or readers' buffer
+}
+buffer_t;
+typedef struct
+{
+    int n, pos, var_types;  // number of readers, current position, currently available variant types
+    char *chr;              // current chromosome
     char **als, **out_als;  // merged alleles (temp, may contain empty records) and merged alleles ready for output
     int nals, mals, nout_als, mout_als; // size of the output array
     int *cnt, ncnt; // number of records that refer to the alleles
-    int *nbuf;      // readers have buffers of varying lengths
     int *smpl_ploidy, *smpl_nGsize; // ploidy and derived number of values in Number=G tags, updated for each line (todo: cache for missing cases)
-    int *flt, mflt, minf;
-    bcf_info_t *inf;// out_line's INFO fields
     bcf_fmt_t **fmt_map; // i-th output FORMAT field corresponds in j-th reader to i*nreader+j, first row is reserved for GT
     int nfmt_map;        // number of rows in the fmt_map array
     int *agr_map, nagr_map, magr_map;   // mapping between Number=AGR element indexes
     void *tmp_arr;
     int ntmp_arr;
-    maux1_t **d;    // d[i][j] i-th reader, j-th buffer line
+    buffer_t *buf;
     AGR_info_t *AGR_info;
     int nAGR_info, mAGR_info;
     bcf_srs_t *files;
-    int *has_line;  // which files are being merged
+    int gvcf_min, gvcf_break;   // min buffered gvcf END position (NB: gvcf_min is 1-based) or 0 if no active lines are present
+    gvcf_aux_t *gvcf;           // buffer of gVCF lines
 }
 maux_t;
 
@@ -109,8 +138,11 @@ typedef struct
 {
     vcmp_t *vcmp;
     maux_t *maux;
-    int header_only, collapse, output_type, force_samples, merge_by_id;
+    regidx_t *regs;    // apply regions only after the blocks are expanded
+    regitr_t *regs_itr;
+    int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref;
     char *header_fname, *output_fname, *regions_list, *info_rules, *file_list;
+    faidx_t *gvcf_fai;
     info_rule_t *rules;
     int nrules;
     strdict_t *tmph;
@@ -124,6 +156,14 @@ typedef struct
 }
 args_t;
 
+static bcf1_t *maux_get_line(args_t *args, int i)
+{
+    maux_t *ma = args->maux;
+    int ibuf = ma->buf[i].cur;
+    if ( ibuf >= 0 ) return ma->buf[i].lines[ibuf];
+    return NULL;
+}
+
 static void info_rules_merge_sum(bcf_hdr_t *hdr, bcf1_t *line, info_rule_t *rule)
 {
     if ( !rule->nvals ) return;
@@ -249,6 +289,32 @@ static void info_rules_init(args_t *args)
             if ( str.l ) kputc(',',&str);
             kputs("DP4:sum",&str);
         }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "QS")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("QS:sum",&str);
+        }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "MinDP")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("MinDP:min",&str);
+        }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "I16")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("I16:sum",&str);
+        }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "IDV")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("IDV:max",&str);
+        }
+        if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "IMF")) )
+        {
+            if ( str.l ) kputc(',',&str);
+            kputs("IMF:max",&str);
+        }
+
         if ( !str.l ) return;
         args->info_rules = str.s;
     }
@@ -274,9 +340,12 @@ static void info_rules_init(args_t *args)
         int id = bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, rule->hdr_tag);
         if ( !bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,id) ) error("The tag is not defined in the header: \"%s\"\n", rule->hdr_tag);
         rule->type = bcf_hdr_id2type(args->out_hdr,BCF_HL_INFO,id);
-        if ( rule->type!=BCF_HT_INT && rule->type!=BCF_HT_REAL && rule->type!=BCF_HT_STR ) error("The type is not supported: \"%s\"\n", rule->hdr_tag);
+        if ( rule->type==BCF_HT_INT ) rule->type_size = sizeof(int32_t);
+        else if ( rule->type==BCF_HT_REAL ) rule->type_size = sizeof(float);
+        else if ( rule->type==BCF_HT_STR ) rule->type_size = sizeof(char); 
+        else error("The type is not supported: \"%s\"\n", rule->hdr_tag);
 
-        while ( *ss ) ss++; ss++;
+        ss = strchr(ss, '\0'); ss++;
         if ( !*ss ) error("Could not parse INFO rules, missing logic of \"%s\"\n", rule->hdr_tag);
 
         int is_join = 0;
@@ -302,7 +371,8 @@ static void info_rules_init(args_t *args)
                 error("Only fixed-length vectors are supported with -i %s:%s\n", ss, rule->hdr_tag);
         }
 
-        while ( *ss ) ss++; ss++; n++;
+        ss = strchr(ss, '\0'); ss++;
+        n++;
     }
     free(str.s);
     free(tmp);
@@ -328,8 +398,10 @@ static void info_rules_reset(args_t *args)
 }
 static int info_rules_add_values(args_t *args, bcf_hdr_t *hdr, bcf1_t *line, info_rule_t *rule, maux1_t *als, int var_len)
 {
-    int ret = bcf_get_info_values(hdr, line, rule->hdr_tag, &args->maux->tmp_arr, &args->maux->ntmp_arr, rule->type);
+    int msize = args->maux->ntmp_arr / rule->type_size;
+    int ret = bcf_get_info_values(hdr, line, rule->hdr_tag, &args->maux->tmp_arr, &msize, rule->type);
     if ( ret<=0 ) error("FIXME: error parsing %s at %s:%d .. %d\n", rule->hdr_tag,bcf_seqname(hdr,line),line->pos+1,ret);
+    args->maux->ntmp_arr = msize * rule->type_size;
 
     rule->nblocks++;
 
@@ -347,7 +419,7 @@ static int info_rules_add_values(args_t *args, bcf_hdr_t *hdr, bcf1_t *line, inf
     int i, j;
     if ( var_len==BCF_VL_A )
     {
-        assert( ret==line->n_allele-1 );
+        if ( ret!=line->n_allele-1 ) error("Wrong number of %s fields at %s:%d\n",rule->hdr_tag,bcf_seqname(hdr,line),line->pos+1);
         args->maux->nagr_map = ret;
         hts_expand(int,args->maux->nagr_map,args->maux->magr_map,args->maux->agr_map);
         // create mapping from source file ALT indexes to dst file indexes
@@ -356,7 +428,7 @@ static int info_rules_add_values(args_t *args, bcf_hdr_t *hdr, bcf1_t *line, inf
     }
     else if ( var_len==BCF_VL_R )
     {
-        assert( ret==line->n_allele );
+        if ( ret!=line->n_allele ) error("Wrong number of %s fields at %s:%d\n",rule->hdr_tag,bcf_seqname(hdr,line),line->pos+1);
         args->maux->nagr_map = ret;
         hts_expand(int,args->maux->nagr_map,args->maux->magr_map,args->maux->agr_map);
         for (i=0; i<ret; i++) args->maux->agr_map[i] = als->map[i];
@@ -558,6 +630,8 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb)
     {
         for (i=0; i<*nb; i++)
         {
+            if ( b[i][0]=='<' ) continue;   // symbolic allele, do not modify
+            if ( b[i][0]=='*' ) continue;   // overlapping deletion (*), do not modify
             int l = strlen(b[i]);
             b[i] = (char*) realloc(b[i],l+rla-rlb+1);
             memcpy(b[i]+l,a[0]+rlb,rla-rlb+1);
@@ -567,13 +641,15 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb)
     // now check if the $a alleles are present and if not add them
     for (i=1; i<na; i++)
     {
+        int const_ai = 1;
         char *ai;
-        if ( rlb>rla )  // $a alleles need expanding
+        if ( rlb>rla && a[i][0]!='<' && a[i][0]!='*' )  // $a alleles need expanding and not a symbolic allele or *
         {
             int l = strlen(a[i]);
             ai = (char*) malloc(l+rlb-rla+1);
             memcpy(ai,a[i],l);
             memcpy(ai+l,b[0]+rla,rlb-rla+1);
+            const_ai = 0;
         }
         else
             ai = a[i];
@@ -584,42 +660,59 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb)
         if ( j<*nb ) // $b already has the same allele
         {
             map[i] = j;
-            if ( rlb>rla ) free(ai);
+            if ( !const_ai ) free(ai);
             continue;
         }
         // new allele
         map[i] = *nb;
-        b[*nb] = rlb>rla ? ai : strdup(ai);
+        if ( b[*nb] ) free(b[*nb]);
+        b[*nb] = const_ai ? strdup(ai) : ai;
         (*nb)++;
     }
     return b;
 }
 
-maux_t *maux_init(bcf_srs_t *files)
+maux_t *maux_init(args_t *args)
 {
+    bcf_srs_t *files = args->files;
     maux_t *ma = (maux_t*) calloc(1,sizeof(maux_t));
     ma->n      = files->nreaders;
-    ma->nbuf   = (int *) calloc(ma->n,sizeof(int));
-    ma->d      = (maux1_t**) calloc(ma->n,sizeof(maux1_t*));
     ma->files  = files;
     int i, n_smpl = 0;
     for (i=0; i<ma->n; i++)
         n_smpl += bcf_hdr_nsamples(files->readers[i].header);
+    if ( args->do_gvcf )
+    {
+        ma->gvcf = (gvcf_aux_t*) calloc(ma->n,sizeof(gvcf_aux_t));
+        for (i=0; i<ma->n; i++)
+            ma->gvcf[i].line = bcf_init1();
+    }
     ma->smpl_ploidy = (int*) calloc(n_smpl,sizeof(int));
     ma->smpl_nGsize = (int*) malloc(n_smpl*sizeof(int));
-    ma->has_line = (int*) malloc(ma->n*sizeof(int));
+    ma->buf = (buffer_t*) calloc(ma->n,sizeof(buffer_t));
+    for (i=0; i<ma->n; i++)
+        ma->buf[i].rid = -1;
     return ma;
 }
 void maux_destroy(maux_t *ma)
 {
-    int i;
+    int i,j;
+    for (i=0; i<ma->mals; i++)
+    {
+        free(ma->als[i]);
+        ma->als[i] = NULL;
+    }
     for (i=0; i<ma->n; i++) // for each reader
     {
-        if ( !ma->d[i] ) continue;
-        int j;
-        for (j=0; j<ma->nbuf[i]; j++)  // for each buffered line
-            if ( ma->d[i][j].map ) free(ma->d[i][j].map);
-        free(ma->d[i]);
+        for (j=0; j<ma->buf[i].mrec; j++)  // for each buffered line
+            free(ma->buf[i].rec[j].map);
+        free(ma->buf[i].rec);
+    }
+    free(ma->buf);
+    if ( ma->gvcf )
+    {
+        for (i=0; i<ma->n; i++) bcf_destroy(ma->gvcf[i].line);
+        free(ma->gvcf);
     }
     for (i=0; i<ma->mAGR_info; i++)
         free(ma->AGR_info[i].buf);
@@ -628,32 +721,69 @@ void maux_destroy(maux_t *ma)
     if (ma->ntmp_arr) free(ma->tmp_arr);
     if (ma->nfmt_map) free(ma->fmt_map);
     // ma->inf freed in bcf_destroy1
-    free(ma->d);
-    free(ma->nbuf);
     for (i=0; i<ma->mals; i++) free(ma->als[i]);
     if (ma->mout_als) free(ma->out_als);
     free(ma->als);
     free(ma->cnt);
     free(ma->smpl_ploidy);
     free(ma->smpl_nGsize);
-    free(ma->has_line);
+    free(ma->chr);
     free(ma);
 }
-void maux_expand1(maux_t *ma, int i)
+void maux_expand1(buffer_t *buf, int size)
 {
-    if ( ma->nbuf[i] <= ma->files->readers[i].nbuffer )
+    if ( buf->mrec < size )
     {
-        int n = ma->files->readers[i].nbuffer + 1;
-        ma->d[i] = (maux1_t*) realloc(ma->d[i], sizeof(maux1_t)*n);
-        memset(ma->d[i]+ma->nbuf[i],0,sizeof(maux1_t)*(n-ma->nbuf[i]));
-        ma->nbuf[i] = n;
+        hts_expand0(maux1_t,size,buf->mrec,buf->rec);
+        buf->mrec = size;
     }
 }
 void maux_reset(maux_t *ma)
 {
-    int i;
-    for (i=0; i<ma->n; i++) maux_expand1(ma, i);
-    for (i=1; i<ma->ncnt; i++) ma->cnt[i] = 0;
+    int i,j;
+    for (i=0; i<ma->n; i++) maux_expand1(&ma->buf[i],ma->files->readers[i].nbuffer+1);
+    for (i=0; i<ma->ncnt; i++) ma->cnt[i] = 0;
+    for (i=0; i<ma->mals; i++)
+    {
+        free(ma->als[i]);
+        ma->als[i] = NULL;
+    }
+    const char *chr = NULL;
+    ma->nals  = 0;
+    ma->pos   = -1;
+    for (i=0; i<ma->n; i++)
+    {
+        if ( !bcf_sr_has_line(ma->files,i) ) continue;
+        bcf1_t *line = bcf_sr_get_line(ma->files,i);
+        bcf_hdr_t *hdr = bcf_sr_get_header(ma->files,i);
+        chr = bcf_seqname(hdr,line);
+        ma->pos = line->pos;
+        break;
+    }
+    if ( chr )
+    {
+        free(ma->chr);
+        ma->chr = strdup(chr);
+    }
+    for (i=0; i<ma->n; i++)
+    {
+        bcf_hdr_t *hdr = bcf_sr_get_header(ma->files,i);
+        ma->buf[i].rid = bcf_hdr_name2id(hdr,chr);
+        ma->buf[i].beg = bcf_sr_has_line(ma->files,i) ? 0 : 1;
+        for (j=ma->buf[i].beg; j<=ma->files->readers[i].nbuffer; j++)
+        {
+            ma->buf[i].rec[j].skip = 0;
+            bcf1_t *line = ma->files->readers[i].buffer[j];
+            if ( line->rid!=ma->buf[i].rid || line->pos!=ma->pos ) break;
+        }
+        ma->buf[i].end = j;
+        ma->buf[i].cur = -1;
+        if ( ma->buf[i].beg < ma->buf[i].end ) 
+        {
+            ma->buf[i].lines = ma->files->readers[i].buffer;
+            if ( ma->gvcf ) ma->gvcf[i].active = 0;     // gvcf block cannot overlap with the next record
+        }
+    }
 }
 void maux_debug(maux_t *ma, int ir, int ib)
 {
@@ -686,16 +816,20 @@ void merge_chrom2qual(args_t *args, bcf1_t *out)
     out->pos = -1;
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i] ) continue;
+        bcf1_t *line = maux_get_line(args, i);
+        if ( !line ) continue;
+        bcf_unpack(line, BCF_UN_ALL);
 
         bcf_sr_t *reader = &files->readers[i];
-        bcf1_t *line = reader->buffer[0];
         bcf_hdr_t *hdr = reader->header;
 
-        // alleles
+        // not all maux alleles are always used, mark the ones we'll need
         int j;
         for (j=1; j<line->n_allele; j++)
-            al_idxs[ ma->d[i][0].map[j] ] = 1;
+        {
+            int irec = ma->buf[i].cur;
+            al_idxs[ ma->buf[i].rec[irec].map[j] ] = 1;
+        }
 
         // position
         if ( out->pos==-1 )
@@ -719,16 +853,15 @@ void merge_chrom2qual(args_t *args, bcf1_t *out)
         }
 
         // set QUAL to the max qual value. Not exactly correct, but good enough for now
-        if ( !bcf_float_is_missing(files->readers[i].buffer[0]->qual) )
+        if ( !bcf_float_is_missing(line->qual) )
         {
-            if ( bcf_float_is_missing(out->qual) || out->qual < files->readers[i].buffer[0]->qual ) out->qual = files->readers[i].buffer[0]->qual;
+            if ( bcf_float_is_missing(out->qual) || out->qual < line->qual ) out->qual = line->qual;
         }
     }
 
     // set ID
     if ( !tmps->l ) kputs(".", tmps);
-    if ( out->d.id ) free(out->d.id);
-    out->d.id = strdup(tmps->s);
+    bcf_update_id(out_hdr, out, tmps->s);
 
     // set alleles
     ma->nout_als = 0;
@@ -742,10 +875,13 @@ void merge_chrom2qual(args_t *args, bcf1_t *out)
         int ir, j;
         for (ir=0; ir<files->nreaders; ir++)
         {
-            if ( !ma->has_line[ir] ) continue;
-            bcf1_t *line = files->readers[ir].buffer[0];
+            bcf1_t *line = maux_get_line(args,ir);
+            if ( !line ) continue;
             for (j=1; j<line->n_allele; j++)
-                if ( ma->d[ir][0].map[j]==i ) ma->d[ir][0].map[j] = ma->nout_als;
+            {
+                int irec = ma->buf[ir].cur;
+                if ( ma->buf[ir].rec[irec].map[j]==i ) ma->buf[ir].rec[irec].map[j] = ma->nout_als;
+            }
         }
     }
     // Expand the arrays and realloc the alleles string. Note that all alleles are in a single allocated block.
@@ -767,20 +903,36 @@ void merge_filter(args_t *args, bcf1_t *out)
     bcf_hdr_t *out_hdr = args->out_hdr;
 
     int i, ret;
+    if ( args->filter_logic == FLT_LOGIC_REMOVE )
+    {
+        for (i=0; i<files->nreaders; i++)
+        {
+            bcf1_t *line = maux_get_line(args, i);
+            if ( !line ) continue;
+            bcf_sr_t *reader = &files->readers[i];
+            bcf_hdr_t *hdr = reader->header;
+            if ( bcf_has_filter(hdr, line, "PASS") ) break;
+        }
+        if ( i<files->nreaders )
+        {
+            int flt_id = bcf_hdr_id2int(out_hdr, BCF_DT_ID, "PASS");
+            bcf_add_filter(out_hdr, out, flt_id);
+            return;
+        }
+    }
+
     khiter_t kitr;
     strdict_t *tmph = args->tmph;
     kh_clear(strdict, tmph);
 
-    maux_t *ma = args->maux;
     out->d.n_flt = 0;
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i]) continue;
+        bcf1_t *line = maux_get_line(args, i);
+        if ( !line ) continue;
 
         bcf_sr_t *reader = &files->readers[i];
-        bcf1_t *line = reader->buffer[0];
         bcf_hdr_t *hdr = reader->header;
-        bcf_unpack(line, BCF_UN_ALL);
 
         int k;
         for (k=0; k<line->d.n_flt; k++)
@@ -791,8 +943,8 @@ void merge_filter(args_t *args, bcf1_t *out)
             {
                 int id = bcf_hdr_id2int(out_hdr, BCF_DT_ID, flt);
                 if ( id==-1 ) error("Error: The filter is not defined in the header: %s\n", flt);
-                hts_expand(int,out->d.n_flt+1,ma->mflt,ma->flt);
-                ma->flt[out->d.n_flt] = id;
+                hts_expand(int,out->d.n_flt+1,out->d.m_flt,out->d.flt);
+                out->d.flt[out->d.n_flt] = id;
                 out->d.n_flt++;
                 kh_put(strdict, tmph, flt, &ret);
             }
@@ -803,20 +955,17 @@ void merge_filter(args_t *args, bcf1_t *out)
     {
         int id = bcf_hdr_id2int(out_hdr, BCF_DT_ID, "PASS");
         for (i=0; i<out->d.n_flt; i++)
-            if ( ma->flt[i]==id ) break;
+            if ( out->d.flt[i]==id ) break;
         if ( i<out->d.n_flt )
         {
             out->d.n_flt--;
-            for (; i<out->d.n_flt; i++) ma->flt[i] = ma->flt[i+1];
+            for (; i<out->d.n_flt; i++) out->d.flt[i] = out->d.flt[i+1];
         }
     }
-    out->d.flt = ma->flt;
 }
 
 static void bcf_info_set_id(bcf1_t *line, bcf_info_t *info, int id, kstring_t *tmp_str)
 {
-    assert( !info->vptr_free );
-
     uint8_t *ptr = info->vptr - info->vptr_off;
     bcf_dec_typed_int1(ptr, &ptr);
 
@@ -835,8 +984,6 @@ static void bcf_info_set_id(bcf1_t *line, bcf_info_t *info, int id, kstring_t *t
     kputsn_(info->vptr, info->len << bcf_type_shift[info->type], tmp_str);
 
     info->vptr = (uint8_t*) tmp_str->s + info->vptr_off;
-    info->vptr_free = 1;
-    line->d.shared_dirty |= BCF1_DIRTY_INF;
     tmp_str->s = NULL;
     tmp_str->m = 0;
     tmp_str->l = 0;
@@ -1031,9 +1178,10 @@ void merge_info(args_t *args, bcf1_t *out)
     info_rules_reset(args);
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i] ) continue;
+        bcf1_t *line = maux_get_line(args,i);
+        if ( !line ) continue;
+        int irec = ma->buf[i].cur;
         bcf_sr_t *reader = &files->readers[i];
-        bcf1_t *line = reader->buffer[0];
         bcf_hdr_t *hdr = reader->header;
         for (j=0; j<line->n_info; j++)
         {
@@ -1052,7 +1200,7 @@ void merge_info(args_t *args, bcf1_t *out)
                 info_rule_t *rule = (info_rule_t*) bsearch(key, args->rules, args->nrules, sizeof(*args->rules), info_rules_comp_key);
                 if ( rule )
                 {
-                    maux1_t *als = ( len==BCF_VL_A || len==BCF_VL_G || len==BCF_VL_R ) ? &ma->d[i][0] : NULL;
+                    maux1_t *als = ( len==BCF_VL_A || len==BCF_VL_G || len==BCF_VL_R ) ? &ma->buf[i].rec[irec] : NULL;
                     if ( info_rules_add_values(args, hdr, line, rule, als, len) ) continue;
                 }
             }
@@ -1063,7 +1211,7 @@ void merge_info(args_t *args, bcf1_t *out)
             {
                 if ( kitr == kh_end(tmph) )
                 {
-                    // first occurance in this reader, alloc arrays
+                    // seeing this key for the first time
                     ma->nAGR_info++;
                     hts_expand0(AGR_info_t,ma->nAGR_info,ma->mAGR_info,ma->AGR_info);
                     kitr = kh_put(strdict, tmph, key, &ret);
@@ -1081,37 +1229,36 @@ void merge_info(args_t *args, bcf1_t *out)
                 kitr = kh_get(strdict, tmph, key);
                 int idx = kh_val(tmph, kitr);
                 if ( idx<0 ) error("Error occurred while processing INFO tag \"%s\" at %s:%d\n", key,bcf_seqname(hdr,line),line->pos+1);
-                merge_AGR_info_tag(hdr, line,inf,len,&ma->d[i][0],&ma->AGR_info[idx]);
+                merge_AGR_info_tag(hdr, line,inf,len,&ma->buf[i].rec[irec],&ma->AGR_info[idx]);
                 continue;
             }
 
             if ( kitr == kh_end(tmph) )
             {
-                hts_expand0(bcf_info_t,out->n_info+1,ma->minf,ma->inf);
-                ma->inf[out->n_info].key  = id;
-                ma->inf[out->n_info].type = inf->type;
-                ma->inf[out->n_info].len  = inf->len;
-                ma->inf[out->n_info].vptr = inf->vptr;
-                ma->inf[out->n_info].v1.i = inf->v1.i;
-                ma->inf[out->n_info].v1.f = inf->v1.f;
-                ma->inf[out->n_info].vptr_off  = inf->vptr_off;
-                ma->inf[out->n_info].vptr_len  = inf->vptr_len;
-                ma->inf[out->n_info].vptr_free = inf->vptr_free;
+                // Seeing this key for the first time.  Although quite hacky,
+                // this is faster than anything else given the data structures..
+
+                hts_expand0(bcf_info_t,out->n_info+1,out->d.m_info,out->d.info);
+                out->d.info[out->n_info].key  = id;
+                out->d.info[out->n_info].type = inf->type;
+                out->d.info[out->n_info].len  = inf->len;
+                out->d.info[out->n_info].v1.i = inf->v1.i;
+                out->d.info[out->n_info].v1.f = inf->v1.f;
+                out->d.info[out->n_info].vptr_off  = inf->vptr_off;
+                out->d.info[out->n_info].vptr_len  = inf->vptr_len;
+                out->d.info[out->n_info].vptr_free = 1;
+                out->d.info[out->n_info].vptr = (uint8_t*) malloc(inf->vptr_len+inf->vptr_off); 
+                memcpy(out->d.info[out->n_info].vptr,inf->vptr-inf->vptr_off, inf->vptr_len+inf->vptr_off);
+                out->d.info[out->n_info].vptr += inf->vptr_off;
                 if ( (args->output_type & FT_BCF) && id!=bcf_hdr_id2int(hdr, BCF_DT_ID, key) )
-                {
-                    // The existing packed info cannot be reused. Change the id.
-                    // Although quite hacky, it's faster than anything else given
-                    // the data structures
-                    bcf_info_set_id(out, &ma->inf[out->n_info], id, &args->tmps);
-                }
+                    bcf_info_set_id(out, &out->d.info[out->n_info], id, &args->tmps);
+                out->d.shared_dirty |= BCF1_DIRTY_INF;
                 out->n_info++;
                 kitr = kh_put(strdict, tmph, key, &ret);
                 kh_val(tmph,kitr) = -(out->n_info-1);   // arbitrary negative value
             }
         }
     }
-    out->d.info = ma->inf;
-    out->d.m_info = ma->minf;
     for (i=0; i<args->nrules; i++)
         args->rules[i].merger(args->out_hdr, out, &args->rules[i]);
     for (i=0; i<ma->nAGR_info; i++)
@@ -1156,12 +1303,14 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     }
     memset(ma->smpl_ploidy,0,nsamples*sizeof(int));
 
+    int default_gt = args->missing_to_ref ? bcf_gt_unphased(0) : bcf_gt_missing;
     for (i=0; i<files->nreaders; i++)
     {
         bcf_sr_t *reader = &files->readers[i];
         bcf_hdr_t *hdr = reader->header;
         bcf_fmt_t *fmt_ori = fmt_map[i];
         int32_t *tmp  = (int32_t *) ma->tmp_arr + ismpl*nsize;
+        int irec = ma->buf[i].cur;
 
         int j, k;
         if ( !fmt_ori )
@@ -1169,7 +1318,7 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
             // missing values: assume maximum ploidy
             for (j=0; j<bcf_hdr_nsamples(hdr); j++)
             {
-                for (k=0; k<nsize; k++) { tmp[k] = 0; ma->smpl_ploidy[ismpl+j]++; }
+                for (k=0; k<nsize; k++) { tmp[k] = default_gt; ma->smpl_ploidy[ismpl+j]++; }
                 tmp += nsize;
             }
             ismpl += bcf_hdr_nsamples(hdr);
@@ -1178,7 +1327,7 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
 
         #define BRANCH(type_t, vector_end) { \
             type_t *p_ori  = (type_t*) fmt_ori->p; \
-            if ( !ma->d[i][0].als_differ ) \
+            if ( !ma->buf[i].rec[irec].als_differ ) \
             { \
                 /* the allele numbering is unchanged */ \
                 for (j=0; j<bcf_hdr_nsamples(hdr); j++) \
@@ -1208,7 +1357,7 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                     else \
                     { \
                         int al = (p_ori[k]>>1) - 1; \
-                        al = al<=0 ? al + 1 : ma->d[i][0].map[al] + 1; \
+                        al = al<=0 ? al + 1 : ma->buf[i].rec[irec].map[al] + 1; \
                         tmp[k] = (al << 1) | ((p_ori[k])&1); \
                     } \
                 } \
@@ -1241,7 +1390,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     int nsize = 0, length = BCF_VL_FIXED, type = -1;
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i] ) continue;
+        if ( !maux_get_line(args,i) ) continue;
         if ( !fmt_map[i] ) continue;
         if ( !key ) key = files->readers[i].header->id[BCF_DT_ID][fmt_map[i]->id].key;
         type = fmt_map[i]->type;
@@ -1279,10 +1428,12 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
         bcf_sr_t *reader = &files->readers[i];
         bcf_hdr_t *hdr = reader->header;
         bcf_fmt_t *fmt_ori = fmt_map[i];
+        bcf1_t *line = maux_get_line(args, i);
+        int irec = ma->buf[i].cur;
         if ( fmt_ori )
         {
             type = fmt_ori->type;
-            int nals_ori = reader->buffer[0]->n_allele;
+            int nals_ori = line->n_allele;
             if ( length==BCF_VL_G )
             {
                 // if all fields are missing then n==1 is valid
@@ -1315,10 +1466,8 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                 ismpl += bcf_hdr_nsamples(hdr); \
                 continue; \
             } \
-            assert( ma->has_line[i] ); \
-            bcf1_t *line    = reader->buffer[0]; \
             src_type_t *src = (src_type_t*) fmt_ori->p; \
-            if ( (length!=BCF_VL_G && length!=BCF_VL_A && length!=BCF_VL_R) || (line->n_allele==out->n_allele && !ma->d[i][0].als_differ) ) \
+            if ( (length!=BCF_VL_G && length!=BCF_VL_A && length!=BCF_VL_R) || (line->n_allele==out->n_allele && !ma->buf[i].rec[irec].als_differ) ) \
             { \
                 /* alleles unchanged, copy over */ \
                 for (j=0; j<bcf_hdr_nsamples(hdr); j++) \
@@ -1360,7 +1509,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                         int iori, inew; \
                         for (iori=0; iori<line->n_allele; iori++) \
                         { \
-                            inew = ma->d[i][0].map[iori]; \
+                            inew = ma->buf[i].rec[irec].map[iori]; \
                             src = (src_type_t*) fmt_ori->p + j*fmt_ori->n + iori; \
                             tgt = (tgt_type_t *) ma->tmp_arr + (ismpl+j)*nsize + inew; \
                             if ( src_is_vector_end ) break; \
@@ -1374,10 +1523,10 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                         int iori,jori, inew,jnew; \
                         for (iori=0; iori<line->n_allele; iori++) \
                         { \
-                            inew = ma->d[i][0].map[iori]; \
+                            inew = ma->buf[i].rec[irec].map[iori]; \
                             for (jori=0; jori<=iori; jori++) \
                             { \
-                                jnew = ma->d[i][0].map[jori]; \
+                                jnew = ma->buf[i].rec[irec].map[jori]; \
                                 int kori = iori*(iori+1)/2 + jori; \
                                 int knew = inew>jnew ? inew*(inew+1)/2 + jnew : jnew*(jnew+1)/2 + inew; \
                                 src = (src_type_t*) fmt_ori->p + j*fmt_ori->n + kori; \
@@ -1414,7 +1563,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
                     int iori,inew; \
                     for (iori=ifrom; iori<line->n_allele; iori++) \
                     { \
-                        inew = ma->d[i][0].map[iori] - ifrom; \
+                        inew = ma->buf[i].rec[irec].map[iori] - ifrom; \
                         tgt = (tgt_type_t *) ma->tmp_arr + (ismpl+j)*nsize + inew; \
                         if ( src_is_vector_end ) break; \
                         if ( src_is_missing ) tgt_set_missing; \
@@ -1463,9 +1612,9 @@ void merge_format(args_t *args, bcf1_t *out)
     int i, j, ret, has_GT = 0, max_ifmt = 0; // max fmt index
     for (i=0; i<files->nreaders; i++)
     {
-        if ( !ma->has_line[i] ) continue;
+        bcf1_t *line = maux_get_line(args,i);
+        if ( !line ) continue;
         bcf_sr_t *reader = &files->readers[i];
-        bcf1_t *line = reader->buffer[0];
         bcf_hdr_t *hdr = reader->header;
         for (j=0; j<line->n_fmt; j++)
         {
@@ -1497,9 +1646,10 @@ void merge_format(args_t *args, bcf1_t *out)
             ma->fmt_map[ifmt*files->nreaders+i] = fmt;
         }
         // Check if the allele numbering must be changed
-        for (j=1; j<reader->buffer[0]->n_allele; j++)
-            if ( ma->d[i][0].map[j]!=j ) break;
-        ma->d[i][0].als_differ = j==reader->buffer[0]->n_allele ? 0 : 1;
+        int irec = ma->buf[i].cur;
+        for (j=1; j<line->n_allele; j++)
+            if ( ma->buf[i].rec[irec].map[j]!=j ) break;
+        ma->buf[i].rec[irec].als_differ = j==line->n_allele ? 0 : 1;
     }
 
     out->n_sample = bcf_hdr_nsamples(out_hdr);
@@ -1507,203 +1657,383 @@ void merge_format(args_t *args, bcf1_t *out)
         merge_GT(args, ma->fmt_map, out);
     update_AN_AC(out_hdr, out);
 
-    if ( out->d.info!=ma->inf )
-    {
-        // hacky, we rely on htslib internals: bcf_update_info() reallocated the info
-        ma->inf  = out->d.info;
-        ma->minf = out->d.m_info;
-    }
-
     for (i=1; i<=max_ifmt; i++)
         merge_format_field(args, &ma->fmt_map[i*files->nreaders], out);
     out->d.indiv_dirty = 1;
 }
 
-// The core merging function, one or none line from each reader
-void merge_line(args_t *args)
+void gvcf_set_alleles(args_t *args)
+{
+    int i,k;
+    bcf_srs_t *files = args->files;
+    maux_t *maux = args->maux;
+    gvcf_aux_t *gaux = maux->gvcf;
+    maux->nals = 0;
+
+    for (i=0; i<files->nreaders; i++)
+    {
+        if ( !gaux[i].active ) continue;
+        bcf1_t *line = maux_get_line(args, i);
+        int irec = maux->buf[i].cur;
+
+        hts_expand(int, line->n_allele, maux->buf[i].rec[irec].mmap, maux->buf[i].rec[irec].map);
+        if ( !maux->nals )    // first record, copy the alleles to the output
+        {
+            maux->nals = line->n_allele;
+            hts_expand0(char*, maux->nals, maux->mals, maux->als);
+            hts_expand0(int, maux->nals, maux->ncnt, maux->cnt);
+            for (k=0; k<maux->nals; k++)
+            {
+                if ( maux->als[k] ) free(maux->als[k]);
+                maux->als[k] = strdup(line->d.allele[k]);
+                maux->buf[i].rec[irec].map[k] = k;
+            }
+        }
+        else
+        {
+            maux->als = merge_alleles(line->d.allele, line->n_allele, maux->buf[i].rec[irec].map, maux->als, &maux->nals, &maux->mals);
+            if ( !maux->als )
+            {
+                bcf_hdr_t *hdr = bcf_sr_get_header(args->files,i);
+                error("Failed to merge alleles at %s:%d\n",bcf_seqname(hdr,line),line->pos+1);
+            }
+        }
+    }
+}
+
+/*
+    Output staged gVCF blocks, end is the last position of the block. Assuming
+    gaux[i].active flags are set and maux_get_line returns correct lines.
+*/
+void gvcf_write_block(args_t *args, int start, int end)
 {
+    int i;
+    maux_t *maux = args->maux;
+    gvcf_aux_t *gaux = maux->gvcf;
+    assert(gaux);
+
+    // Update POS
+    int min = INT_MAX;
+    char ref = 'N';
+    for (i=0; i<args->files->nreaders; i++)
+    {
+        if ( !gaux[i].active ) continue;
+        if ( ref=='N' && gaux[i].line->pos==start ) ref = gaux[i].line->d.allele[0][0];
+        gaux[i].line->pos = start;
+    }
+    for (i=0; i<args->files->nreaders; i++)
+    {
+        if ( !gaux[i].active ) continue;
+        if ( gaux[i].end < start ) 
+        { 
+            gaux[i].active = 0; 
+            maux->buf[i].cur = -1;
+            continue; 
+        }
+        gaux[i].line->d.allele[0][0] = ref;
+        if ( min > gaux[i].end ) min = gaux[i].end;
+    }
+    // Check for valid gVCF blocks in this region
+    if ( min==INT_MAX )
+    {
+    assert(0);
+        maux->gvcf_min = 0;
+        return;
+    }
+
     bcf1_t *out = args->out_line;
-    bcf_clear1(out);
-    out->unpacked = BCF_UN_ALL;
 
+    gvcf_set_alleles(args);
+
+    // Merge the staged lines
     merge_chrom2qual(args, out);
     merge_filter(args, out);
     merge_info(args, out);
     merge_format(args, out);
 
-    bcf_write1(args->out_fh, args->out_hdr, out);
-}
+    if ( args->gvcf_fai && out->d.allele[0][0]=='N' )
+    {
+        int slen  = 0;
+        char *seq = faidx_fetch_seq(args->gvcf_fai,maux->chr,out->pos,out->pos,&slen);
+        if (slen)
+        {
+            out->d.allele[0][0] = seq[0];
+            free(seq);
+        }
+    }
 
+    // Update END boundary
+    if ( end > start )
+    {
+        end++;
+        bcf_update_info_int32(args->out_hdr, out, "END", &end, 1);
+    }
+    else
+        bcf_update_info_int32(args->out_hdr, out, "END", NULL, 0);
+    bcf_write1(args->out_fh, args->out_hdr, out);
+    bcf_clear1(out);
 
-void debug_buffers(FILE *fp, bcf_srs_t *files);
-void debug_buffer(FILE *fp, bcf_sr_t *reader);
 
-#define SWAP(type_t,a,b) { type_t tmp = (a); (a) = (b); (b) = tmp; }
+    // Inactivate blocks which do not extend beyond END and find new gvcf_min
+    min = INT_MAX;
+    for (i=0; i<args->files->nreaders; i++)
+    {
+        if ( !gaux[i].active ) continue;
+        if ( gaux[i].end < end )
+        {
+            gaux[i].active = 0; 
+            maux->buf[i].cur = -1;
+            continue; 
+        }
+        // next min END position bigger than the current one
+        if ( maux->gvcf_min < gaux[i].end+1 && min > gaux[i].end+1 ) min = gaux[i].end + 1;
+    }
+    maux->gvcf_min = min==INT_MAX ? 0 : min;
+}
 
-// Clean the reader's buffer to and make it ready for the next next_line() call.
-// Moves finished records (SKIP_DONE flag set) at the end of the buffer and put
-// the rest to the beggining. Then shorten the buffer so that the last element
-// points to the last unfinished record. There are two special cases: the last
-// line of the buffer typically has a different position and must stay at the
-// end; next, the first record of the buffer must be one of those already
-// printed, as it will be discarded by next_line().
-//
-void shake_buffer(maux_t *maux, int ir, int pos)
+/*
+    Flush staged gVCF blocks. Flush everything if there are no more lines
+    (done=1) or if there is a new chromosome. If still on the same chromosome,
+    all hanging blocks must be ended by creating new records:
+        A
+            1 END=10
+        B
+            3 END=7
+        C
+            3 END=5
+        out
+            1 END=2  A . .
+            3 END=5  A B C
+            6 END=7  A B .
+            8 END=10 A . .
+    
+*/
+void gvcf_flush(args_t *args, int done)
 {
-    bcf_sr_t *reader = &maux->files->readers[ir];
-    maux1_t *m = maux->d[ir];
-
-    if ( !reader->buffer ) return;
-
     int i;
-    // FILE *fp = pysam_stdout;
-    // fprintf(fp,"<going to shake> nbuf=%d\t", reader->nbuffer); for (i=0; i<reader->nbuffer; i++) fprintf(fp," %d", skip[i]); fprintf(fp,"\n");
-    // debug_buffer(fp,reader);
-    // fprintf(fp,"--\n");
+    maux_t *maux = args->maux;
 
-    int a = 1, b = reader->nbuffer;
-    if ( reader->buffer[b]->pos != pos ) b--;   // move the last line separately afterwards
+    if ( !maux->chr ) return;   // first time here, nothing to flush
 
-    while ( a<b )
+    int flush_until = INT_MAX;
+    if ( !done )
     {
-        if ( !(m[a].skip&SKIP_DONE) ) { a++; continue; }
-        if ( m[b].skip&SKIP_DONE ) { b--; continue; }
-        SWAP(bcf1_t*, reader->buffer[a], reader->buffer[b]);
-        SWAP(maux1_t, m[a], m[b]);
-        a++;
-        b--;
-    }
+        // Get current position and chromosome
+        for (i=0; i<maux->n; i++)
+            if ( bcf_sr_has_line(maux->files,i) ) break;
+        bcf1_t *line = bcf_sr_get_line(maux->files,i);
+        bcf_hdr_t *hdr = bcf_sr_get_header(maux->files,i);
 
-    // position $a to the after the first unfinished record
-    while ( a<=reader->nbuffer && !(m[a].skip&SKIP_DONE) ) a++;
+        if ( !strcmp(maux->chr,bcf_seqname(hdr,line)) ) flush_until = line->pos;    // still on the same chr
+    }
 
-    if ( a<reader->nbuffer )
+    // When called on a region, trim the blocks accordingly
+    int start = maux->gvcf_break>=0 ? maux->gvcf_break + 1 : maux->pos;
+    if ( args->regs )
     {
-        // there is a gap between the unfinished lines at the beggining and the
-        // last line. The last line must be brought forward to fill the gap
-        if ( reader->buffer[reader->nbuffer]->pos != pos )
+        int rstart = -1, rend = -1;
+        if ( regidx_overlap(args->regs,maux->chr,start,flush_until,args->regs_itr) )
         {
-            SWAP(bcf1_t*, reader->buffer[a], reader->buffer[reader->nbuffer]);
-            SWAP(maux1_t, m[a], m[reader->nbuffer]);
-            reader->nbuffer = a;
+            // In case there are multiple regions, we treat them as one
+            rstart = args->regs_itr->beg;
+            while ( regitr_overlap(args->regs_itr) ) rend = args->regs_itr->end;
         }
+        if ( rstart > start ) start = rstart;
+        if ( rend < flush_until ) flush_until = rend+1;
     }
 
-    if ( !(m[0].skip&SKIP_DONE) && reader->buffer[0]->pos==pos )
+    // output all finished blocks
+    while ( maux->gvcf_min && start < flush_until )
     {
-        // the first record is unfinished, replace it with an empty line
-        // from the end of the buffer or else next_line will remove it
-        if ( reader->nbuffer + 1 >= maux->nbuf[ir] )
+        // does the block end before the new line or is it interrupted?
+        int tmp = maux->gvcf_min < flush_until ? maux->gvcf_min : flush_until;
+        if ( start > tmp-1 ) break;
+        gvcf_write_block(args,start,tmp-1); // gvcf_min is 1-based
+        start = tmp;
+    }
+}
+
+/*
+    Check incoming lines for new gVCF blocks, set pointer to the current source
+    buffer (gvcf or readers).  In contrast to gvcf_flush, this function can be
+    called only after maux_reset as it relies on updated maux buffers.
+*/
+void gvcf_stage(args_t *args, int pos)
+{
+    maux_t *maux = args->maux;
+    gvcf_aux_t *gaux = maux->gvcf;
+    bcf_srs_t *files = args->files;
+    int32_t *end = (int32_t*) maux->tmp_arr;
+    int i, nend = maux->ntmp_arr / sizeof(int32_t);
+
+    maux->gvcf_break = -1;
+    maux->gvcf_min = INT_MAX;
+    for (i=0; i<files->nreaders; i++)
+    {
+        if ( gaux[i].active )
         {
-            reader->nbuffer++;
-            maux_expand1(maux, ir);
-            reader->nbuffer--;
-            m = maux->d[ir];
+            // gvcf block should not overlap with another record
+            if ( maux->gvcf_min > gaux[i].end+1 ) maux->gvcf_min = gaux[i].end + 1;
+            maux->buf[i].beg = 0;
+            maux->buf[i].end = 1;
+            maux->buf[i].cur = 0;
+            continue;
         }
-        if ( reader->nbuffer+1 >= reader->mbuffer )
-            error("Uh, did not expect this: %d vs %d\n", reader->nbuffer,reader->mbuffer);
 
-        if ( reader->buffer[reader->nbuffer]->pos!=pos )
+        // Does any of the lines have END set? It is enough to check only the
+        // first line, there should be no duplicate records with END in gVCF
+
+        if ( maux->buf[i].beg==maux->buf[i].end ) continue; // no new record
+
+        int irec = maux->buf[i].beg;
+        bcf_hdr_t *hdr = bcf_sr_get_header(files, i);
+        bcf1_t *line = args->files->readers[i].buffer[irec];
+        int ret = bcf_get_info_int32(hdr,line,"END",&end,&nend);
+        if ( ret==1 )
         {
-            // 4way swap
-            bcf1_t *tmp = reader->buffer[0];
-            reader->buffer[0] = reader->buffer[reader->nbuffer+1];
-            reader->buffer[reader->nbuffer+1] = reader->buffer[reader->nbuffer];
-            reader->buffer[reader->nbuffer] = tmp;
-            m[reader->nbuffer].skip   = m[0].skip;
-            m[reader->nbuffer+1].skip = SKIP_DIFF;
-            reader->nbuffer++;
+            // END is set, this is a new gVCF block. Cache this line in gaux[i] and swap with
+            // an empty record: the gaux line must be kept until we reach its END.
+            gaux[i].active = 1;
+            gaux[i].end = end[0] - 1;
+            SWAP(bcf1_t*,args->files->readers[i].buffer[irec],gaux[i].line);
+            gaux[i].line->pos = pos;
+
+            maux->buf[i].lines = &gaux[i].line;
+            maux->buf[i].beg = 0;
+            maux->buf[i].end = 1;
+            maux->buf[i].cur = 0;
+
+            // Set the rid,pos of the swapped line in the buffer or else the
+            // synced reader will have a problem with the next line
+            //
+            args->files->readers[i].buffer[irec]->rid = maux->buf[i].rid;
+            args->files->readers[i].buffer[irec]->pos = maux->pos;
+
+            // Update block offsets
+            if ( maux->gvcf_min > gaux[i].end+1 ) maux->gvcf_min = gaux[i].end + 1;
         }
         else
-        {
-            SWAP(bcf1_t*, reader->buffer[0], reader->buffer[reader->nbuffer+1]);
-            SWAP(maux1_t, m[0], m[reader->nbuffer+1]);
-        }
+            maux->gvcf_break = line->pos;   // must break the gvcf block 
     }
+    maux->ntmp_arr = nend * sizeof(int32_t);
+    maux->tmp_arr  = end;
+    if ( maux->gvcf_min==INT_MAX ) maux->gvcf_min = 0;
+}
+
+
+void debug_buffers(FILE *fp, bcf_srs_t *files);
+void debug_buffer(FILE *fp, bcf_srs_t *files, int reader);
+
+/*
+    Flush all buffered and processed records with the same coordinate.
+    Note that synced reader discards buffer[0], so that needs to stay
+    untouched.
+*/
+void clean_buffer(args_t *args)
+{
+    maux_t *ma = args->maux;
+
+    int ir;
+    for (ir=0; ir<ma->n; ir++)
+    {
+        // Invalidate pointer to reader's buffer or else gvcf_flush will attempt
+        // to use the old lines via maux_get_line()
+        if ( ma->gvcf && !ma->gvcf[ir].active ) ma->buf[ir].cur = -1;
 
-    // debug_buffer(fp,reader);
-    // fprintf(fp,"<shaken>\t"); for (i=0; i<reader->nbuffer; i++) fprintf(fp," %d", skip[i]);
-    // fprintf(fp,"\n\n");
+        bcf_sr_t *reader = bcf_sr_get_reader(args->files,ir);
+        if ( !reader->nbuffer ) continue;   // nothing to clean
 
-    // set position of finished buffer[0] line to -1, otherwise swapping may
-    // bring it back after next_line()
-    reader->buffer[0]->pos = -1;
+        bcf1_t **buf = reader->buffer;
+        if ( buf[1]->rid!=ma->buf[ir].rid || buf[1]->pos!=ma->pos ) continue;    // nothing to flush
 
-    // trim the buffer, remove finished lines from the end
-    i = reader->nbuffer;
-    while ( i>=1 && m[i--].skip&SKIP_DONE )
-        reader->nbuffer--;
+        int a = 1, b = 2;
+        while ( b<=reader->nbuffer && buf[b]->rid==ma->buf[ir].rid && buf[b]->pos==ma->pos ) b++;
+        // b now points to the first line we want to preserve
+        while ( b<=reader->nbuffer )
+        {
+            SWAP(bcf1_t*, buf[a], buf[b]);
+            a++; b++;
+        }
+        reader->nbuffer -= b-a;
+    }
 }
 
-void debug_maux(args_t *args, int pos, int var_type)
+void debug_maux(args_t *args)
 {
     bcf_srs_t *files = args->files;
     maux_t *maux = args->maux;
     int j,k,l;
 
-    fprintf(pysam_stderr,"Alleles to merge at %d\n", pos+1);
+    fprintf(pysam_stderr,"Alleles to merge at %d, nals=%d\n", maux->pos+1,maux->nals);
     for (j=0; j<files->nreaders; j++)
     {
         bcf_sr_t *reader = &files->readers[j];
+        buffer_t *buf = &maux->buf[j];
         fprintf(pysam_stderr," reader %d: ", j);
-        for (k=0; k<=reader->nbuffer; k++)
+        for (k=buf->beg; k<buf->end; k++)
         {
-            if ( maux->d[j][k].skip==SKIP_DONE ) continue;
+            if ( buf->rec[k].skip & SKIP_DONE ) continue;
             bcf1_t *line = reader->buffer[k];
-            if ( line->pos!=pos ) continue;
             fprintf(pysam_stderr,"\t");
-            if ( maux->d[j][k].skip ) fprintf(pysam_stderr,"[");  // this record will not be merged in this round
+            if ( buf->rec[k].skip ) fprintf(pysam_stderr,"[");  // this record will not be merged in this round
             for (l=0; l<line->n_allele; l++)
                 fprintf(pysam_stderr,"%s%s", l==0?"":",", line->d.allele[l]);
-            if ( maux->d[j][k].skip ) fprintf(pysam_stderr,"]");
+            if ( buf->rec[k].skip ) fprintf(pysam_stderr,"]");
         }
         fprintf(pysam_stderr,"\n");
     }
     fprintf(pysam_stderr," counts: ");
-    for (j=0; j<maux->nals; j++) fprintf(pysam_stderr,"%s   %dx %s", j==0?"":",",maux->cnt[j], maux->als[j]); fprintf(pysam_stderr,"\n");
-    for (j=0; j<files->nreaders; j++)
-    {
-        bcf_sr_t *reader = &files->readers[j];
-        fprintf(pysam_stderr," out %d: ", j);
-        for (k=0; k<=reader->nbuffer; k++)
-        {
-            if ( maux->d[j][k].skip==SKIP_DONE ) continue;
-            bcf1_t *line = reader->buffer[k];
-            if ( line->pos!=pos ) continue;
-            if ( maux->d[j][k].skip ) continue;
-            fprintf(pysam_stderr,"\t");
-            for (l=0; l<line->n_allele; l++)
-                fprintf(pysam_stderr,"%s%s", l==0?"":",", maux->als[maux->d[j][k].map[l]]);
-        }
-        fprintf(pysam_stderr,"\n");
-    }
-    fprintf(pysam_stderr,"\n");
+    for (j=0; j<maux->nals; j++) fprintf(pysam_stderr,"%s   %dx %s", j==0?"":",",maux->cnt[j], maux->als[j]);
+    fprintf(pysam_stderr,"\n\n");
 }
 
-// Determine which line should be merged from which reader: go through all
-// readers and all buffered lines, expand REF,ALT and try to match lines with
-// the same ALTs. A step towards output independent on input ordering of the
-// lines.
-void merge_buffer(args_t *args)
+
+/*
+   Determine which line should be merged from which reader: go through all
+   readers and all buffered lines, expand REF,ALT and try to match lines with
+   the same ALTs.
+ */
+int can_merge(args_t *args)
 {
     bcf_srs_t *files = args->files;
-    int i, pos = -1, var_type = 0;
-    char *id = NULL;
+    int snp_mask = (VCF_SNP<<1)|(VCF_MNP<<1), indel_mask = VCF_INDEL<<1, ref_mask = 1;
     maux_t *maux = args->maux;
-    maux_reset(maux);
+    gvcf_aux_t *gaux = maux->gvcf;
+    char *id = NULL, ref = 'N';
+    maux->var_types = maux->nals = 0;
 
-    // set the current position
+    int i,j,k, ntodo = 0;
     for (i=0; i<files->nreaders; i++)
     {
-        if ( bcf_sr_has_line(files,i) )
+        buffer_t *buf = &maux->buf[i];
+
+        if ( gaux && gaux[i].active )
         {
-            bcf1_t *line = bcf_sr_get_line(files,i);
-            pos = line->pos;
-            var_type = bcf_get_variant_types(line);
-            id = line->d.id;
-            break;
+            // skip readers with active gvcf blocks
+            buf->rec[buf->beg].skip = SKIP_DIFF;
+            continue;
+        }
+        for (j=buf->beg; j<buf->end; j++)
+        {
+            if ( buf->rec[j].skip & SKIP_DONE ) continue;
+
+            buf->rec[j].skip = SKIP_DIFF;
+            ntodo++;
+
+            if ( args->merge_by_id )
+                id = buf->lines[j]->d.id;
+            else
+            {
+                int var_type = bcf_get_variant_types(buf->lines[j]);
+                maux->var_types |= var_type ? var_type<<1 : 1;
+            }
         }
+
+        // for gvcf: find out REF at this position
+        if ( buf->beg < buf->end && ref=='N' )
+            ref = buf->lines[buf->beg]->d.allele[0][0];
     }
+    if ( !ntodo ) return 0;
 
     // In this loop we select from each reader compatible candidate lines.
     // (i.e. SNPs or indels). Go through all files and all lines at this
@@ -1712,19 +2042,24 @@ void merge_buffer(args_t *args)
     for (i=0; i<files->nreaders; i++)
     {
         bcf_sr_t *reader = &files->readers[i];
-        if ( !reader->buffer ) continue;
-        int j, k;
-        for (j=0; j<=reader->nbuffer; j++)
+        buffer_t *buf = &maux->buf[i];
+
+        if ( gaux && gaux[i].active )
         {
-            bcf1_t *line = reader->buffer[j];
+            gaux[i].line->d.allele[0][0] = ref;
+            gaux[i].line->pos = maux->pos;
+        }
+
+        for (j=buf->beg; j<buf->end; j++)
+        {
+            if ( buf->rec[j].skip & SKIP_DONE ) continue;
+
+            bcf1_t *line = buf->lines[j]; // ptr to reader's buffer or gvcf buffer
+
             int line_type = bcf_get_variant_types(line);
+            line_type = line_type ? line_type<<1 : 1;
+
             // select relevant lines
-            maux->d[i][j].skip = SKIP_DIFF;
-            if ( pos!=line->pos )
-            {
-                if ( j==0 ) maux->d[i][j].skip |= SKIP_DONE; // left from previous run, force to ignore
-                continue;
-            }
             if ( args->merge_by_id )
             {
                 if ( strcmp(id,line->d.id) ) continue;
@@ -1735,30 +2070,30 @@ void merge_buffer(args_t *args)
                 {
                     // All alleles of the tested record must be present in the
                     // selected maux record plus variant types must be the same
-                    if ( var_type!=line->d.var_type ) continue;
+                    if ( (maux->var_types & line_type) != line_type ) continue;
                     if ( vcmp_set_ref(args->vcmp,maux->als[0],line->d.allele[0]) < 0 ) continue;   // refs not compatible
                     for (k=1; k<line->n_allele; k++)
                     {
                         if ( vcmp_find_allele(args->vcmp,maux->als+1,maux->nals-1,line->d.allele[k])>=0 ) break;
                     }
-                    if ( k==line->n_allele ) continue;  // no matching allele
+                    if ( !(line_type&ref_mask) && k==line->n_allele ) continue;  // not a REF-only site and there is no matching allele
                 }
                 if ( !(args->collapse&COLLAPSE_ANY) )
                 {
-                    int compatible = 0;
-                    if ( line_type==var_type ) compatible = 1;
-                    else if ( line_type==VCF_REF ) compatible = 1;   // REF can go with anything
-                    else if ( var_type&VCF_SNP && line_type&VCF_SNP ) compatible = 1;
-                    else if ( var_type&VCF_INDEL && line_type&VCF_INDEL ) compatible = 1;
-                    else if ( var_type&VCF_MNP && line_type&VCF_MNP ) compatible = 1;
-                    else if ( var_type&VCF_SNP && line_type&VCF_MNP ) compatible = 1;
-                    else if ( var_type&VCF_MNP && line_type&VCF_SNP ) compatible = 1;
-                    if ( !compatible ) continue;
+                    // Merge:
+                    //  - SNPs+SNPs+MNPs+REF if -m both,snps
+                    //  - indels+indels+REF  if -m both,indels, REF only if SNPs are not present
+                    //  - SNPs come first
+                    if ( line_type & indel_mask )
+                    {
+                        if ( !(line_type&snp_mask) && maux->var_types&snp_mask ) continue;  // SNPs come first
+                        if ( args->do_gvcf && maux->var_types&ref_mask ) continue;  // never merge indels with gVCF blocks
+                    }
                 }
             }
-            maux->d[i][j].skip = 0;
+            buf->rec[j].skip = 0;
 
-            hts_expand(int, line->n_allele, maux->d[i][j].mmap, maux->d[i][j].map);
+            hts_expand(int, line->n_allele, buf->rec[j].mmap, buf->rec[j].map);
             if ( !maux->nals )    // first record, copy the alleles to the output
             {
                 maux->nals = line->n_allele;
@@ -1766,111 +2101,118 @@ void merge_buffer(args_t *args)
                 hts_expand0(int, maux->nals, maux->ncnt, maux->cnt);
                 for (k=0; k<maux->nals; k++)
                 {
+                    free(maux->als[k]);
                     maux->als[k] = strdup(line->d.allele[k]);
-                    maux->d[i][j].map[k] = k;
+                    buf->rec[j].map[k] = k;
                     maux->cnt[k] = 1;
                 }
-                pos = line->pos;
                 continue;
             }
-
             // normalize alleles
-            maux->als = merge_alleles(line->d.allele, line->n_allele, maux->d[i][j].map, maux->als, &maux->nals, &maux->mals);
-            if ( !maux->als ) error("Failed to merge alleles at %s:%d in %s\n",bcf_seqname(bcf_sr_get_header(args->files,j),line),line->pos+1,reader->fname);
+            maux->als = merge_alleles(line->d.allele, line->n_allele, buf->rec[j].map, maux->als, &maux->nals, &maux->mals);
+            if ( !maux->als ) error("Failed to merge alleles at %s:%d in %s\n",bcf_seqname(args->out_hdr,line),line->pos+1,reader->fname);
             hts_expand0(int, maux->nals, maux->ncnt, maux->cnt);
             for (k=1; k<line->n_allele; k++)
-                maux->cnt[ maux->d[i][j].map[k] ]++;    // how many times an allele appears in the files
+                maux->cnt[ buf->rec[j].map[k] ]++;    // how many times an allele appears in the files
             maux->cnt[0]++;
         }
     }
+    return 1;
+}
 
-    // debug_maux(args, pos, var_type);
+/*
+   Select records that have the same alleles; the input ordering of indels
+   must not matter. Multiple VCF lines can be emitted from this loop.
+   We expect only very few alleles and not many records with the same
+   position in the buffers, therefore the nested loops should not slow us
+   much.
+*/
+void stage_line(args_t *args)
+{
+    int snp_mask = (VCF_SNP<<1)|(VCF_MNP<<1), indel_mask = VCF_INDEL<<1, ref_mask = 1;
+    bcf_srs_t *files = args->files;
+    maux_t *maux = args->maux;
 
-    // Select records that have the same alleles; the input ordering of indels
-    // must not matter. Multiple VCF lines can be emitted from this loop.
-    // We expect only very few alleles and not many records with the same
-    // position in the buffers, therefore the nested loops should not slow us
-    // much.
-    while (1)
+    // debug_maux(args);
+
+    // take the most frequent allele present in multiple files, REF is skipped
+    int i,j,k,icnt = 1;
+    for (i=2; i<maux->nals; i++)
+        if ( maux->cnt[i] > maux->cnt[icnt] ) icnt = i;
+
+    int nout = 0;
+    for (i=0; i<files->nreaders; i++)
     {
-        // take the most frequent allele present in multiple files
-        int icnt = 0;
-        for (i=1; i<maux->nals; i++)
-            if ( maux->cnt[i] > maux->cnt[icnt] ) icnt = i;
-        if ( maux->cnt[icnt]<0 ) break;
+        buffer_t *buf = &maux->buf[i];
+        buf->cur = -1;
+        if ( buf->beg >= buf->end ) continue;   // no lines in the buffer
 
-        int nmask = 0;
-        for (i=0; i<files->nreaders; i++)
+        // find lines with the same allele
+        for (j=buf->beg; j<buf->end; j++)
         {
-            maux->has_line[i] = 0;
+            if ( buf->rec[j].skip ) continue;   // done or not compatible
+            if ( args->merge_by_id ) break;
+            if ( maux->nals==1 && buf->lines[j]->n_allele==1 ) break;   // REF-only record
 
-            bcf_sr_t *reader = &files->readers[i];
-            if ( !reader->buffer ) continue;
+            for (k=0; k<buf->lines[j]->n_allele; k++)
+                if ( icnt==buf->rec[j].map[k] ) break;
 
-            // find lines with the same allele
-            int j;
-            for (j=0; j<=reader->nbuffer; j++)
-            {
-                if ( maux->d[i][j].skip ) continue;
-                int k;
-                for (k=0; k<reader->buffer[j]->n_allele; k++)
-                    if ( icnt==maux->d[i][j].map[k] ) break;
-                if ( k<reader->buffer[j]->n_allele ) break;
-            }
-            if ( j>reader->nbuffer )
-            {
-                // no matching allele found in this file
-                if ( args->collapse==COLLAPSE_NONE ) continue;
+            if ( k<buf->lines[j]->n_allele ) break;
+        }
+        if ( j>=buf->end )
+        {
+            // no matching allele found in this file
+            if ( args->collapse==COLLAPSE_NONE ) continue;
 
-                for (j=0; j<=reader->nbuffer; j++)
+            for (j=buf->beg; j<buf->end; j++)
+            {
+                if ( buf->rec[j].skip ) continue;   // done or not compatible
+                if ( args->collapse&COLLAPSE_ANY ) break;   // anything can be merged
+                int line_type = bcf_get_variant_types(buf->lines[j]);
+                if ( maux->var_types&snp_mask && line_type&VCF_SNP && (args->collapse&COLLAPSE_SNPS) ) break;
+                if ( maux->var_types&indel_mask && line_type&VCF_INDEL && (args->collapse&COLLAPSE_INDELS) ) break;
+                if ( line_type==VCF_REF )
                 {
-                    if ( maux->d[i][j].skip ) continue;
-                    if ( args->collapse&COLLAPSE_ANY ) break;
-                    int line_type = bcf_get_variant_types(reader->buffer[j]);
-                    if ( var_type&VCF_SNP && line_type&VCF_SNP && (args->collapse&COLLAPSE_SNPS) ) break;
-                    if ( var_type&VCF_INDEL && line_type&VCF_INDEL && (args->collapse&COLLAPSE_INDELS) ) break;
-                    if ( line_type==VCF_REF )
-                    {
-                        if ( var_type&VCF_SNP && (args->collapse&COLLAPSE_SNPS) ) break;
-                        if ( var_type&VCF_INDEL && (args->collapse&COLLAPSE_INDELS) ) break;
-                    }
-                    else if ( var_type==VCF_REF )
-                    {
-                        if ( line_type&VCF_SNP && (args->collapse&COLLAPSE_SNPS) ) break;
-                        if ( line_type&VCF_INDEL && (args->collapse&COLLAPSE_INDELS) ) break;
-                    }
+                    if ( maux->var_types&snp_mask && (args->collapse&COLLAPSE_SNPS) ) break;
+                    if ( maux->var_types&indel_mask && (args->collapse&COLLAPSE_INDELS) ) break;
+                    if ( maux->var_types&ref_mask ) break;
                 }
-            }
-            if ( j<=reader->nbuffer )
-            {
-                // found a suitable line for merging, place it at the beggining
-                if ( j>0 )
+                else if ( maux->var_types&ref_mask )
                 {
-                    SWAP(bcf1_t*, reader->buffer[0], reader->buffer[j]);
-                    SWAP(maux1_t, maux->d[i][0], maux->d[i][j]);
+                    if ( line_type&snp_mask && (args->collapse&COLLAPSE_SNPS) ) break;
+                    if ( line_type&indel_mask && (args->collapse&COLLAPSE_INDELS) ) break;
                 }
-                // mark as finished so that it's ignored next time
-                maux->d[i][0].skip |= SKIP_DONE;
-                maux->has_line[i] = 1;
-                nmask++;
             }
         }
-        if ( !nmask ) break;    // done, no more lines suitable for merging found
-        merge_line(args);       // merge and output the line
-        maux->cnt[icnt] = -1;   // do not pick this allele again, mark it as finished
+        if ( j<buf->end )
+        {
+            // found a suitable line for merging
+            buf->cur = j;
+
+            // mark as finished so that it's ignored next time
+            buf->rec[j].skip  = SKIP_DONE;
+            nout++;
+        }
     }
+    assert( nout );
+}
 
-    // clean the alleles
-    for (i=0; i<maux->nals; i++)
+void merge_line(args_t *args)
+{
+    if ( args->regs )
     {
-        free(maux->als[i]);
-        maux->als[i] = 0;
+        if ( !regidx_overlap(args->regs,args->maux->chr,args->maux->pos,args->maux->pos,NULL) ) return;
     }
-    maux->nals = 0;
 
-    // get the buffers ready for the next next_line() call
-    for (i=0; i<files->nreaders; i++)
-        shake_buffer(maux, i, pos);
+    bcf1_t *out = args->out_line;
+    merge_chrom2qual(args, out);
+    merge_filter(args, out);
+    merge_info(args, out);
+    if ( args->do_gvcf )
+        bcf_update_info_int32(args->out_hdr, out, "END", NULL, 0);
+    merge_format(args, out);
+    bcf_write1(args->out_fh, args->out_hdr, out);
+    bcf_clear1(out);
 }
 
 void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd)
@@ -1889,6 +2231,8 @@ void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *c
         else
             ksprintf(&str, " %s", argv[i]);
     }
+    kputs("; Date=", &str);
+    time_t tm; time(&tm); kputs(ctime(&tm), &str);
     kputc('\n', &str);
     bcf_hdr_append(hdr,str.s);
     free(str.s);
@@ -1900,7 +2244,7 @@ void merge_vcf(args_t *args)
 {
     args->out_fh  = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
     if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
-    if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
+    if ( args->n_threads ) hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->files->p); //hts_set_threads(args->out_fh, args->n_threads);
     args->out_hdr = bcf_hdr_init("w");
 
     if ( args->header_fname )
@@ -1930,14 +2274,32 @@ void merge_vcf(args_t *args)
     }
 
     if ( args->collapse==COLLAPSE_NONE ) args->vcmp = vcmp_init();
-    args->maux = maux_init(args->files);
+    args->maux = maux_init(args);
     args->out_line = bcf_init1();
     args->tmph = kh_init(strdict);
-    int ret;
-    while ( (ret=bcf_sr_next_line(args->files)) )
+
+    while ( bcf_sr_next_line(args->files) )
     {
-        merge_buffer(args);
+        // output cached gVCF blocks which end before the new record
+        if ( args->do_gvcf )
+            gvcf_flush(args,0);
+
+        maux_reset(args->maux);
+
+        // determine which of the new records are gvcf blocks
+        if ( args->do_gvcf )
+            gvcf_stage(args, args->maux->pos);
+
+        while ( can_merge(args) )
+        {
+            stage_line(args);
+            merge_line(args);
+        }
+        clean_buffer(args);
     }
+    if ( args->do_gvcf )
+        gvcf_flush(args,1);
+
     info_rules_destroy(args);
     maux_destroy(args->maux);
     bcf_hdr_destroy(args->out_hdr);
@@ -1960,7 +2322,10 @@ static void usage(void)
     fprintf(pysam_stderr, "        --force-samples                resolve duplicate sample names\n");
     fprintf(pysam_stderr, "        --print-header                 print only the merged header and exit\n");
     fprintf(pysam_stderr, "        --use-header <file>            use the provided header\n");
+    fprintf(pysam_stderr, "    -0  --missing-to-ref               assume genotypes at missing sites are 0/0\n");
     fprintf(pysam_stderr, "    -f, --apply-filters <list>         require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+    fprintf(pysam_stderr, "    -F, --filter-logic <x|+>           remove filters if some input is PASS (\"x\"), or apply all filters (\"+\") [+]\n");
+    fprintf(pysam_stderr, "    -g, --gvcf <-|ref.fa>              merge gVCF blocks, INFO/END tag is expected. Implies -i QS:sum,MinDP:min,I16:sum,IDV:max,IMF:max\n");
     fprintf(pysam_stderr, "    -i, --info-rules <tag:method,..>   rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
     fprintf(pysam_stderr, "    -l, --file-list <file>             read file names from the file\n");
     fprintf(pysam_stderr, "    -m, --merge <string>               allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
@@ -1991,7 +2356,9 @@ int main_vcfmerge(int argc, char *argv[])
     {
         {"help",no_argument,NULL,'h'},
         {"merge",required_argument,NULL,'m'},
+        {"gvcf",required_argument,NULL,'g'},
         {"file-list",required_argument,NULL,'l'},
+        {"missing-to-ref",no_argument,NULL,'0'},
         {"apply-filters",required_argument,NULL,'f'},
         {"use-header",required_argument,NULL,1},
         {"print-header",no_argument,NULL,2},
@@ -2003,10 +2370,25 @@ int main_vcfmerge(int argc, char *argv[])
         {"regions-file",required_argument,NULL,'R'},
         {"info-rules",required_argument,NULL,'i'},
         {"no-version",no_argument,NULL,8},
+        {"filter-logic",required_argument,NULL,'F'},
         {NULL,0,NULL,0}
     };
-    while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0",loptions,NULL)) >= 0) {
         switch (c) {
+            case 'F': 
+                if ( !strcmp(optarg,"+") ) args->filter_logic = FLT_LOGIC_ADD;
+                else if ( !strcmp(optarg,"x") ) args->filter_logic = FLT_LOGIC_REMOVE;
+                else error("Filter logic not recognised: %s\n", optarg);
+                break;
+            case '0': args->missing_to_ref = 1; break;
+            case 'g':
+                args->do_gvcf = 1;
+                if ( strcmp("-",optarg) )
+                {
+                    args->gvcf_fai = fai_load(optarg);
+                    if ( !args->gvcf_fai ) error("Failed to load the fai index: %s\n", optarg);
+                }
+                break;
             case 'l': args->file_list = optarg; break;
             case 'i': args->info_rules = optarg; break;
             case 'o': args->output_fname = optarg; break;
@@ -2047,9 +2429,23 @@ int main_vcfmerge(int argc, char *argv[])
     if ( argc-optind<2 && !args->file_list ) usage();
 
     args->files->require_index = 1;
-    if ( args->regions_list && bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
-        error("Failed to read the regions: %s\n", args->regions_list);
+    if ( args->regions_list )
+    {
+        if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
+            error("Failed to read the regions: %s\n", args->regions_list);
+        if ( regions_is_file )
+            args->regs = regidx_init(args->regions_list,NULL,NULL,sizeof(char*),NULL);
+        else
+        {
+            args->regs = regidx_init(NULL,regidx_parse_reg,NULL,sizeof(char*),NULL);
+            if ( regidx_insert_list(args->regs,args->regions_list,',') !=0 ) error("Could not parse the regions: %s\n", args->regions_list);
+            regidx_insert(args->regs,NULL);
+        }
+        if ( !args->regs ) error("Could not parse the regions: %s\n", args->regions_list);
+        args->regs_itr = regitr_init(args->regs);
+    }
 
+    if ( bcf_sr_set_threads(args->files, args->n_threads)<0 ) error("Failed to create threads\n");
     while (optind<argc)
     {
         if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open %s: %s\n", argv[optind],bcf_sr_strerror(args->files->errnum));
@@ -2067,6 +2463,9 @@ int main_vcfmerge(int argc, char *argv[])
     }
     merge_vcf(args);
     bcf_sr_destroy(args->files);
+    if ( args->regs ) regidx_destroy(args->regs);
+    if ( args->regs_itr ) regitr_destroy(args->regs_itr);
+    if ( args->gvcf_fai ) fai_destroy(args->gvcf_fai);
     free(args);
     return 0;
 }
diff --git a/bcftools/vcfnorm.c b/bcftools/vcfnorm.c
index 781833c..3a1706b 100644
--- a/bcftools/vcfnorm.c
+++ b/bcftools/vcfnorm.c
@@ -23,6 +23,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -87,10 +88,21 @@ static inline int replace_iupac_codes(char *seq, int nseq)
     for (i=0; i<nseq; i++)
     {
         char c = toupper(seq[i]);
-        if ( c!='A' && c!='C' && c!='G' && c!='T' ) { seq[i] = 'N'; n++; }
+        if ( c!='A' && c!='C' && c!='G' && c!='T' && c!='N' ) { seq[i] = 'N'; n++; }
     }
     return n;
 }
+static inline int has_non_acgtn(char *seq, int nseq)
+{
+    char *end = nseq ? seq + nseq : seq + UINT32_MAX;   // arbitrary large number
+    while ( *seq && seq<end )
+    {
+        char c = toupper(*seq);
+        if ( c!='A' && c!='C' && c!='G' && c!='T' && c!='N' ) return 1;
+        seq++;
+    }
+    return 0;
+}
 
 static void fix_ref(args_t *args, bcf1_t *line)
 {
@@ -248,10 +260,11 @@ static void fix_dup_alt(args_t *args, bcf1_t *line)
     if ( changed ) bcf_update_genotypes(args->hdr,line,gts,ngts);
 }
 
-#define ERR_DUP_ALLELE      -2
-#define ERR_REF_MISMATCH    -1
-#define ERR_OK              0
-#define ERR_SYMBOLIC        1
+#define ERR_DUP_ALLELE       -2
+#define ERR_REF_MISMATCH     -1
+#define ERR_OK                0
+#define ERR_SYMBOLIC          1
+#define ERR_SPANNING_DELETION 2
 
 static int realign(args_t *args, bcf1_t *line)
 {
@@ -261,13 +274,17 @@ static int realign(args_t *args, bcf1_t *line)
     int i, nref, reflen = strlen(line->d.allele[0]);
     char *ref = faidx_fetch_seq(args->fai, (char*)args->hdr->id[BCF_DT_CTG][line->rid].key, line->pos, line->pos+reflen-1, &nref);
     if ( !ref ) error("faidx_fetch_seq failed at %s:%d\n", args->hdr->id[BCF_DT_CTG][line->rid].key, line->pos+1);
-    replace_iupac_codes(ref,nref);
+    replace_iupac_codes(ref,nref);  // any non-ACGT character in fasta ref is replaced with N
 
-    // does REF contain non-standard bases?
-    if ( replace_iupac_codes(line->d.allele[0],reflen) )
+    // does VCF REF contain non-standard bases?
+    if ( has_non_acgtn(line->d.allele[0],reflen) )
     {
-        args->nchanged++;
-        bcf_update_alleles(args->hdr,line,(const char**)line->d.allele,line->n_allele);
+        if ( args->check_ref==CHECK_REF_EXIT )
+            error("Non-ACGTN reference allele at %s:%d .. REF_SEQ:'%s' vs VCF:'%s'\n", bcf_seqname(args->hdr,line),line->pos+1,ref,line->d.allele[0]);
+        if ( args->check_ref & CHECK_REF_WARN )
+            fprintf(stderr,"NON_ACGTN_REF\t%s\t%d\t%s\n", bcf_seqname(args->hdr,line),line->pos+1,line->d.allele[0]);
+        free(ref);
+        return ERR_REF_MISMATCH;
     }
     if ( strcasecmp(ref,line->d.allele[0]) )
     {
@@ -289,6 +306,16 @@ static int realign(args_t *args, bcf1_t *line)
     for (i=0; i<line->n_allele; i++)
     {
         if ( line->d.allele[i][0]=='<' ) return ERR_SYMBOLIC;  // symbolic allele
+        if ( line->d.allele[i][0]=='*' ) return ERR_SPANNING_DELETION;  // spanning deletion
+        if ( bcf_get_variant_type(line,i)==VCF_BND ) return ERR_SYMBOLIC;   // breakend, not an error
+        if ( has_non_acgtn(line->d.allele[i],0) )
+        {
+            if ( args->check_ref==CHECK_REF_EXIT )
+                error("Non-ACGTN alternate allele at %s:%d .. REF_SEQ:'%s' vs VCF:'%s'\n", bcf_seqname(args->hdr,line),line->pos+1,ref,line->d.allele[i]);
+            if ( args->check_ref & CHECK_REF_WARN )
+                fprintf(stderr,"NON_ACGTN_ALT\t%s\t%d\t%s\n", bcf_seqname(args->hdr,line),line->pos+1,line->d.allele[i]);
+            return ERR_REF_MISMATCH;
+        }
 
         als[i].l = 0;
         kputs(line->d.allele[i], &als[i]);
@@ -390,18 +417,24 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
         int len = bcf_hdr_id2length(args->hdr,BCF_HL_INFO,info->key); \
         if ( len==BCF_VL_A ) \
         { \
-            assert( ret==src->n_allele-1); \
+            if ( ret!=src->n_allele-1 ) \
+                error("Error: wrong number of fields in INFO/%s at %s:%d, expected %d, found %d\n", \
+                        tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele-1,ret); \
             bcf_update_info_##type(args->hdr,dst,tag,vals+ialt,1); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
-            assert( ret==src->n_allele); \
+            if ( ret!=src->n_allele ) \
+                error("Error: wrong number of fields in INFO/%s at %s:%d, expected %d, found %d\n", \
+                        tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele,ret); \
             if ( ialt!=0 ) vals[1] = vals[ialt+1]; \
             bcf_update_info_##type(args->hdr,dst,tag,vals,2); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
-            assert( ret==src->n_allele*(src->n_allele+1)/2 ); \
+            if ( ret!=src->n_allele*(src->n_allele+1)/2 ) \
+                error("Error: wrong number of fields in INFO/%s at %s:%d, expected %d, found %d\n", \
+                        tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele*(src->n_allele+1)/2,ret); \
             if ( ialt!=0 ) \
             { \
                 vals[1] = vals[bcf_alleles2gt(0,ialt+1)]; \
@@ -545,7 +578,9 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
         } \
         if ( len==BCF_VL_A ) \
         { \
-            assert( nvals==(src->n_allele-1)*nsmpl); \
+            if ( nvals!=(src->n_allele-1)*nsmpl ) \
+                error("Error: wrong number of fields in FMT/%s at %s:%d, expected %d, found %d\n", \
+                    tag,bcf_seqname(args->hdr,src),src->pos+1,(src->n_allele-1)*nsmpl,nvals); \
             nvals /= nsmpl; \
             type_t *src_vals = vals, *dst_vals = vals; \
             for (i=0; i<nsmpl; i++) \
@@ -558,7 +593,9 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
         } \
         else if ( len==BCF_VL_R ) \
         { \
-            assert( nvals==src->n_allele*nsmpl); \
+            if ( nvals!=src->n_allele*nsmpl ) \
+                error("Error: wrong number of fields in FMT/%s at %s:%d, expected %d, found %d\n", \
+                    tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele*nsmpl,nvals); \
             nvals /= nsmpl; \
             type_t *src_vals = vals, *dst_vals = vals; \
             for (i=0; i<nsmpl; i++) \
@@ -682,7 +719,10 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
                 if ( *se==',' ) nfields++;
                 se++;
             }
-            assert( nfields==src->n_allele*(src->n_allele+1)/2 || nfields==src->n_allele );
+            if ( nfields!=src->n_allele*(src->n_allele+1)/2 && nfields!=src->n_allele )
+                error("Error: wrong number of fields in FMT/%s at %s:%d, expected %d or %d, found %d\n",
+                        tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele*(src->n_allele+1)/2,src->n_allele,nfields);
+
             int len = 0;
             if ( nfields==src->n_allele )   // haploid
             {
@@ -994,7 +1034,7 @@ static void merge_format_genotype(args_t *args, bcf1_t **lines, int nlines, bcf_
                 else
                 {
                     int ial = bcf_gt_allele(gt2[k]);
-                    assert( ial<args->maps[i].nals );
+                    if ( ial>=args->maps[i].nals ) error("Error at %s:%d: incorrect allele index %d\n",bcf_seqname(args->hdr,lines[i]),lines[i]->pos+1,ial);
                     gt[k] = bcf_gt_unphased( args->maps[i].map[ial] ) | bcf_gt_is_phased(gt[k]);
                 }
             }
@@ -1583,7 +1623,8 @@ static void normalize_vcf(args_t *args)
 {
     htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
     if ( out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
-    if ( args->n_threads ) hts_set_threads(out, args->n_threads);
+    if ( args->n_threads )
+        hts_set_opt(out, HTS_OPT_THREAD_POOL, args->files->p);
     if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
     bcf_hdr_write(out, args->hdr);
 
@@ -1666,7 +1707,7 @@ static void usage(void)
     fprintf(stderr, "    -c, --check-ref <e|w|x|s>         check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
     fprintf(stderr, "    -D, --remove-duplicates           remove duplicate lines of the same type.\n");
     fprintf(stderr, "    -d, --rm-dup <type>               remove duplicate snps|indels|both|any\n");
-    fprintf(stderr, "    -f, --fasta-ref <file>            reference sequence\n");
+    fprintf(stderr, "    -f, --fasta-ref <file>            reference sequence (MANDATORY)\n");
     fprintf(stderr, "    -m, --multiallelics <-|+>[type]   split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
     fprintf(stderr, "        --no-version                  do not append version and command line to the header\n");
     fprintf(stderr, "    -N, --do-not-normalize            do not normalize indels (with -m or -c s)\n");
@@ -1677,7 +1718,7 @@ static void usage(void)
     fprintf(stderr, "    -s, --strict-filter               when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
     fprintf(stderr, "    -t, --targets <region>            similar to -r but streams rather than index-jumps\n");
     fprintf(stderr, "    -T, --targets-file <file>         similar to -R but streams rather than index-jumps\n");
-    fprintf(stderr, "        --threads <int>               number of extra output compression threads [0]\n");
+    fprintf(stderr, "        --threads <int>               number of extra (de)compression threads [0]\n");
     fprintf(stderr, "    -w, --site-win <int>              buffer for sorting lines which changed position during realignment [1000]\n");
     fprintf(stderr, "\n");
     exit(1);
@@ -1804,6 +1845,7 @@ int main_vcfnorm(int argc, char *argv[])
             error("Failed to read the targets: %s\n", args->targets);
     }
 
+    if ( bcf_sr_set_threads(args->files, args->n_threads)<0 ) error("Failed to create threads\n");
     if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
     if ( args->mrows_op&MROWS_SPLIT && args->rmdup ) error("Cannot combine -D and -m-\n");
     init_data(args);
diff --git a/bcftools/vcfnorm.c.pysam.c b/bcftools/vcfnorm.c.pysam.c
index 200ce79..da5a2aa 100644
--- a/bcftools/vcfnorm.c.pysam.c
+++ b/bcftools/vcfnorm.c.pysam.c
@@ -25,6 +25,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -89,10 +90,21 @@ static inline int replace_iupac_codes(char *seq, int nseq)
     for (i=0; i<nseq; i++)
     {
         char c = toupper(seq[i]);
-        if ( c!='A' && c!='C' && c!='G' && c!='T' ) { seq[i] = 'N'; n++; }
+        if ( c!='A' && c!='C' && c!='G' && c!='T' && c!='N' ) { seq[i] = 'N'; n++; }
     }
     return n;
 }
+static inline int has_non_acgtn(char *seq, int nseq)
+{
+    char *end = nseq ? seq + nseq : seq + UINT32_MAX;   // arbitrary large number
+    while ( *seq && seq<end )
+    {
+        char c = toupper(*seq);
+        if ( c!='A' && c!='C' && c!='G' && c!='T' && c!='N' ) return 1;
+        seq++;
+    }
+    return 0;
+}
 
 static void fix_ref(args_t *args, bcf1_t *line)
 {
@@ -250,10 +262,11 @@ static void fix_dup_alt(args_t *args, bcf1_t *line)
     if ( changed ) bcf_update_genotypes(args->hdr,line,gts,ngts);
 }
 
-#define ERR_DUP_ALLELE      -2
-#define ERR_REF_MISMATCH    -1
-#define ERR_OK              0
-#define ERR_SYMBOLIC        1
+#define ERR_DUP_ALLELE       -2
+#define ERR_REF_MISMATCH     -1
+#define ERR_OK                0
+#define ERR_SYMBOLIC          1
+#define ERR_SPANNING_DELETION 2
 
 static int realign(args_t *args, bcf1_t *line)
 {
@@ -263,13 +276,17 @@ static int realign(args_t *args, bcf1_t *line)
     int i, nref, reflen = strlen(line->d.allele[0]);
     char *ref = faidx_fetch_seq(args->fai, (char*)args->hdr->id[BCF_DT_CTG][line->rid].key, line->pos, line->pos+reflen-1, &nref);
     if ( !ref ) error("faidx_fetch_seq failed at %s:%d\n", args->hdr->id[BCF_DT_CTG][line->rid].key, line->pos+1);
-    replace_iupac_codes(ref,nref);
+    replace_iupac_codes(ref,nref);  // any non-ACGT character in fasta ref is replaced with N
 
-    // does REF contain non-standard bases?
-    if ( replace_iupac_codes(line->d.allele[0],reflen) )
+    // does VCF REF contain non-standard bases?
+    if ( has_non_acgtn(line->d.allele[0],reflen) )
     {
-        args->nchanged++;
-        bcf_update_alleles(args->hdr,line,(const char**)line->d.allele,line->n_allele);
+        if ( args->check_ref==CHECK_REF_EXIT )
+            error("Non-ACGTN reference allele at %s:%d .. REF_SEQ:'%s' vs VCF:'%s'\n", bcf_seqname(args->hdr,line),line->pos+1,ref,line->d.allele[0]);
+        if ( args->check_ref & CHECK_REF_WARN )
+            fprintf(pysam_stderr,"NON_ACGTN_REF\t%s\t%d\t%s\n", bcf_seqname(args->hdr,line),line->pos+1,line->d.allele[0]);
+        free(ref);
+        return ERR_REF_MISMATCH;
     }
     if ( strcasecmp(ref,line->d.allele[0]) )
     {
@@ -291,6 +308,16 @@ static int realign(args_t *args, bcf1_t *line)
     for (i=0; i<line->n_allele; i++)
     {
         if ( line->d.allele[i][0]=='<' ) return ERR_SYMBOLIC;  // symbolic allele
+        if ( line->d.allele[i][0]=='*' ) return ERR_SPANNING_DELETION;  // spanning deletion
+        if ( bcf_get_variant_type(line,i)==VCF_BND ) return ERR_SYMBOLIC;   // breakend, not an error
+        if ( has_non_acgtn(line->d.allele[i],0) )
+        {
+            if ( args->check_ref==CHECK_REF_EXIT )
+                error("Non-ACGTN alternate allele at %s:%d .. REF_SEQ:'%s' vs VCF:'%s'\n", bcf_seqname(args->hdr,line),line->pos+1,ref,line->d.allele[i]);
+            if ( args->check_ref & CHECK_REF_WARN )
+                fprintf(pysam_stderr,"NON_ACGTN_ALT\t%s\t%d\t%s\n", bcf_seqname(args->hdr,line),line->pos+1,line->d.allele[i]);
+            return ERR_REF_MISMATCH;
+        }
 
         als[i].l = 0;
         kputs(line->d.allele[i], &als[i]);
@@ -392,18 +419,24 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
         int len = bcf_hdr_id2length(args->hdr,BCF_HL_INFO,info->key); \
         if ( len==BCF_VL_A ) \
         { \
-            assert( ret==src->n_allele-1); \
+            if ( ret!=src->n_allele-1 ) \
+                error("Error: wrong number of fields in INFO/%s at %s:%d, expected %d, found %d\n", \
+                        tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele-1,ret); \
             bcf_update_info_##type(args->hdr,dst,tag,vals+ialt,1); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
-            assert( ret==src->n_allele); \
+            if ( ret!=src->n_allele ) \
+                error("Error: wrong number of fields in INFO/%s at %s:%d, expected %d, found %d\n", \
+                        tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele,ret); \
             if ( ialt!=0 ) vals[1] = vals[ialt+1]; \
             bcf_update_info_##type(args->hdr,dst,tag,vals,2); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
-            assert( ret==src->n_allele*(src->n_allele+1)/2 ); \
+            if ( ret!=src->n_allele*(src->n_allele+1)/2 ) \
+                error("Error: wrong number of fields in INFO/%s at %s:%d, expected %d, found %d\n", \
+                        tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele*(src->n_allele+1)/2,ret); \
             if ( ialt!=0 ) \
             { \
                 vals[1] = vals[bcf_alleles2gt(0,ialt+1)]; \
@@ -547,7 +580,9 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
         } \
         if ( len==BCF_VL_A ) \
         { \
-            assert( nvals==(src->n_allele-1)*nsmpl); \
+            if ( nvals!=(src->n_allele-1)*nsmpl ) \
+                error("Error: wrong number of fields in FMT/%s at %s:%d, expected %d, found %d\n", \
+                    tag,bcf_seqname(args->hdr,src),src->pos+1,(src->n_allele-1)*nsmpl,nvals); \
             nvals /= nsmpl; \
             type_t *src_vals = vals, *dst_vals = vals; \
             for (i=0; i<nsmpl; i++) \
@@ -560,7 +595,9 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
         } \
         else if ( len==BCF_VL_R ) \
         { \
-            assert( nvals==src->n_allele*nsmpl); \
+            if ( nvals!=src->n_allele*nsmpl ) \
+                error("Error: wrong number of fields in FMT/%s at %s:%d, expected %d, found %d\n", \
+                    tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele*nsmpl,nvals); \
             nvals /= nsmpl; \
             type_t *src_vals = vals, *dst_vals = vals; \
             for (i=0; i<nsmpl; i++) \
@@ -684,7 +721,10 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
                 if ( *se==',' ) nfields++;
                 se++;
             }
-            assert( nfields==src->n_allele*(src->n_allele+1)/2 || nfields==src->n_allele );
+            if ( nfields!=src->n_allele*(src->n_allele+1)/2 && nfields!=src->n_allele )
+                error("Error: wrong number of fields in FMT/%s at %s:%d, expected %d or %d, found %d\n",
+                        tag,bcf_seqname(args->hdr,src),src->pos+1,src->n_allele*(src->n_allele+1)/2,src->n_allele,nfields);
+
             int len = 0;
             if ( nfields==src->n_allele )   // haploid
             {
@@ -996,7 +1036,7 @@ static void merge_format_genotype(args_t *args, bcf1_t **lines, int nlines, bcf_
                 else
                 {
                     int ial = bcf_gt_allele(gt2[k]);
-                    assert( ial<args->maps[i].nals );
+                    if ( ial>=args->maps[i].nals ) error("Error at %s:%d: incorrect allele index %d\n",bcf_seqname(args->hdr,lines[i]),lines[i]->pos+1,ial);
                     gt[k] = bcf_gt_unphased( args->maps[i].map[ial] ) | bcf_gt_is_phased(gt[k]);
                 }
             }
@@ -1585,7 +1625,8 @@ static void normalize_vcf(args_t *args)
 {
     htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
     if ( out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
-    if ( args->n_threads ) hts_set_threads(out, args->n_threads);
+    if ( args->n_threads )
+        hts_set_opt(out, HTS_OPT_THREAD_POOL, args->files->p);
     if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
     bcf_hdr_write(out, args->hdr);
 
@@ -1668,7 +1709,7 @@ static void usage(void)
     fprintf(pysam_stderr, "    -c, --check-ref <e|w|x|s>         check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
     fprintf(pysam_stderr, "    -D, --remove-duplicates           remove duplicate lines of the same type.\n");
     fprintf(pysam_stderr, "    -d, --rm-dup <type>               remove duplicate snps|indels|both|any\n");
-    fprintf(pysam_stderr, "    -f, --fasta-ref <file>            reference sequence\n");
+    fprintf(pysam_stderr, "    -f, --fasta-ref <file>            reference sequence (MANDATORY)\n");
     fprintf(pysam_stderr, "    -m, --multiallelics <-|+>[type]   split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
     fprintf(pysam_stderr, "        --no-version                  do not append version and command line to the header\n");
     fprintf(pysam_stderr, "    -N, --do-not-normalize            do not normalize indels (with -m or -c s)\n");
@@ -1679,7 +1720,7 @@ static void usage(void)
     fprintf(pysam_stderr, "    -s, --strict-filter               when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
     fprintf(pysam_stderr, "    -t, --targets <region>            similar to -r but streams rather than index-jumps\n");
     fprintf(pysam_stderr, "    -T, --targets-file <file>         similar to -R but streams rather than index-jumps\n");
-    fprintf(pysam_stderr, "        --threads <int>               number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "        --threads <int>               number of extra (de)compression threads [0]\n");
     fprintf(pysam_stderr, "    -w, --site-win <int>              buffer for sorting lines which changed position during realignment [1000]\n");
     fprintf(pysam_stderr, "\n");
     exit(1);
@@ -1806,6 +1847,7 @@ int main_vcfnorm(int argc, char *argv[])
             error("Failed to read the targets: %s\n", args->targets);
     }
 
+    if ( bcf_sr_set_threads(args->files, args->n_threads)<0 ) error("Failed to create threads\n");
     if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
     if ( args->mrows_op&MROWS_SPLIT && args->rmdup ) error("Cannot combine -D and -m-\n");
     init_data(args);
diff --git a/bcftools/vcfplugin.c b/bcftools/vcfplugin.c
index 87a773f..bfd6ad2 100644
--- a/bcftools/vcfplugin.c
+++ b/bcftools/vcfplugin.c
@@ -1,6 +1,6 @@
 /*  vcfplugin.c -- plugin modules for operating on VCF/BCF files.
 
-    Copyright (C) 2013-2015 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -23,6 +23,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -47,7 +48,7 @@ typedef struct _plugin_t plugin_t;
  *   Plugin API:
  *   ----------
  *   const char *about(void)
- *      - short description used by 'bcftools plugin -l'
+ *      - short description used by 'bcftools plugin -lv'
  *
  *   const char *usage(void)
  *      - longer description used by 'bcftools +name -h'
@@ -170,11 +171,11 @@ static void add_plugin_paths(args_t *args, const char *path)
                 args->plugin_paths = (char**) realloc(args->plugin_paths,sizeof(char*)*(args->nplugin_paths+1));
                 args->plugin_paths[args->nplugin_paths] = dir;
                 args->nplugin_paths++;
-                if ( args->verbose ) fprintf(stderr, "plugin directory %s .. ok\n", dir);
+                if ( args->verbose > 1 ) fprintf(stderr, "plugin directory %s .. ok\n", dir);
             }
             else
             {
-                if ( args->verbose ) fprintf(stderr, "plugin directory %s .. %s\n", dir, strerror(errno));
+                if ( args->verbose > 1 ) fprintf(stderr, "plugin directory %s .. %s\n", dir, strerror(errno));
                 free(dir);
             }
 
@@ -210,7 +211,7 @@ static void *dlopen_plugin(args_t *args, const char *fname)
         {
             tmp = msprintf("%s/%s.so", args->plugin_paths[i],fname);
             handle = dlopen(tmp, RTLD_NOW); // valgrind complains about unfreed memory, not our problem though
-            if ( args->verbose )
+            if ( args->verbose > 1 )
             {
                 if ( !handle ) fprintf(stderr,"%s:\n\tdlopen   .. %s\n", tmp,dlerror());
                 else fprintf(stderr,"%s:\n\tdlopen   .. ok\n", tmp);
@@ -221,7 +222,7 @@ static void *dlopen_plugin(args_t *args, const char *fname)
     }
 
     handle = dlopen(fname, RTLD_NOW);
-    if ( args->verbose )
+    if ( args->verbose > 1 )
     {
         if ( !handle ) fprintf(stderr,"%s:\n\tdlopen   .. %s\n", fname,dlerror());
         else fprintf(stderr,"%s:\n\tdlopen   .. ok\n", fname);
@@ -266,19 +267,19 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
     if ( ret )
         plugin->init = NULL;
     else
-        if ( args->verbose ) fprintf(stderr,"\tinit     .. ok\n");
+        if ( args->verbose > 1 ) fprintf(stderr,"\tinit     .. ok\n");
 
     plugin->run = (dl_run_f) dlsym(plugin->handle, "run");
     ret = dlerror();
     if ( ret )
         plugin->run = NULL;
     else
-        if ( args->verbose ) fprintf(stderr,"\trun      .. ok\n");
+        if ( args->verbose > 1 ) fprintf(stderr,"\trun      .. ok\n");
 
     if ( !plugin->init && !plugin->run )
     {
         if ( exit_on_error ) error("Could not initialize %s, neither run or init found \n", plugin->name);
-        else if ( args->verbose ) fprintf(stderr,"\tinit/run .. not found\n");
+        else if ( args->verbose > 1 ) fprintf(stderr,"\tinit/run .. not found\n");
         return -1;
     }
 
@@ -287,7 +288,7 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
     if ( ret )
     {
         if ( exit_on_error ) error("Could not initialize %s, version string not found\n", plugin->name);
-        else if ( args->verbose ) fprintf(stderr,"\tversion  .. not found\n");
+        else if ( args->verbose > 1 ) fprintf(stderr,"\tversion  .. not found\n");
         return -1;
     }
 
@@ -392,8 +393,13 @@ static int list_plugins(args_t *args)
         qsort(plugins, nplugins, sizeof(plugins[0]), cmp_plugin_name);
 
         for (i=0; i<nplugins; i++)
-            printf("\n-- %s --\n%s", plugins[i].name, plugins[i].about());
-        printf("\n");
+        {
+            if ( args->verbose )
+                printf("\n-- %s --\n%s", plugins[i].name, plugins[i].about());
+            else
+                printf("%s\n", plugins[i].name);
+        }
+        if ( args->verbose ) printf("\n");
     }
     else
         print_plugin_usage_hint();
@@ -460,12 +466,33 @@ static void usage(args_t *args)
     fprintf(stderr, "Plugin options:\n");
     fprintf(stderr, "   -h, --help                  list plugin's options\n");
     fprintf(stderr, "   -l, --list-plugins          list available plugins. See BCFTOOLS_PLUGINS environment variable and man page for details\n");
-    fprintf(stderr, "   -v, --verbose               print debugging information on plugin failure\n");
+    fprintf(stderr, "   -v, --verbose               print verbose information, -vv increases verbosity\n");
     fprintf(stderr, "   -V, --version               print version string and exit\n");
     fprintf(stderr, "\n");
     exit(1);
 }
 
+static int is_verbose(int argc, char *argv[])
+{
+    int c, verbose = 0, opterr_ori = opterr;
+    static struct option loptions[] =
+    {
+        {"verbose",no_argument,NULL,'v'},
+        {NULL,0,NULL,0}
+    };
+    opterr = 0;
+    while ((c = getopt_long(argc, argv, "-v",loptions,NULL)) >= 0)
+    {
+        switch (c) {
+            case 'v': verbose++; break;
+            case 1:
+            default: break;
+        }
+    }
+    opterr = opterr_ori;
+    optind = 0;
+    return verbose;
+}
 int main_plugin(int argc, char *argv[])
 {
     int c;
@@ -483,6 +510,7 @@ int main_plugin(int argc, char *argv[])
     char *plugin_name = NULL;
     if ( argv[1][0]!='-' )
     {
+        args->verbose = is_verbose(argc, argv);
         plugin_name = argv[1]; 
         argc--; 
         argv++; 
@@ -518,7 +546,7 @@ int main_plugin(int argc, char *argv[])
     {
         switch (c) {
             case 'V': version_only = 1; break;
-            case 'v': args->verbose = 1; break;
+            case 'v': args->verbose++; break;
             case 'o': args->output_fname = optarg; break;
             case 'O':
                 switch (optarg[0]) {
diff --git a/bcftools/vcfplugin.c.pysam.c b/bcftools/vcfplugin.c.pysam.c
index 8365f7e..ec1d586 100644
--- a/bcftools/vcfplugin.c.pysam.c
+++ b/bcftools/vcfplugin.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfplugin.c -- plugin modules for operating on VCF/BCF files.
 
-    Copyright (C) 2013-2015 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -25,6 +25,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -49,7 +50,7 @@ typedef struct _plugin_t plugin_t;
  *   Plugin API:
  *   ----------
  *   const char *about(void)
- *      - short description used by 'bcftools plugin -l'
+ *      - short description used by 'bcftools plugin -lv'
  *
  *   const char *usage(void)
  *      - longer description used by 'bcftools +name -h'
@@ -172,11 +173,11 @@ static void add_plugin_paths(args_t *args, const char *path)
                 args->plugin_paths = (char**) realloc(args->plugin_paths,sizeof(char*)*(args->nplugin_paths+1));
                 args->plugin_paths[args->nplugin_paths] = dir;
                 args->nplugin_paths++;
-                if ( args->verbose ) fprintf(pysam_stderr, "plugin directory %s .. ok\n", dir);
+                if ( args->verbose > 1 ) fprintf(pysam_stderr, "plugin directory %s .. ok\n", dir);
             }
             else
             {
-                if ( args->verbose ) fprintf(pysam_stderr, "plugin directory %s .. %s\n", dir, strerror(errno));
+                if ( args->verbose > 1 ) fprintf(pysam_stderr, "plugin directory %s .. %s\n", dir, strerror(errno));
                 free(dir);
             }
 
@@ -212,7 +213,7 @@ static void *dlopen_plugin(args_t *args, const char *fname)
         {
             tmp = msprintf("%s/%s.so", args->plugin_paths[i],fname);
             handle = dlopen(tmp, RTLD_NOW); // valgrind complains about unfreed memory, not our problem though
-            if ( args->verbose )
+            if ( args->verbose > 1 )
             {
                 if ( !handle ) fprintf(pysam_stderr,"%s:\n\tdlopen   .. %s\n", tmp,dlerror());
                 else fprintf(pysam_stderr,"%s:\n\tdlopen   .. ok\n", tmp);
@@ -223,7 +224,7 @@ static void *dlopen_plugin(args_t *args, const char *fname)
     }
 
     handle = dlopen(fname, RTLD_NOW);
-    if ( args->verbose )
+    if ( args->verbose > 1 )
     {
         if ( !handle ) fprintf(pysam_stderr,"%s:\n\tdlopen   .. %s\n", fname,dlerror());
         else fprintf(pysam_stderr,"%s:\n\tdlopen   .. ok\n", fname);
@@ -268,19 +269,19 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
     if ( ret )
         plugin->init = NULL;
     else
-        if ( args->verbose ) fprintf(pysam_stderr,"\tinit     .. ok\n");
+        if ( args->verbose > 1 ) fprintf(pysam_stderr,"\tinit     .. ok\n");
 
     plugin->run = (dl_run_f) dlsym(plugin->handle, "run");
     ret = dlerror();
     if ( ret )
         plugin->run = NULL;
     else
-        if ( args->verbose ) fprintf(pysam_stderr,"\trun      .. ok\n");
+        if ( args->verbose > 1 ) fprintf(pysam_stderr,"\trun      .. ok\n");
 
     if ( !plugin->init && !plugin->run )
     {
         if ( exit_on_error ) error("Could not initialize %s, neither run or init found \n", plugin->name);
-        else if ( args->verbose ) fprintf(pysam_stderr,"\tinit/run .. not found\n");
+        else if ( args->verbose > 1 ) fprintf(pysam_stderr,"\tinit/run .. not found\n");
         return -1;
     }
 
@@ -289,7 +290,7 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
     if ( ret )
     {
         if ( exit_on_error ) error("Could not initialize %s, version string not found\n", plugin->name);
-        else if ( args->verbose ) fprintf(pysam_stderr,"\tversion  .. not found\n");
+        else if ( args->verbose > 1 ) fprintf(pysam_stderr,"\tversion  .. not found\n");
         return -1;
     }
 
@@ -394,8 +395,13 @@ static int list_plugins(args_t *args)
         qsort(plugins, nplugins, sizeof(plugins[0]), cmp_plugin_name);
 
         for (i=0; i<nplugins; i++)
-            fprintf(pysam_stdout, "\n-- %s --\n%s", plugins[i].name, plugins[i].about());
-        fprintf(pysam_stdout, "\n");
+        {
+            if ( args->verbose )
+                fprintf(pysam_stdout, "\n-- %s --\n%s", plugins[i].name, plugins[i].about());
+            else
+                fprintf(pysam_stdout, "%s\n", plugins[i].name);
+        }
+        if ( args->verbose ) fprintf(pysam_stdout, "\n");
     }
     else
         print_plugin_usage_hint();
@@ -462,12 +468,33 @@ static void usage(args_t *args)
     fprintf(pysam_stderr, "Plugin options:\n");
     fprintf(pysam_stderr, "   -h, --help                  list plugin's options\n");
     fprintf(pysam_stderr, "   -l, --list-plugins          list available plugins. See BCFTOOLS_PLUGINS environment variable and man page for details\n");
-    fprintf(pysam_stderr, "   -v, --verbose               print debugging information on plugin failure\n");
+    fprintf(pysam_stderr, "   -v, --verbose               print verbose information, -vv increases verbosity\n");
     fprintf(pysam_stderr, "   -V, --version               print version string and exit\n");
     fprintf(pysam_stderr, "\n");
     exit(1);
 }
 
+static int is_verbose(int argc, char *argv[])
+{
+    int c, verbose = 0, opterr_ori = opterr;
+    static struct option loptions[] =
+    {
+        {"verbose",no_argument,NULL,'v'},
+        {NULL,0,NULL,0}
+    };
+    opterr = 0;
+    while ((c = getopt_long(argc, argv, "-v",loptions,NULL)) >= 0)
+    {
+        switch (c) {
+            case 'v': verbose++; break;
+            case 1:
+            default: break;
+        }
+    }
+    opterr = opterr_ori;
+    optind = 0;
+    return verbose;
+}
 int main_plugin(int argc, char *argv[])
 {
     int c;
@@ -485,6 +512,7 @@ int main_plugin(int argc, char *argv[])
     char *plugin_name = NULL;
     if ( argv[1][0]!='-' )
     {
+        args->verbose = is_verbose(argc, argv);
         plugin_name = argv[1]; 
         argc--; 
         argv++; 
@@ -520,7 +548,7 @@ int main_plugin(int argc, char *argv[])
     {
         switch (c) {
             case 'V': version_only = 1; break;
-            case 'v': args->verbose = 1; break;
+            case 'v': args->verbose++; break;
             case 'o': args->output_fname = optarg; break;
             case 'O':
                 switch (optarg[0]) {
diff --git a/bcftools/vcfroh.c b/bcftools/vcfroh.c
index 9560559..9437d7e 100644
--- a/bcftools/vcfroh.c
+++ b/bcftools/vcfroh.c
@@ -30,12 +30,19 @@ THE SOFTWARE.  */
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/kstring.h>
 #include <htslib/kseq.h>
+#include <htslib/bgzf.h>
+#include <errno.h>
 #include "bcftools.h"
 #include "HMM.h"
+#include "smpl_ilist.h"
 
 #define STATE_HW 0        // normal state, follows Hardy-Weinberg allele frequencies
 #define STATE_AZ 1        // autozygous state
 
+#define OUTPUT_ST (1<<1)
+#define OUTPUT_RG (1<<2)
+#define OUTPUT_GZ (1<<3)
+
 /** Genetic map */
 typedef struct
 {
@@ -44,6 +51,24 @@ typedef struct
 }
 genmap_t;
 
+/** HMM data for each sample */
+typedef struct
+{
+    double *eprob;      // emission probs [2*nsites,msites]
+    uint32_t *sites;    // positions [nsites,msites]
+    int nsites, msites;
+    int igenmap;        // current position in genmap
+    int nused;          // some stats to detect if things didn't go wrong
+    int nrid, *rid, *rid_off;   // for viterbi training, keep all chromosomes
+    void *snapshot;             // hmm snapshot
+    struct {
+        uint32_t beg,end,nqual;
+        double qual;
+        int rid, state;
+    } rg;
+}
+smpl_t;
+
 typedef struct _args_t
 {
     bcf_srs_t *files;
@@ -57,29 +82,32 @@ typedef struct _args_t
     double rec_rate;        // constant recombination rate if > 0
 
     hmm_t *hmm;
-    double *eprob;          // emission probs [2*nsites,msites]
-    uint32_t *sites;        // positions [nsites,msites]
-    int nsites, msites;
+    double baum_welch_th;
     int nrids, *rids, *rid_offs;    // multiple chroms with vi_training
+    int nbuf_max, nbuf_olap;
 
-    int32_t *itmp;
-    int nitmp, mitmp;
     float *AFs;
-    int mAFs;
+    int32_t *itmp;
+    int mAFs, nitmp, mitmp, pl_hdr_id, gt_hdr_id;
 
     double pl2p[256], *pdg;
     int32_t skip_rid, prev_rid, prev_pos;
 
-    int ntot, nused;            // some stats to detect if things didn't go awfully wrong
-    int ismpl, nsmpl;           // index of query sample
-    char *estimate_AF, *sample; // list of samples for AF estimate and query sample
-    char **argv, *targets_list, *regions_list, *af_fname, *af_tag;
-    int argc, fake_PLs, snps_only, vi_training;
+    int ntot;                   // some stats to detect if things didn't go wrong
+    smpl_t *smpl;               // HMM data for each sample
+    smpl_ilist_t *af_smpl;      // list of samples to estimate AF from (--estimate-AF)
+    smpl_ilist_t *roh_smpl;     // list of samples to analyze (--samples, --samples-file)
+    char *estimate_AF;          // list of samples for AF estimate and query sample
+    int af_from_PL;             // estimate AF from FMT/PL rather than FMT/GT
+    char **argv, *targets_list, *regions_list, *af_fname, *af_tag, *samples, *buffer_size, *output_fname;
+    int argc, fake_PLs, snps_only, vi_training, samples_is_file, output_type, skip_homref, n_threads;
+    BGZF *out;
+    kstring_t str;
 }
 args_t;
 
 void set_tprob_genmap(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob);
-void set_tprob_recrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob);
+void set_tprob_rrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob);
 
 void *smalloc(size_t size)
 {
@@ -90,57 +118,137 @@ void *smalloc(size_t size)
 
 static void init_data(args_t *args)
 {
+    int i;
+
     args->prev_rid = args->skip_rid = -1;
     args->hdr = args->files->readers[0].header;
 
-    if ( !args->sample )
-    {
-        if ( bcf_hdr_nsamples(args->hdr)>1 ) error("Missing the option -s, --sample\n");
-        args->sample = strdup(args->hdr->samples[0]);
-    }
     if ( !bcf_hdr_nsamples(args->hdr) ) error("No samples in the VCF?\n");
 
-    // Set samples
-    kstring_t str = {0,0,0};
-    if ( args->estimate_AF && strcmp("-",args->estimate_AF) )
+    if ( !args->fake_PLs )
     {
-        int i, n;
-        char **smpls = hts_readlist(args->estimate_AF, 1, &n);
+        args->pl_hdr_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "PL");
+        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,args->pl_hdr_id) )
+            error("Error: The FORMAT/PL tag not found in the header, consider running with -G\n");
+        if ( bcf_hdr_id2type(args->hdr,BCF_HL_FMT,args->pl_hdr_id)!=BCF_HT_INT ) 
+            error("Error: The FORMAT/PL tag not defined as Integer in the header\n");
+    }
 
-        // Make sure the query sample is included
-        for (i=0; i<n; i++)
-            if ( !strcmp(args->sample,smpls[i]) ) break;
+    if ( args->estimate_AF )
+    {
+        if ( !strncmp("GT,",args->estimate_AF,3) ) args->estimate_AF += 3;
+        else if ( !strncmp("PL,",args->estimate_AF,3) ) { args->estimate_AF += 3; args->af_from_PL = 1; }
+        if ( strcmp("-",args->estimate_AF) )
+            args->af_smpl = smpl_ilist_init(args->hdr, args->estimate_AF, 1, SMPL_NONE);
+    }
 
-        // Add the query sample if not present
-        if ( i!=n ) kputs(args->sample, &str);
+    if ( args->estimate_AF || args->fake_PLs )
+    {
+        if ( args->af_from_PL )
+        {
+            args->pl_hdr_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "PL");
+            if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,args->pl_hdr_id) )
+                error("Error: The FORMAT/PL tag not found in the header\n");
+        }
+        else
+        {
+            args->gt_hdr_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "GT");
+            if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,args->gt_hdr_id) )
+                error("Error: The FORMAT/GT tag not found in the header\n");
+        }
+    }
+    if ( args->fake_PLs )
+    {
+        args->gt_hdr_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "GT");
+        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,args->gt_hdr_id) )
+            error("Error: The FORMAT/GT tag not found in the header\n");
+    }
 
-        for (i=0; i<n; i++)
+    args->roh_smpl = smpl_ilist_init(args->hdr, args->samples, args->samples_is_file, SMPL_NONE);
+    if ( args->samples )
+    {
+        // we may be able to subset to a few samples, for a text VCF this can be a major speedup
+        if ( (bcf_sr_get_reader(args->files,0))->file->format.format==vcf )
         {
-            if ( str.l ) kputc(',', &str);
-            kputs(smpls[i], &str);
-            free(smpls[i]);
+            kstring_t str = {0,0,0};
+            smpl_ilist_t *tmp = args->roh_smpl, *rmme = NULL;
+            if ( args->af_smpl )
+            {
+                for (i=0; i<args->roh_smpl->n; i++)
+                {
+                    if ( str.l ) kputc(',', &str);
+                    kputs(args->hdr->samples[args->roh_smpl->idx[i]], &str);
+                }
+                for (i=0; i<args->af_smpl->n; i++)
+                {
+                    kputc(',', &str);
+                    kputs(args->hdr->samples[args->af_smpl->idx[i]], &str);
+                }
+                rmme = tmp = smpl_ilist_init(args->hdr, str.s, 0, SMPL_NONE);
+            }
+            if ( tmp->n < bcf_hdr_nsamples(args->hdr) )
+            {
+                str.l = 0;
+                for (i=0; i<tmp->n; i++)
+                {
+                    if ( str.l ) kputc(',', &str);
+                    kputs(args->hdr->samples[tmp->idx[i]], &str);
+                }
+                int ret = bcf_hdr_set_samples(args->hdr, str.s, 0);
+                if ( ret<0 ) error("Error parsing the list of samples: %s\n", str.s);
+                else if ( ret>0 ) error("The %d-th sample not found in the VCF: %s\n", ret,str.s);
+
+                // update sample ids
+                smpl_ilist_destroy(args->roh_smpl);
+                args->roh_smpl = smpl_ilist_init(args->hdr, args->samples, args->samples_is_file, SMPL_NONE);
+
+                if ( args->af_smpl )
+                {
+                    smpl_ilist_destroy(args->af_smpl);
+                    args->af_smpl = smpl_ilist_init(args->hdr, args->estimate_AF, 1, SMPL_NONE);
+                }
+            }
+            free(str.s);
+            if ( rmme )
+                smpl_ilist_destroy(rmme);
         }
-        free(smpls);
     }
-    else if ( !args->estimate_AF )
-        kputs(args->sample, &str);
 
-    if ( str.l )
+    // check whether all samples are in this list. If so, the lookup will not be needed
+    if ( args->af_smpl && args->af_smpl->n == bcf_hdr_nsamples(args->hdr) )
     {
-        int ret = bcf_hdr_set_samples(args->hdr, str.s, 0);
-        if ( ret<0 ) error("Error parsing the list of samples: %s\n", str.s);
-        else if ( ret>0 ) error("The %d-th sample not found in the VCF\n", ret);
+        // all samples are in this list
+        smpl_ilist_destroy(args->af_smpl);
+        args->af_smpl = NULL;
     }
 
-    if ( args->af_tag )
-        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,bcf_hdr_id2int(args->hdr,BCF_DT_ID,args->af_tag)) )
-            error("No such INFO tag in the VCF: %s\n", args->af_tag);
+    if ( args->buffer_size )
+    {
+        args->nbuf_olap = -1;
+        char *end;
+        double tmp = strtod(args->buffer_size,&end);
+        if ( *end )
+        {
+            if ( *end!=',') error("Could not parse: --buffer-size %s\n", args->buffer_size);
+            args->nbuf_olap = strtol(end+1,&end,10);
+            if ( *end || args->nbuf_olap<0 ) error("Could not parse: --bufer-size %s\n", args->buffer_size);
+        }
+        if ( tmp<0 )
+            args->nbuf_max = fabs(tmp)*1e6/(4+8*2)/args->roh_smpl->n;
+        else
+            args->nbuf_max = tmp;
 
-    args->nsmpl = bcf_hdr_nsamples(args->hdr);
-    args->ismpl = bcf_hdr_id2int(args->hdr, BCF_DT_SAMPLE, args->sample);
-    free(str.s);
+        if ( args->nbuf_olap<0 )
+            args->nbuf_olap = args->nbuf_max*0.01;
+    }
+    fprintf(stderr,"Number of target samples: %d\n", args->roh_smpl->n);
+    fprintf(stderr,"Number of --estimate-AF samples: %d\n", args->af_smpl ? args->af_smpl->n : (args->estimate_AF ? bcf_hdr_nsamples(args->hdr) : 0));
+    fprintf(stderr,"Number of sites in the buffer/overlap: ");
+    if ( args->nbuf_max ) fprintf(stderr,"%d/%d\n", args->nbuf_max,args->nbuf_olap);
+    else fprintf(stderr,"unlimited\n");
+
+    args->smpl = (smpl_t*) calloc(args->roh_smpl->n,sizeof(smpl_t));
 
-    int i;
     for (i=0; i<256; i++) args->pl2p[i] = pow(10., -i/10.);
 
     // Init transition matrix and HMM
@@ -150,40 +258,88 @@ static void init_data(args_t *args)
     MAT(tprob,2,STATE_AZ,STATE_HW) = args->t2AZ;
     MAT(tprob,2,STATE_AZ,STATE_AZ) = 1 - args->t2HW; 
 
+    args->hmm = hmm_init(2, tprob, 10000);
     if ( args->genmap_fname ) 
-    {
-        args->hmm = hmm_init(2, tprob, 0);
         hmm_set_tprob_func(args->hmm, set_tprob_genmap, args);
-    }
     else if ( args->rec_rate > 0 )
-    {
-        args->hmm = hmm_init(2, tprob, 0);
-        hmm_set_tprob_func(args->hmm, set_tprob_recrate, args);
+        hmm_set_tprob_func(args->hmm, set_tprob_rrate, args);
 
-    }
-    else
-        args->hmm = hmm_init(2, tprob, 10000);
+    args->out = bgzf_open(strcmp("stdout",args->output_fname)?args->output_fname:"-", args->output_type&OUTPUT_GZ ? "wg" : "wu"); 
+    if ( !args->out ) error("Failed to open %s: %s\n", args->output_fname, strerror(errno));
 
     // print header
-    printf("# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
-    printf("# The command line was:\tbcftools %s", args->argv[0]);
+    args->str.l = 0;
+    ksprintf(&args->str, "# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
+    ksprintf(&args->str, "# The command line was:\tbcftools %s", args->argv[0]);
     for (i=1; i<args->argc; i++)
-        printf(" %s",args->argv[i]);
-    printf("\n#\n");
-    printf("# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
+        ksprintf(&args->str, " %s",args->argv[i]);
+    ksprintf(&args->str, "\n#\n");
+    if ( args->output_type & OUTPUT_RG )
+    {
+        i = 2;
+        ksprintf(&args->str, "# RG");
+        ksprintf(&args->str, "\t[%d]Sample", i++);
+        ksprintf(&args->str, "\t[%d]Chromosome", i++);
+        ksprintf(&args->str, "\t[%d]Start", i++);
+        ksprintf(&args->str, "\t[%d]End", i++);
+        ksprintf(&args->str, "\t[%d]Length (bp)", i++);
+        ksprintf(&args->str, "\t[%d]Number of markers", i++);
+        ksprintf(&args->str, "\t[%d]Quality (average fwd-bwd phred score)", i++);
+        ksprintf(&args->str, "\n");
+    }
+    if ( args->output_type & OUTPUT_ST )
+    {
+        i = 2;
+        ksprintf(&args->str, "# ST");
+        ksprintf(&args->str, "\t[%d]Sample", i++);
+        ksprintf(&args->str, "\t[%d]Chromosome", i++);
+        ksprintf(&args->str, "\t[%d]Position", i++);
+        ksprintf(&args->str, "\t[%d]State (0:HW, 1:AZ)", i++);
+        ksprintf(&args->str, "\t[%d]Quality (fwd-bwd phred score)", i++);
+        ksprintf(&args->str, "\n");
+    }
+    if ( args->vi_training)
+    {
+        i = 2;
+        ksprintf(&args->str, "# VT, Viterbi Training");
+        ksprintf(&args->str, "\t[%d]Sample", i++);
+        ksprintf(&args->str, "\t[%d]Iteration", i++);
+        ksprintf(&args->str, "\t[%d]dAZ", i++);
+        ksprintf(&args->str, "\t[%d]dHW", i++);
+        ksprintf(&args->str, "\t[%d]1 - P(HW|HW)", i++);
+        ksprintf(&args->str, "\t[%d]P(AZ|HW)", i++);
+        ksprintf(&args->str, "\t[%d]1 - P(AZ|AZ)", i++);
+        ksprintf(&args->str, "\t[%d]P(HW|AZ)", i++);
+        ksprintf(&args->str, "\n");
+    }
+    if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l )
+        error("Error writing %s: %s\n", args->output_fname, strerror(errno));
 }
 
 static void destroy_data(args_t *args)
 {
-    free(args->sites);
-    free(args->eprob);
-    free(args->sample);
+    if ( bgzf_close(args->out)!=0 ) error("Error: close failed .. %s\n", args->output_fname);
+    int i;
+    for (i=0; i<args->roh_smpl->n; i++)
+    {
+        free(args->smpl[i].eprob);
+        free(args->smpl[i].sites);
+        free(args->smpl[i].rid);
+        free(args->smpl[i].rid_off);
+        free(args->smpl[i].snapshot);
+    }
+    free(args->str.s);
+    free(args->smpl);
+    if ( args->af_smpl ) smpl_ilist_destroy(args->af_smpl);
+    smpl_ilist_destroy(args->roh_smpl);
     free(args->rids);
     free(args->rid_offs);
     hmm_destroy(args->hmm);
     bcf_sr_destroy(args->files);
-    free(args->itmp); free(args->AFs); free(args->pdg);
+    free(args->AFs); free(args->pdg);
     free(args->genmap);
+    free(args->itmp);
+    free(args->samples);
 }
 
 static int load_genmap(args_t *args, bcf1_t *line)
@@ -220,21 +376,22 @@ static int load_genmap(args_t *args, bcf1_t *line)
         hts_expand(genmap_t,args->ngenmap,args->mgenmap,args->genmap);
         genmap_t *gm = &args->genmap[args->ngenmap-1];
 
+        // position, convert to 0-based
         char *tmp, *end;
         gm->pos = strtol(str.s, &tmp, 10);
         if ( str.s==tmp ) error("Could not parse %s: %s\n", fname, str.s);
+        gm->pos -= 1;
 
         // skip second column
         tmp++;
         while ( *tmp && !isspace(*tmp) ) tmp++;
 
-        // read the genetic map in cM
+        // read the genetic map in cM, scale from % to likelihood
         gm->rate = strtod(tmp+1, &end);
         if ( tmp+1==end ) error("Could not parse %s: %s\n", fname, str.s);
+        gm->rate *= 0.01;
     }
     if ( !args->ngenmap ) error("Genetic map empty?\n");
-    int i;
-    for (i=0; i<args->ngenmap; i++) args->genmap[i].rate /= args->genmap[args->ngenmap-1].rate; // scale to 1
     if ( hts_close(fp) ) error("Close failed\n");
     free(str.s);
     return 0;
@@ -255,7 +412,6 @@ static double get_genmap_rate(args_t *args, int start, int end)
     // position j to be equal or larger than end
     int j = i;
     while ( j+1<args->ngenmap && args->genmap[j].pos < end ) j++;
-
     if ( i==j )
     {
         args->igenmap = i;
@@ -272,17 +428,20 @@ static double get_genmap_rate(args_t *args, int start, int end)
 void set_tprob_genmap(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob)
 {
     args_t *args = (args_t*) data;
-    double ci = get_genmap_rate(args, pos - prev_pos, pos);
+    double ci = get_genmap_rate(args, prev_pos, pos);
+    if ( args->rec_rate ) ci *= args->rec_rate;
+    if ( ci > 1 ) ci = 1;
     MAT(tprob,2,STATE_HW,STATE_AZ) *= ci;
     MAT(tprob,2,STATE_AZ,STATE_HW) *= ci;
     MAT(tprob,2,STATE_AZ,STATE_AZ)  = 1 - MAT(tprob,2,STATE_HW,STATE_AZ);
     MAT(tprob,2,STATE_HW,STATE_HW)  = 1 - MAT(tprob,2,STATE_AZ,STATE_HW);
 }
 
-void set_tprob_recrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob)
+void set_tprob_rrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob)
 {
     args_t *args = (args_t*) data;
     double ci = (pos - prev_pos) * args->rec_rate;
+    if ( ci > 1 ) ci = 1;
     MAT(tprob,2,STATE_HW,STATE_AZ) *= ci;
     MAT(tprob,2,STATE_AZ,STATE_HW) *= ci;
     MAT(tprob,2,STATE_AZ,STATE_AZ)  = 1 - MAT(tprob,2,STATE_HW,STATE_AZ);
@@ -315,132 +474,163 @@ void set_tprob_recrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data,
  *
  */
 
-static void flush_viterbi(args_t *args)
+static void flush_viterbi(args_t *args, int ismpl)
 {
-    int i,j;
+    smpl_t *smpl = &args->smpl[ismpl];
+    if ( !smpl->nsites ) return;
 
-    if ( !args->nsites ) return; 
+    const char *name = args->hdr->samples[ args->roh_smpl->idx[ismpl] ];
 
-    if ( !args->vi_training )
+    int i,j,k;
+
+    if ( !args->vi_training ) // single viterbi pass
     {
-        // single viterbi pass, one chromsome
-        hmm_run_viterbi(args->hmm, args->nsites, args->eprob, args->sites);
-        hmm_run_fwd_bwd(args->hmm, args->nsites, args->eprob, args->sites);
+        hmm_restore(args->hmm, smpl->snapshot); 
+        int end = (args->nbuf_max && smpl->nsites >= args->nbuf_max && smpl->nsites > args->nbuf_olap) ? smpl->nsites - args->nbuf_olap : smpl->nsites;
+        if ( end < smpl->nsites )
+            smpl->snapshot = hmm_snapshot(args->hmm, smpl->snapshot, smpl->nsites - args->nbuf_olap - 1);
+
+        args->igenmap = smpl->igenmap;
+        hmm_run_viterbi(args->hmm, smpl->nsites, smpl->eprob, smpl->sites);
+        hmm_run_fwd_bwd(args->hmm, smpl->nsites, smpl->eprob, smpl->sites);
         double *fwd = hmm_get_fwd_bwd_prob(args->hmm);
 
-        const char *chr = bcf_hdr_id2name(args->hdr,args->prev_rid);
-        uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
-        for (i=0; i<args->nsites; i++)
+        const char *chr  = bcf_hdr_id2name(args->hdr,args->prev_rid);
+        uint8_t *vpath   = hmm_get_viterbi_path(args->hmm);
+
+        for (i=0; i<end; i++)
         {
             int state = vpath[i*2]==STATE_AZ ? 1 : 0;
-            double *pval = fwd + i*2;
-            printf("%s\t%d\t%d\t%.1f\n", chr,args->sites[i]+1, state, phred_score(1.0-pval[state]));
-        }
-        return;
-    }
+            double qual = phred_score(1.0 - fwd[i*2 + state]);
+            if ( args->output_type & OUTPUT_ST )
+            {
+                args->str.l = 0;
+                ksprintf(&args->str, "ST\t%s\t%s\t%d\t%d\t%.1f\n", name,chr,smpl->sites[i]+1, state, qual);
+                if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
+            }
 
-    // viterbi training, multiple chromosomes
-    double t2az_prev, t2hw_prev;
-    double deltaz, delthw;
-    int niter = 0;
-    do
-    {
-        double *tprob_arr = hmm_get_tprob(args->hmm);
-        t2az_prev = MAT(tprob_arr,2,1,0); //args->t2AZ;
-        t2hw_prev = MAT(tprob_arr,2,0,1); //args->t2HW;
-        double tcounts[] = { 0,0,0,0 };
-        for (i=0; i<args->nrids; i++)
-        {
-            // run viterbi for each chromosomes. eprob and sites contain
-            // multiple chromosomes, rid_offs mark the boundaries
-            int ioff = args->rid_offs[i];
-            int nsites = (i+1==args->nrids ? args->nsites : args->rid_offs[i+1]) - ioff;
-            hmm_run_viterbi(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
-
-            // what transitions were observed: add to the total counts
-            uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
-            for (j=1; j<nsites; j++)
+            if ( args->output_type & OUTPUT_RG )
             {
-                // count the number of transitions
-                int prev_state = vpath[2*(j-1)];
-                int curr_state = vpath[2*j];
-                MAT(tcounts,2,curr_state,prev_state) += 1;
+                if ( state!=smpl->rg.state ) 
+                {
+                    if ( !state )   // the region ends, flush
+                    {
+                        args->str.l = 0;
+                        ksprintf(&args->str, "RG\t%s\t%s\t%d\t%d\t%d\t%d\t%.1f\n",name,bcf_hdr_id2name(args->hdr,smpl->rg.rid),
+                                smpl->rg.beg+1,smpl->rg.end+1,smpl->rg.end-smpl->rg.beg+1,smpl->rg.nqual,smpl->rg.qual/smpl->rg.nqual);
+                        if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
+                        smpl->rg.state = 0;
+                    }
+                    else
+                    {
+                        smpl->rg.state = 1;
+                        smpl->rg.beg = smpl->sites[i];
+                        smpl->rg.rid = args->prev_rid;
+                    }
+                }
+                else if ( state )
+                {
+                    smpl->rg.nqual++;
+                    smpl->rg.qual += qual;
+                    smpl->rg.end  = smpl->sites[i];
+                }
             }
         }
 
-        // update the transition matrix
-        int n = 1;
-        for (i=0; i<2; i++)
+        if ( end < smpl->nsites )
         {
-            for (j=0; j<2; j++) n += MAT(tcounts,2,i,j);
+            end = smpl->nsites - args->nbuf_olap;
+            memmove(smpl->sites, smpl->sites + end, sizeof(*smpl->sites)*args->nbuf_olap);
+            memmove(smpl->eprob, smpl->eprob + end*2, sizeof(*smpl->eprob)*args->nbuf_olap*2);
+            smpl->nsites  = args->nbuf_olap;
+            smpl->igenmap = args->igenmap;
         }
-        for (i=0; i<2; i++)
+        else
         {
-            for (j=0; j<2; j++)
+            smpl->nsites  = 0;
+            smpl->igenmap = 0;
+
+            if ( smpl->rg.state )
             {
-                // no transition to i-th state was observed, set to a small number
-                if ( !MAT(tcounts,2,i,j) ) MAT(tcounts,2,i,j) = 0.1/n;
-                else MAT(tcounts,2,i,j) /= n;
+                args->str.l = 0;
+                ksprintf(&args->str, "RG\t%s\t%s\t%d\t%d\t%d\t%d\t%.1f\n",name,bcf_hdr_id2name(args->hdr,smpl->rg.rid),
+                        smpl->rg.beg+1,smpl->rg.end+1,smpl->rg.end-smpl->rg.beg+1,smpl->rg.nqual,smpl->rg.qual/smpl->rg.nqual);
+                if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
+                smpl->rg.state = 0;
             }
         }
 
-        // normalize
-        for (i=0; i<2; i++)
+        return;
+    }
+
+
+    // viterbi training, multiple chromosomes
+    double t2az_prev, t2hw_prev;
+    double deltaz, delthw;
+
+    double *tprob_arr = hmm_get_tprob(args->hmm);
+    MAT(tprob_arr,2,STATE_HW,STATE_HW) = 1 - args->t2AZ;
+    MAT(tprob_arr,2,STATE_HW,STATE_AZ) = args->t2HW;
+    MAT(tprob_arr,2,STATE_AZ,STATE_HW) = args->t2AZ;
+    MAT(tprob_arr,2,STATE_AZ,STATE_AZ) = 1 - args->t2HW; 
+    hmm_set_tprob(args->hmm, tprob_arr, 10000);
+
+    int niter = 0;
+    do
+    {
+        tprob_arr = hmm_get_tprob(args->hmm);
+        t2az_prev = MAT(tprob_arr,2,STATE_AZ,STATE_HW); //args->t2AZ;
+        t2hw_prev = MAT(tprob_arr,2,STATE_HW,STATE_AZ); //args->t2HW;
+        double tprob_new[] = { 0,0,0,0 };
+        for (i=0; i<smpl->nrid; i++)
         {
-            double norm = 0;
-            for (j=0; j<2; j++) norm += MAT(tcounts,2,j,i);
-            assert( norm!=0 );
-            for (j=0; j<2; j++) MAT(tcounts,2,j,i) /= norm;
+            int ioff = smpl->rid_off[i];
+            int nsites = (i+1==smpl->nrid ? smpl->nsites : smpl->rid_off[i+1]) - ioff;
+            args->igenmap = 0;
+            tprob_arr = hmm_run_baum_welch(args->hmm, nsites, smpl->eprob+ioff*2, smpl->sites+ioff);
+            for (j=0; j<2; j++)
+                for (k=0; k<2; k++) MAT(tprob_new,2,j,k) += MAT(tprob_arr,2,j,k);
         }
+        for (j=0; j<2; j++)
+            for (k=0; k<2; k++) MAT(tprob_new,2,j,k) /= smpl->nrid;
 
-        if ( args->genmap_fname || args->rec_rate > 0 )
-            hmm_set_tprob(args->hmm, tcounts, 0);
-        else
-            hmm_set_tprob(args->hmm, tcounts, 10000);
+        hmm_set_tprob(args->hmm, tprob_new, 10000);
 
-        tprob_arr = hmm_get_tprob(args->hmm);
-        deltaz = fabs(MAT(tprob_arr,2,1,0)-t2az_prev);
-        delthw = fabs(MAT(tprob_arr,2,0,1)-t2hw_prev);
+        deltaz = fabs(MAT(tprob_new,2,STATE_AZ,STATE_HW)-t2az_prev);
+        delthw = fabs(MAT(tprob_new,2,STATE_HW,STATE_AZ)-t2hw_prev);
         niter++;
-        fprintf(stderr,"Viterbi training, iteration %d: dAZ=%e dHW=%e\tP(HW|HW)=%e  P(AZ|HW)=%e  P(AZ|AZ)=%e  P(HW|AZ)=%e\n", 
-            niter,deltaz,delthw,
-            MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
-            MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
+        args->str.l = 0;
+        ksprintf(&args->str, "VT\t%s\t%d\t%e\t%e\t%e\t%e\t%e\t%e\n", 
+            name,niter,deltaz,delthw,
+            1-MAT(tprob_new,2,STATE_HW,STATE_HW),MAT(tprob_new,2,STATE_AZ,STATE_HW),
+            1-MAT(tprob_new,2,STATE_AZ,STATE_AZ),MAT(tprob_new,2,STATE_HW,STATE_AZ));
+        if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
     }
-    while ( deltaz > 0.0 || delthw > 0.0 );
-    double *tprob_arr = hmm_get_tprob(args->hmm);
-    fprintf(stderr, "Viterbi training converged in %d iterations to P(HW|HW)=%e  P(AZ|HW)=%e  P(AZ|AZ)=%e  P(HW|AZ)=%e\n", niter,
-            MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
-            MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
+    while ( deltaz > args->baum_welch_th || delthw > args->baum_welch_th );
     
     // output the results
-    for (i=0; i<args->nrids; i++)
+    for (i=0; i<smpl->nrid; i++)
     {
-        int ioff = args->rid_offs[i];
-        int nsites = (i+1==args->nrids ? args->nsites : args->rid_offs[i+1]) - ioff;
-        hmm_run_viterbi(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
-        hmm_run_fwd_bwd(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
+        int ioff = smpl->rid_off[i];
+        int nsites = (i+1==smpl->nrid ? smpl->nsites : smpl->rid_off[i+1]) - ioff;
+        args->igenmap = 0;
+        hmm_run_viterbi(args->hmm, nsites, smpl->eprob+ioff*2, smpl->sites+ioff);
+        hmm_run_fwd_bwd(args->hmm, nsites, smpl->eprob+ioff*2, smpl->sites+ioff);
         uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
         double  *fwd   = hmm_get_fwd_bwd_prob(args->hmm);
 
-        const char *chr = bcf_hdr_id2name(args->hdr,args->rids[i]);
+        const char *chr = bcf_hdr_id2name(args->hdr,smpl->rid[i]);
         for (j=0; j<nsites; j++)
         {
-            int state = vpath[j*2];
-            double pval = fwd[j*2 + state];
-            printf("%s\t%d\t%d\t%e\n", chr,args->sites[ioff+j]+1,state==STATE_AZ ? 1 : 0, pval);
+            int state = vpath[j*2]==STATE_AZ ? 1 : 0;
+            double *pval = fwd + j*2;
+            args->str.l = 0;
+            ksprintf(&args->str, "ROH\t%s\t%s\t%d\t%d\t%.1f\n", name,chr,smpl->sites[ioff+j]+1, state, phred_score(1.0-pval[state]));
+            if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
         }
     }
 }
 
-static void push_rid(args_t *args, int rid)
-{
-    args->nrids++;
-    args->rids = (int*) realloc(args->rids, args->nrids*sizeof(int));
-    args->rid_offs = (int*) realloc(args->rid_offs, args->nrids*sizeof(int));
-    args->rids[ args->nrids-1 ] = rid;
-    args->rid_offs[ args->nrids-1 ] = args->nsites;
-}
 
 int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq)
 {
@@ -468,27 +658,52 @@ int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq)
     return 0;
 }
 
-int estimate_AF(args_t *args, bcf1_t *line, double *alt_freq)
+int8_t *get_GT(args_t *args, bcf1_t *line)
 {
-    if ( !args->nitmp )
-    {
-        args->nitmp = bcf_get_genotypes(args->hdr, line, &args->itmp, &args->mitmp);
-        if ( args->nitmp != 2*args->nsmpl ) return -1;     // not diploid?
-        args->nitmp /= args->nsmpl;
-    }
+    int i;
+    for (i=0; i<line->n_fmt; i++)
+        if ( line->d.fmt[i].id==args->gt_hdr_id ) break;
+    if ( i==line->n_fmt ) return NULL;        // the tag is not present in this record
+
+    bcf_fmt_t *fmt = &line->d.fmt[i];
+    if ( fmt->n!=2 ) return NULL;             // not diploid
 
+    if ( fmt->type!=BCF_BT_INT8 ) error("This is unexpected, GT type is %d\n", fmt->type);
+    return (int8_t*) fmt->p;
+}
+
+int estimate_AF_from_GT(args_t *args, int8_t *gt, double *alt_freq)
+{
     int i, nalt = 0, nref = 0;
-    for (i=0; i<args->nsmpl; i++)
+    if ( args->af_smpl )        // subset samples for AF estimate
     {
-        int32_t *gt = &args->itmp[i*args->nitmp];
+        for (i=0; i<args->af_smpl->n; i++)
+        {
+            int ismpl = args->af_smpl->idx[i];
+            if ( bcf_gt_is_missing(gt[2*ismpl]) || bcf_gt_is_missing(gt[2*ismpl+1]) ) continue;
 
-        if ( bcf_gt_is_missing(gt[0]) || bcf_gt_is_missing(gt[1]) ) continue;
+            if ( bcf_gt_allele(gt[2*ismpl]) ) nalt++;
+            else nref++;
 
-        if ( bcf_gt_allele(gt[0]) ) nalt++;
-        else nref++;
+            if ( bcf_gt_allele(gt[2*ismpl+1]) ) nalt++;
+            else nref++;
+        }
+    }
+    else                        // all samples used in AF estimate
+    {
+        int8_t *end = gt + 2*bcf_hdr_nsamples(args->hdr);
+        while ( gt < end )
+        {
+            if ( bcf_gt_is_missing(gt[0]) || bcf_gt_is_missing(gt[1]) ) continue;
+
+            if ( bcf_gt_allele(gt[0]) ) nalt++;
+            else nref++;
+
+            if ( bcf_gt_allele(gt[1]) ) nalt++;
+            else nref++;
 
-        if ( bcf_gt_allele(gt[1]) ) nalt++;
-        else nref++;
+            gt += 2;
+        }
     }
     if ( !nalt && !nref ) return -1;
 
@@ -496,105 +711,249 @@ int estimate_AF(args_t *args, bcf1_t *line, double *alt_freq)
     return 0;
 }
 
+int estimate_AF_from_PL(args_t *args, bcf_fmt_t *fmt_pl, int ial, double *alt_freq)
+{
+    double af = 0;
+    int i, j, naf = 0;
+
+    int irr = bcf_alleles2gt(0,0), ira = bcf_alleles2gt(0,ial), iaa = bcf_alleles2gt(ial,ial);
+    if ( iaa >= fmt_pl->n ) return -1;  // not diploid or wrong number of fields
+    
+    if ( args->af_smpl )        // subset samples for AF estimate
+    {
+        #define BRANCH(type_t) \
+        { \
+            for (i=0; i<args->af_smpl->n; i++) \
+            { \
+                int ismpl = args->af_smpl->idx[i]; \
+                type_t *p = (type_t*)fmt_pl->p + fmt_pl->n*ismpl; \
+                if ( p[irr]<0 || p[ira]<0 || p[iaa]<0 ) continue;    /* missing value */ \
+                if ( p[irr]==p[ira] && p[irr]==p[iaa] ) continue;    /* all values are the same */ \
+                double prob[3], norm = 0; \
+                prob[0] = p[irr] < (type_t)256 ? args->pl2p[ p[irr] ] : args->pl2p[255]; \
+                prob[1] = p[ira] < (type_t)256 ? args->pl2p[ p[ira] ] : args->pl2p[255]; \
+                prob[2] = p[iaa] < (type_t)256 ? args->pl2p[ p[iaa] ] : args->pl2p[255]; \
+                for (j=0; j<3; j++) norm += prob[j]; \
+                for (j=0; j<3; j++) prob[j] /= norm; \
+                af += 0.5*prob[1] + prob[2]; \
+                naf++; \
+            } \
+        }
+        switch (fmt_pl->type) {
+            case BCF_BT_INT8:  BRANCH(int8_t); break;
+            case BCF_BT_INT16: BRANCH(int16_t); break;
+            case BCF_BT_INT32: BRANCH(int32_t); break;
+            default: fprintf(stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); exit(1);
+        }
+        #undef BRANCH
+    }
+    else                        // all samples used in AF estimate
+    {
+        int nsmpl = bcf_hdr_nsamples(args->hdr);
+        #define BRANCH(type_t) \
+        { \
+            type_t *p = (type_t*)fmt_pl->p; \
+            p -= fmt_pl->n; \
+            for (i=0; i<nsmpl; i++) \
+            { \
+                p += fmt_pl->n; \
+                if ( p[irr]<0 || p[ira]<0 || p[iaa]<0 ) continue;    /* missing value */ \
+                if ( p[irr]==p[ira] && p[irr]==p[iaa] ) continue;    /* all values are the same */ \
+                double prob[3], norm = 0; \
+                prob[0] = p[irr] < (type_t)256 ? args->pl2p[ p[irr] ] : args->pl2p[255]; \
+                prob[1] = p[ira] < (type_t)256 ? args->pl2p[ p[ira] ] : args->pl2p[255]; \
+                prob[2] = p[iaa] < (type_t)256 ? args->pl2p[ p[iaa] ] : args->pl2p[255]; \
+                for (j=0; j<3; j++) norm += prob[j]; \
+                for (j=0; j<3; j++) prob[j] /= norm; \
+                af += 0.5*prob[1] + prob[2]; \
+                naf++; \
+            } \
+        }
+        switch (fmt_pl->type) {
+            case BCF_BT_INT8:  BRANCH(int8_t); break;
+            case BCF_BT_INT16: BRANCH(int16_t); break;
+            case BCF_BT_INT32: BRANCH(int32_t); break;
+            default: fprintf(stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); exit(1);
+        }
+        #undef BRANCH
+    }
+    if ( !naf ) return -1;
+
+    *alt_freq = af / naf;
+    return 0;
+}
+
+bcf_fmt_t *get_PL(args_t *args, bcf1_t *line)
+{
+    int i;
+    for (i=0; i<line->n_fmt; i++)
+        if ( line->d.fmt[i].id==args->pl_hdr_id ) return &line->d.fmt[i];
+    return NULL;
+}
 
-int parse_line(args_t *args, bcf1_t *line, double *alt_freq, double *pdg)
+int process_line(args_t *args, bcf1_t *line, int ial)
 {
-    args->nitmp = 0;
+    if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT);
+
+    double alt_freq;
+    int8_t *GTs = NULL;
+    bcf_fmt_t *fmt_pl = NULL;
 
     // Set allele frequency
-    int ret;
+    int ret = 0, i,j;
     if ( args->af_tag )
     {
         // Use an INFO tag provided by the user
         ret = bcf_get_info_float(args->hdr, line, args->af_tag, &args->AFs, &args->mAFs);
-        if ( ret==1 )
-            *alt_freq = args->AFs[0];
+        if ( ret>0 )
+            alt_freq = args->AFs[ial-1];
         if ( ret==-2 )
             error("Type mismatch for INFO/%s tag at %s:%d\n", args->af_tag, bcf_seqname(args->hdr,line), line->pos+1);
     }
     else if ( args->af_fname ) 
     {
         // Read AF from a file
-        ret = read_AF(args->files->targets, line, alt_freq);
+        ret = read_AF(args->files->targets, line, &alt_freq);
+    }
+    else if ( args->dflt_AF > 0 )
+    {
+        alt_freq = args->dflt_AF;
+    }
+    else if ( args->estimate_AF )
+    {
+        // Estimate AF from GTs or PLs of all samples or samples listed in a file
+        if ( args->af_from_PL )
+        {
+            fmt_pl = get_PL(args, line);
+            if ( !fmt_pl ) return -1;
+            ret = estimate_AF_from_PL(args, fmt_pl, ial, &alt_freq);
+        }
+        else
+        {
+            GTs = get_GT(args, line);
+            if ( !GTs ) return -1;
+            ret = estimate_AF_from_GT(args, GTs, &alt_freq);
+        }
     }
     else
     {
-        // Use GTs or AC/AN: GTs when AC/AN not present or when GTs explicitly requested by --estimate-AF
-        ret = -1;
-        if ( !args->estimate_AF )
+        // Use AC/AN
+        int AC = -1, AN = 0;
+        ret = bcf_get_info_int32(args->hdr, line, "AN", &args->itmp, &args->mitmp);
+        if ( ret==1 )
         {
-            int AC = -1, AN = 0;
-            ret = bcf_get_info_int32(args->hdr, line, "AN", &args->itmp, &args->mitmp);
-            if ( ret==1 )
-            {
-                AN = args->itmp[0];
-                ret = bcf_get_info_int32(args->hdr, line, "AC", &args->itmp, &args->mitmp);
-                if ( ret>0 )
-                    AC = args->itmp[0];
-            }
-            if ( AN<=0 || AC<0 ) 
-                ret = -1;
-            else 
-                *alt_freq = (double) AC/AN;
+            AN = args->itmp[0];
+            ret = bcf_get_info_int32(args->hdr, line, "AC", &args->itmp, &args->mitmp);
+            if ( ret>0 )
+                AC = args->itmp[0];
         }
-        if ( ret==-1 )
-            ret = estimate_AF(args, line, alt_freq);    // reads GTs into args->itmp
+        if ( AN<=0 || AC<0 ) 
+            ret = -1;
+        else 
+            alt_freq = (double) AC/AN;
     }
 
     if ( ret<0 ) return ret;
-    if ( *alt_freq==0.0 )
-    {
-        if ( args->dflt_AF==0 ) return -1;       // we skip sites with AF=0
-        *alt_freq = args->dflt_AF;
-    }
+    if ( alt_freq==0.0 ) return -1;
 
-    // Set P(D|G)
+    int irr = bcf_alleles2gt(0,0), ira = bcf_alleles2gt(0,ial), iaa = bcf_alleles2gt(ial,ial);
     if ( args->fake_PLs )
     {
-        if ( !args->nitmp )
-        {
-            args->nitmp = bcf_get_genotypes(args->hdr, line, &args->itmp, &args->mitmp);
-            if ( args->nitmp != 2*args->nsmpl ) return -1;     // not diploid?
-            args->nitmp /= args->nsmpl;
-        }
+        if ( !GTs ) GTs = get_GT(args, line);
+    }
+    else
+    {
+        fmt_pl = get_PL(args, line);
+        if ( !fmt_pl ) return -1;
+        if ( iaa >= fmt_pl->n ) return -1;  // not diploid or wrong number of fields
+    }
 
-        int32_t *gt = &args->itmp[args->ismpl*args->nitmp];
-        if ( bcf_gt_is_missing(gt[0]) || bcf_gt_is_missing(gt[1]) ) return -1;
+    for (i=0; i<args->roh_smpl->n; i++)
+    {
+        int ismpl = args->roh_smpl->idx[i];
 
-        int a = bcf_gt_allele(gt[0]);
-        int b = bcf_gt_allele(gt[1]);
-        if ( a!=b )
-        {
-            pdg[0] = pdg[2] = args->unseen_PL;
-            pdg[1] = 1 - 2*args->unseen_PL;
-        }
-        else if ( a==0 )
+        // set P(D|G)
+        double pdg[3];
+        if ( args->fake_PLs )
         {
-            pdg[0] = 1 - 2*args->unseen_PL;
-            pdg[1] = pdg[2] = args->unseen_PL;
+            int8_t *gt = GTs + 2*ismpl;
+            if ( bcf_gt_is_missing(gt[0]) || bcf_gt_is_missing(gt[1]) ) continue;
+
+            int a = bcf_gt_allele(gt[0]);
+            int b = bcf_gt_allele(gt[1]);
+            if ( a!=b )
+            {
+                pdg[0] = pdg[2] = args->unseen_PL;
+                pdg[1] = 1 - 2*args->unseen_PL;
+            }
+            else if ( a==0 )
+            {
+                pdg[0] = 1 - args->unseen_PL - args->unseen_PL*args->unseen_PL;
+                pdg[1] = args->unseen_PL;
+                pdg[2] = args->unseen_PL*args->unseen_PL;
+            }
+            else
+            {
+                pdg[0] = args->unseen_PL*args->unseen_PL;
+                pdg[1] = args->unseen_PL;
+                pdg[2] = 1 - args->unseen_PL - args->unseen_PL*args->unseen_PL;
+            }
         }
         else
         {
-            pdg[0] = pdg[1] = args->unseen_PL;
-            pdg[2] = 1 - 2*args->unseen_PL;
+            #define BRANCH(type_t) \
+            { \
+                type_t *p = (type_t*)fmt_pl->p + fmt_pl->n*ismpl; \
+                if ( p[irr]<0 || p[ira]<0 || p[iaa]<0 ) continue;    /* missing value */ \
+                if ( p[irr]==p[ira] && p[irr]==p[iaa] ) continue;    /* all values are the same */ \
+                pdg[0] = p[irr] < (type_t)256 ? args->pl2p[ p[irr] ] : args->pl2p[255]; \
+                pdg[1] = p[ira] < (type_t)256 ? args->pl2p[ p[ira] ] : args->pl2p[255]; \
+                pdg[2] = p[iaa] < (type_t)256 ? args->pl2p[ p[iaa] ] : args->pl2p[255]; \
+            }
+            switch (fmt_pl->type) {
+                case BCF_BT_INT8:  BRANCH(int8_t); break;
+                case BCF_BT_INT16: BRANCH(int16_t); break;
+                case BCF_BT_INT32: BRANCH(int32_t); break;
+                default: fprintf(stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); exit(1);
+            }
+            #undef BRANCH
         }
-    }
-    else
-    {
-        args->nitmp = bcf_get_format_int32(args->hdr, line, "PL", &args->itmp, &args->mitmp);
-        if ( args->nitmp != args->nsmpl*line->n_allele*(line->n_allele+1)/2. ) return -1;     // not diploid?
-        args->nitmp /= args->nsmpl;
-
-        int32_t *pl = &args->itmp[args->ismpl*args->nitmp];
-        pdg[0] = pl[0] < 256 ? args->pl2p[ pl[0] ] : 1.0;
-        pdg[1] = pl[1] < 256 ? args->pl2p[ pl[1] ] : 1.0;
-        pdg[2] = pl[2] < 256 ? args->pl2p[ pl[2] ] : 1.0;
 
         double sum = pdg[0] + pdg[1] + pdg[2];
-        if ( !sum ) return -1;
-        pdg[0] /= sum;
-        pdg[1] /= sum;
-        pdg[2] /= sum;
+        if ( !sum ) continue;
+        for (j=0; j<3; j++) pdg[j] /= sum;
+        if ( args->skip_homref && pdg[0]>0.99 ) continue;
+
+        smpl_t *smpl = &args->smpl[i];
+        smpl->nused++;
+
+        if ( smpl->nsites >= smpl->msites )
+        {
+            hts_expand(uint32_t,smpl->nsites+1,smpl->msites,smpl->sites);
+            smpl->eprob = (double*) realloc(smpl->eprob,sizeof(*smpl->eprob)*smpl->msites*2);
+            if ( !smpl->eprob ) error("Error: failed to alloc %d bytes\n", sizeof(*smpl->eprob)*smpl->msites*2);
+        }
+        
+        // Calculate emission probabilities P(D|AZ) and P(D|HW)
+        double *eprob = &smpl->eprob[2*smpl->nsites];
+        eprob[STATE_AZ] = pdg[0]*(1-alt_freq) + pdg[2]*alt_freq;
+        eprob[STATE_HW] = pdg[0]*(1-alt_freq)*(1-alt_freq) + 2*pdg[1]*(1-alt_freq)*alt_freq + pdg[2]*alt_freq*alt_freq;
+        
+        smpl->sites[smpl->nsites] = line->pos;
+        smpl->nsites++;
+
+        if ( args->vi_training )
+        {
+            if ( !smpl->nrid || line->rid!=smpl->rid[smpl->nrid-1] )
+            {
+                smpl->nrid++;
+                smpl->rid = (int*) realloc(smpl->rid,sizeof(*smpl->rid)*smpl->nrid);
+                smpl->rid[smpl->nrid-1] = line->rid;
+                smpl->rid_off = (int*) realloc(smpl->rid_off,sizeof(*smpl->rid_off)*smpl->nrid);
+                smpl->rid_off[smpl->nrid-1] = smpl->nsites - 1;
+            }
+        }
+        else if ( args->nbuf_max && smpl->nsites >= args->nbuf_max ) flush_viterbi(args, i);
     }
 
     return 0;
@@ -602,18 +961,35 @@ int parse_line(args_t *args, bcf1_t *line, double *alt_freq, double *pdg)
 
 static void vcfroh(args_t *args, bcf1_t *line)
 {
+    int i;
+
     // Are we done?
     if ( !line )
     { 
-        flush_viterbi(args);
+        for (i=0; i<args->roh_smpl->n; i++) flush_viterbi(args, i);
         return; 
     }
     args->ntot++;
 
-    // Skip unwanted lines
+    // Skip unwanted lines, for simplicity we consider only biallelic sites 
     if ( line->rid == args->skip_rid ) return;
     if ( line->n_allele==1 ) return;    // no ALT allele
-    if ( line->n_allele!=2 ) return;    // only biallelic sites
+    if ( line->n_allele > 3 ) return;   // cannot be bi-allelic, even with <*>
+
+    // This can be raw callable VCF with the symbolic unseen allele <*>
+    int ial = 0;
+    for (i=1; i<line->n_allele; i++)
+        if ( !strcmp("<*>",line->d.allele[i]) ) { ial = i; break; }
+    if ( ial==0 )    // normal VCF, the symbolic allele is not present
+    {
+        if ( line->n_allele!=2 ) return;    // not biallelic
+        ial = 1;
+    }
+    else
+    {
+        if ( line->n_allele!=3 ) return;    // not biallelic
+        ial = ial==1 ? 2 : 1;               // <*> can come in any order
+    }
     if ( args->snps_only && !bcf_is_snp(line) ) return;
 
     // Initialize genetic map
@@ -623,21 +999,15 @@ static void vcfroh(args_t *args, bcf1_t *line)
         args->prev_rid = line->rid;
         args->prev_pos = line->pos;
         skip_rid = load_genmap(args, line);
-        if ( !skip_rid && args->vi_training ) push_rid(args, line->rid);
     }
 
     // New chromosome?
     if ( args->prev_rid!=line->rid )
     {
         skip_rid = load_genmap(args, line);
-        if ( args->vi_training )
-        {
-            if ( !skip_rid ) push_rid(args, line->rid);
-        }
-        else
+        if ( !args->vi_training )
         {
-            flush_viterbi(args);
-            args->nsites = 0;
+            for (i=0; i<args->roh_smpl->n; i++) flush_viterbi(args, i);
         }
         args->prev_rid = line->rid;
         args->prev_pos = line->pos;
@@ -655,25 +1025,8 @@ static void vcfroh(args_t *args, bcf1_t *line)
     args->prev_pos = line->pos;
 
 
-    // Ready for the new site
-    int m = args->msites;
-    hts_expand(uint32_t,args->nsites+1,args->msites,args->sites);
-    if ( args->msites!=m )
-        args->eprob = (double*) realloc(args->eprob,sizeof(double)*args->msites*2);
-
-    // Set likelihoods and alternate allele frequencies
-    double alt_freq, pdg[3];
-    if ( parse_line(args, line, &alt_freq, pdg)<0 ) return; // something went wrong
-
-    args->nused++;
-
-    // Calculate emission probabilities P(D|AZ) and P(D|HW)
-    double *eprob = &args->eprob[2*args->nsites];
-    eprob[STATE_AZ] = pdg[0]*(1-alt_freq) + pdg[2]*alt_freq;
-    eprob[STATE_HW] = pdg[0]*(1-alt_freq)*(1-alt_freq) + 2*pdg[1]*(1-alt_freq)*alt_freq + pdg[2]*alt_freq*alt_freq;
-
-    args->sites[args->nsites] = line->pos;
-    args->nsites++;
+    // parse the new line
+    process_line(args, line, ial);
 }
 
 static void usage(args_t *args)
@@ -686,21 +1039,32 @@ static void usage(args_t *args)
     fprintf(stderr, "        --AF-dflt <float>              if AF is not known, use this allele frequency [skip]\n");
     fprintf(stderr, "        --AF-tag <TAG>                 use TAG for allele frequency\n");
     fprintf(stderr, "        --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
-    fprintf(stderr, "    -e, --estimate-AF <file>           calculate AC,AN counts on the fly, using either all samples (\"-\") or samples listed in <file>\n");
-    fprintf(stderr, "    -G, --GTs-only <float>             use GTs, ignore PLs, use <float> for PL of unseen genotypes. Safe value to use is 30 to account for GT errors.\n");
+    fprintf(stderr, "    -b  --buffer-size <int[,int]>      buffer size and the number of overlapping sites, 0 for unlimited [0]\n");
+    fprintf(stderr, "                                           If the first number is negative, it is interpreted as the maximum memory to\n");
+    fprintf(stderr, "                                           use, in MB. The default overlap is set to roughly 1%% of the buffer size.\n");
+    fprintf(stderr, "    -e, --estimate-AF [TAG],<file>     estimate AF from FORMAT/TAG (GT or PL) of all samples (\"-\") or samples listed\n");
+    fprintf(stderr, "                                            in <file>. If TAG is not given, the frequency is estimated from GT by default\n");
+    fprintf(stderr, "    -G, --GTs-only <float>             use GTs and ignore PLs, instead using <float> for PL of the two least likely genotypes.\n");
+    fprintf(stderr, "                                           Safe value to use is 30 to account for GT errors.\n");
+    fprintf(stderr, "    -i, --ignore-homref                skip hom-ref genotypes (0/0)\n");
     fprintf(stderr, "    -I, --skip-indels                  skip indels as their genotypes are enriched for errors\n");
-    fprintf(stderr, "    -m, --genetic-map <file>           genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\" is replaced with chromosome name\n");
+    fprintf(stderr, "    -m, --genetic-map <file>           genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\"\n");
+    fprintf(stderr, "                                           is replaced with chromosome name\n");
     fprintf(stderr, "    -M, --rec-rate <float>             constant recombination rate per bp\n");
+    fprintf(stderr, "    -o, --output <file>                write output to a file [standard output]\n");
+    fprintf(stderr, "    -O, --output-type [srz]            output s:per-site, r:regions, z:compressed [sr]\n");
     fprintf(stderr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
     fprintf(stderr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
-    fprintf(stderr, "    -s, --sample <sample>              sample to analyze\n");
+    fprintf(stderr, "    -s, --samples <list>               list of samples to analyze [all samples]\n");
+    fprintf(stderr, "    -S, --samples-file <file>          file of samples to analyze [all samples]\n");
     fprintf(stderr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
     fprintf(stderr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
+    fprintf(stderr, "        --threads <int>                number of extra decompression threads [0]\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "HMM Options:\n");
     fprintf(stderr, "    -a, --hw-to-az <float>             P(AZ|HW) transition probability from HW (Hardy-Weinberg) to AZ (autozygous) state [6.7e-8]\n");
     fprintf(stderr, "    -H, --az-to-hw <float>             P(HW|AZ) transition probability from AZ to HW state [5e-9]\n");
-    fprintf(stderr, "    -V, --viterbi-training             perform Viterbi training to estimate transition probabilities\n");
+    fprintf(stderr, "    -V, --viterbi-training <float>     estimate HMM parameters, <float> is the convergence threshold, e.g. 1e-10 (experimental)\n");
     fprintf(stderr, "\n");
     exit(1);
 }
@@ -721,12 +1085,17 @@ int main_vcfroh(int argc, char *argv[])
         {"AF-tag",1,0,0},
         {"AF-file",1,0,1},
         {"AF-dflt",1,0,2},
+        {"buffer-size",1,0,'b'},
+        {"ignore-homref",0,0,'i'},
         {"estimate-AF",1,0,'e'},
+        {"output",1,0,'o'},
+        {"output-type",1,0,'O'},
         {"GTs-only",1,0,'G'},
-        {"sample",1,0,'s'},
+        {"samples",1,0,'s'},
+        {"samples-file",1,0,'S'},
         {"hw-to-az",1,0,'a'},
         {"az-to-hw",1,0,'H'},
-        {"viterbi-training",0,0,'V'},
+        {"viterbi-training",1,0,'V'},
         {"targets",1,0,'t'},
         {"targets-file",1,0,'T'},
         {"regions",1,0,'r'},
@@ -734,12 +1103,13 @@ int main_vcfroh(int argc, char *argv[])
         {"genetic-map",1,0,'m'},
         {"rec-rate",1,0,'M'},
         {"skip-indels",0,0,'I'},
+        {"threads",1,0,9},
         {0,0,0,0}
     };
 
     int naf_opts = 0;
     char *tmp;
-    while ((c = getopt_long(argc, argv, "h?r:R:t:T:H:a:s:m:M:G:Ia:e:V",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "h?r:R:t:T:H:a:s:S:m:M:G:Ia:e:V:b:O:o:i",loptions,NULL)) >= 0) {
         switch (c) {
             case 0: args->af_tag = optarg; naf_opts++; break;
             case 1: args->af_fname = optarg; naf_opts++; break;
@@ -747,7 +1117,15 @@ int main_vcfroh(int argc, char *argv[])
                 args->dflt_AF = strtod(optarg,&tmp);
                 if ( *tmp ) error("Could not parse: --AF-dflt %s\n", optarg);
                 break;
+            case 'o': args->output_fname = optarg; break;
+            case 'O': 
+                if ( strchr(optarg,'s') || strchr(optarg,'S') ) args->output_type |= OUTPUT_ST;
+                if ( strchr(optarg,'r') || strchr(optarg,'R') ) args->output_type |= OUTPUT_RG;
+                if ( strchr(optarg,'z') || strchr(optarg,'z') ) args->output_type |= OUTPUT_GZ;
+                break;
             case 'e': args->estimate_AF = optarg; naf_opts++; break;
+            case 'b': args->buffer_size = optarg; break;
+            case 'i': args->skip_homref = 1; break;
             case 'I': args->snps_only = 1; break;
             case 'G':
                 args->fake_PLs = 1; 
@@ -760,7 +1138,8 @@ int main_vcfroh(int argc, char *argv[])
                 args->rec_rate = strtod(optarg,&tmp);
                 if ( *tmp ) error("Could not parse: -M %s\n", optarg);
                 break;
-            case 's': args->sample = strdup(optarg); break;
+            case 's': args->samples = strdup(optarg); break;
+            case 'S': args->samples = strdup(optarg); args->samples_is_file = 1; break;
             case 'a':
                 args->t2AZ = strtod(optarg,&tmp);
                 if ( *tmp ) error("Could not parse: -a %s\n", optarg);
@@ -773,14 +1152,28 @@ int main_vcfroh(int argc, char *argv[])
             case 'T': args->targets_list = optarg; targets_is_file = 1; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
-            case 'V': args->vi_training = 1; break;
+            case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case 'V': 
+                args->vi_training = 1; 
+                args->baum_welch_th = strtod(optarg,&tmp); 
+                if ( *tmp ) error("Could not parse: --viterbi-training %s\n", optarg);
+                break;
             case 'h': 
             case '?': usage(args); break;
             default: error("Unknown argument: %s\n", optarg);
         }
     }
+    if ( !args->output_fname ) args->output_fname = "stdout";
+    if ( !args->output_type ) args->output_type = OUTPUT_ST|OUTPUT_RG;
+    char *fname = NULL;
+    if ( optind==argc )
+    {
+        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        else usage(args);
+    }
+    else fname = argv[optind];
 
-    if ( argc<optind+1 ) usage(args);
+    if ( args->vi_training && args->buffer_size ) error("Error: cannot use -b with -V\n");
     if ( args->t2AZ<0 || args->t2AZ>1 ) error("Error: The parameter --hw-to-az is not in [0,1]\n", args->t2AZ);
     if ( args->t2HW<0 || args->t2HW>1 ) error("Error: The parameter --az-to-hw is not in [0,1]\n", args->t2HW);
     if ( naf_opts>1 ) error("Error: The options --AF-tag, --AF-file and -e are mutually exclusive\n");
@@ -800,7 +1193,9 @@ int main_vcfroh(int argc, char *argv[])
         if ( bcf_sr_set_targets(args->files, args->af_fname, 1, 3)<0 )
             error("Failed to read the targets: %s\n", args->af_fname);
     }
-    if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open %s: %s\n", argv[optind],bcf_sr_strerror(args->files->errnum));
+    if ( args->n_threads && bcf_sr_set_threads(args->files, args->n_threads)<0)
+        error("Failed to create threads\n");
+    if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
 
     init_data(args);
     while ( bcf_sr_next_line(args->files) )
@@ -808,7 +1203,15 @@ int main_vcfroh(int argc, char *argv[])
         vcfroh(args, args->files->readers[0].buffer[0]);
     }
     vcfroh(args, NULL);
-    fprintf(stderr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
+    int i, nmin = 0;
+    for (i=0; i<args->roh_smpl->n; i++)
+        if ( !i || args->smpl[i].nused < nmin ) nmin = args->smpl[i].nused;
+    fprintf(stderr,"Number of lines total/processed: %d/%d\n", args->ntot,nmin);
+    if ( nmin==0 )
+    {
+        fprintf(stderr,"No usable sites were found.");
+        if ( !naf_opts && !args->dflt_AF ) fprintf(stderr, " Consider using one of the AF options.\n");
+    }
     destroy_data(args);
     free(args);
     return 0;
diff --git a/bcftools/vcfroh.c.pysam.c b/bcftools/vcfroh.c.pysam.c
index 66ddc17..70ed798 100644
--- a/bcftools/vcfroh.c.pysam.c
+++ b/bcftools/vcfroh.c.pysam.c
@@ -32,12 +32,19 @@ THE SOFTWARE.  */
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/kstring.h>
 #include <htslib/kseq.h>
+#include <htslib/bgzf.h>
+#include <errno.h>
 #include "bcftools.h"
 #include "HMM.h"
+#include "smpl_ilist.h"
 
 #define STATE_HW 0        // normal state, follows Hardy-Weinberg allele frequencies
 #define STATE_AZ 1        // autozygous state
 
+#define OUTPUT_ST (1<<1)
+#define OUTPUT_RG (1<<2)
+#define OUTPUT_GZ (1<<3)
+
 /** Genetic map */
 typedef struct
 {
@@ -46,6 +53,24 @@ typedef struct
 }
 genmap_t;
 
+/** HMM data for each sample */
+typedef struct
+{
+    double *eprob;      // emission probs [2*nsites,msites]
+    uint32_t *sites;    // positions [nsites,msites]
+    int nsites, msites;
+    int igenmap;        // current position in genmap
+    int nused;          // some stats to detect if things didn't go wrong
+    int nrid, *rid, *rid_off;   // for viterbi training, keep all chromosomes
+    void *snapshot;             // hmm snapshot
+    struct {
+        uint32_t beg,end,nqual;
+        double qual;
+        int rid, state;
+    } rg;
+}
+smpl_t;
+
 typedef struct _args_t
 {
     bcf_srs_t *files;
@@ -59,29 +84,32 @@ typedef struct _args_t
     double rec_rate;        // constant recombination rate if > 0
 
     hmm_t *hmm;
-    double *eprob;          // emission probs [2*nsites,msites]
-    uint32_t *sites;        // positions [nsites,msites]
-    int nsites, msites;
+    double baum_welch_th;
     int nrids, *rids, *rid_offs;    // multiple chroms with vi_training
+    int nbuf_max, nbuf_olap;
 
-    int32_t *itmp;
-    int nitmp, mitmp;
     float *AFs;
-    int mAFs;
+    int32_t *itmp;
+    int mAFs, nitmp, mitmp, pl_hdr_id, gt_hdr_id;
 
     double pl2p[256], *pdg;
     int32_t skip_rid, prev_rid, prev_pos;
 
-    int ntot, nused;            // some stats to detect if things didn't go awfully wrong
-    int ismpl, nsmpl;           // index of query sample
-    char *estimate_AF, *sample; // list of samples for AF estimate and query sample
-    char **argv, *targets_list, *regions_list, *af_fname, *af_tag;
-    int argc, fake_PLs, snps_only, vi_training;
+    int ntot;                   // some stats to detect if things didn't go wrong
+    smpl_t *smpl;               // HMM data for each sample
+    smpl_ilist_t *af_smpl;      // list of samples to estimate AF from (--estimate-AF)
+    smpl_ilist_t *roh_smpl;     // list of samples to analyze (--samples, --samples-file)
+    char *estimate_AF;          // list of samples for AF estimate and query sample
+    int af_from_PL;             // estimate AF from FMT/PL rather than FMT/GT
+    char **argv, *targets_list, *regions_list, *af_fname, *af_tag, *samples, *buffer_size, *output_fname;
+    int argc, fake_PLs, snps_only, vi_training, samples_is_file, output_type, skip_homref, n_threads;
+    BGZF *out;
+    kstring_t str;
 }
 args_t;
 
 void set_tprob_genmap(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob);
-void set_tprob_recrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob);
+void set_tprob_rrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob);
 
 void *smalloc(size_t size)
 {
@@ -92,57 +120,137 @@ void *smalloc(size_t size)
 
 static void init_data(args_t *args)
 {
+    int i;
+
     args->prev_rid = args->skip_rid = -1;
     args->hdr = args->files->readers[0].header;
 
-    if ( !args->sample )
-    {
-        if ( bcf_hdr_nsamples(args->hdr)>1 ) error("Missing the option -s, --sample\n");
-        args->sample = strdup(args->hdr->samples[0]);
-    }
     if ( !bcf_hdr_nsamples(args->hdr) ) error("No samples in the VCF?\n");
 
-    // Set samples
-    kstring_t str = {0,0,0};
-    if ( args->estimate_AF && strcmp("-",args->estimate_AF) )
+    if ( !args->fake_PLs )
     {
-        int i, n;
-        char **smpls = hts_readlist(args->estimate_AF, 1, &n);
+        args->pl_hdr_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "PL");
+        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,args->pl_hdr_id) )
+            error("Error: The FORMAT/PL tag not found in the header, consider running with -G\n");
+        if ( bcf_hdr_id2type(args->hdr,BCF_HL_FMT,args->pl_hdr_id)!=BCF_HT_INT ) 
+            error("Error: The FORMAT/PL tag not defined as Integer in the header\n");
+    }
 
-        // Make sure the query sample is included
-        for (i=0; i<n; i++)
-            if ( !strcmp(args->sample,smpls[i]) ) break;
+    if ( args->estimate_AF )
+    {
+        if ( !strncmp("GT,",args->estimate_AF,3) ) args->estimate_AF += 3;
+        else if ( !strncmp("PL,",args->estimate_AF,3) ) { args->estimate_AF += 3; args->af_from_PL = 1; }
+        if ( strcmp("-",args->estimate_AF) )
+            args->af_smpl = smpl_ilist_init(args->hdr, args->estimate_AF, 1, SMPL_NONE);
+    }
 
-        // Add the query sample if not present
-        if ( i!=n ) kputs(args->sample, &str);
+    if ( args->estimate_AF || args->fake_PLs )
+    {
+        if ( args->af_from_PL )
+        {
+            args->pl_hdr_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "PL");
+            if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,args->pl_hdr_id) )
+                error("Error: The FORMAT/PL tag not found in the header\n");
+        }
+        else
+        {
+            args->gt_hdr_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "GT");
+            if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,args->gt_hdr_id) )
+                error("Error: The FORMAT/GT tag not found in the header\n");
+        }
+    }
+    if ( args->fake_PLs )
+    {
+        args->gt_hdr_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "GT");
+        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,args->gt_hdr_id) )
+            error("Error: The FORMAT/GT tag not found in the header\n");
+    }
 
-        for (i=0; i<n; i++)
+    args->roh_smpl = smpl_ilist_init(args->hdr, args->samples, args->samples_is_file, SMPL_NONE);
+    if ( args->samples )
+    {
+        // we may be able to subset to a few samples, for a text VCF this can be a major speedup
+        if ( (bcf_sr_get_reader(args->files,0))->file->format.format==vcf )
         {
-            if ( str.l ) kputc(',', &str);
-            kputs(smpls[i], &str);
-            free(smpls[i]);
+            kstring_t str = {0,0,0};
+            smpl_ilist_t *tmp = args->roh_smpl, *rmme = NULL;
+            if ( args->af_smpl )
+            {
+                for (i=0; i<args->roh_smpl->n; i++)
+                {
+                    if ( str.l ) kputc(',', &str);
+                    kputs(args->hdr->samples[args->roh_smpl->idx[i]], &str);
+                }
+                for (i=0; i<args->af_smpl->n; i++)
+                {
+                    kputc(',', &str);
+                    kputs(args->hdr->samples[args->af_smpl->idx[i]], &str);
+                }
+                rmme = tmp = smpl_ilist_init(args->hdr, str.s, 0, SMPL_NONE);
+            }
+            if ( tmp->n < bcf_hdr_nsamples(args->hdr) )
+            {
+                str.l = 0;
+                for (i=0; i<tmp->n; i++)
+                {
+                    if ( str.l ) kputc(',', &str);
+                    kputs(args->hdr->samples[tmp->idx[i]], &str);
+                }
+                int ret = bcf_hdr_set_samples(args->hdr, str.s, 0);
+                if ( ret<0 ) error("Error parsing the list of samples: %s\n", str.s);
+                else if ( ret>0 ) error("The %d-th sample not found in the VCF: %s\n", ret,str.s);
+
+                // update sample ids
+                smpl_ilist_destroy(args->roh_smpl);
+                args->roh_smpl = smpl_ilist_init(args->hdr, args->samples, args->samples_is_file, SMPL_NONE);
+
+                if ( args->af_smpl )
+                {
+                    smpl_ilist_destroy(args->af_smpl);
+                    args->af_smpl = smpl_ilist_init(args->hdr, args->estimate_AF, 1, SMPL_NONE);
+                }
+            }
+            free(str.s);
+            if ( rmme )
+                smpl_ilist_destroy(rmme);
         }
-        free(smpls);
     }
-    else if ( !args->estimate_AF )
-        kputs(args->sample, &str);
 
-    if ( str.l )
+    // check whether all samples are in this list. If so, the lookup will not be needed
+    if ( args->af_smpl && args->af_smpl->n == bcf_hdr_nsamples(args->hdr) )
     {
-        int ret = bcf_hdr_set_samples(args->hdr, str.s, 0);
-        if ( ret<0 ) error("Error parsing the list of samples: %s\n", str.s);
-        else if ( ret>0 ) error("The %d-th sample not found in the VCF\n", ret);
+        // all samples are in this list
+        smpl_ilist_destroy(args->af_smpl);
+        args->af_smpl = NULL;
     }
 
-    if ( args->af_tag )
-        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,bcf_hdr_id2int(args->hdr,BCF_DT_ID,args->af_tag)) )
-            error("No such INFO tag in the VCF: %s\n", args->af_tag);
+    if ( args->buffer_size )
+    {
+        args->nbuf_olap = -1;
+        char *end;
+        double tmp = strtod(args->buffer_size,&end);
+        if ( *end )
+        {
+            if ( *end!=',') error("Could not parse: --buffer-size %s\n", args->buffer_size);
+            args->nbuf_olap = strtol(end+1,&end,10);
+            if ( *end || args->nbuf_olap<0 ) error("Could not parse: --bufer-size %s\n", args->buffer_size);
+        }
+        if ( tmp<0 )
+            args->nbuf_max = fabs(tmp)*1e6/(4+8*2)/args->roh_smpl->n;
+        else
+            args->nbuf_max = tmp;
 
-    args->nsmpl = bcf_hdr_nsamples(args->hdr);
-    args->ismpl = bcf_hdr_id2int(args->hdr, BCF_DT_SAMPLE, args->sample);
-    free(str.s);
+        if ( args->nbuf_olap<0 )
+            args->nbuf_olap = args->nbuf_max*0.01;
+    }
+    fprintf(pysam_stderr,"Number of target samples: %d\n", args->roh_smpl->n);
+    fprintf(pysam_stderr,"Number of --estimate-AF samples: %d\n", args->af_smpl ? args->af_smpl->n : (args->estimate_AF ? bcf_hdr_nsamples(args->hdr) : 0));
+    fprintf(pysam_stderr,"Number of sites in the buffer/overlap: ");
+    if ( args->nbuf_max ) fprintf(pysam_stderr,"%d/%d\n", args->nbuf_max,args->nbuf_olap);
+    else fprintf(pysam_stderr,"unlimited\n");
+
+    args->smpl = (smpl_t*) calloc(args->roh_smpl->n,sizeof(smpl_t));
 
-    int i;
     for (i=0; i<256; i++) args->pl2p[i] = pow(10., -i/10.);
 
     // Init transition matrix and HMM
@@ -152,40 +260,88 @@ static void init_data(args_t *args)
     MAT(tprob,2,STATE_AZ,STATE_HW) = args->t2AZ;
     MAT(tprob,2,STATE_AZ,STATE_AZ) = 1 - args->t2HW; 
 
+    args->hmm = hmm_init(2, tprob, 10000);
     if ( args->genmap_fname ) 
-    {
-        args->hmm = hmm_init(2, tprob, 0);
         hmm_set_tprob_func(args->hmm, set_tprob_genmap, args);
-    }
     else if ( args->rec_rate > 0 )
-    {
-        args->hmm = hmm_init(2, tprob, 0);
-        hmm_set_tprob_func(args->hmm, set_tprob_recrate, args);
+        hmm_set_tprob_func(args->hmm, set_tprob_rrate, args);
 
-    }
-    else
-        args->hmm = hmm_init(2, tprob, 10000);
+    args->out = bgzf_open(strcmp("pysam_stdout",args->output_fname)?args->output_fname:"-", args->output_type&OUTPUT_GZ ? "wg" : "wu"); 
+    if ( !args->out ) error("Failed to open %s: %s\n", args->output_fname, strerror(errno));
 
     // print header
-    fprintf(pysam_stdout, "# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
-    fprintf(pysam_stdout, "# The command line was:\tbcftools %s", args->argv[0]);
+    args->str.l = 0;
+    ksprintf(&args->str, "# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
+    ksprintf(&args->str, "# The command line was:\tbcftools %s", args->argv[0]);
     for (i=1; i<args->argc; i++)
-        fprintf(pysam_stdout, " %s",args->argv[i]);
-    fprintf(pysam_stdout, "\n#\n");
-    fprintf(pysam_stdout, "# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
+        ksprintf(&args->str, " %s",args->argv[i]);
+    ksprintf(&args->str, "\n#\n");
+    if ( args->output_type & OUTPUT_RG )
+    {
+        i = 2;
+        ksprintf(&args->str, "# RG");
+        ksprintf(&args->str, "\t[%d]Sample", i++);
+        ksprintf(&args->str, "\t[%d]Chromosome", i++);
+        ksprintf(&args->str, "\t[%d]Start", i++);
+        ksprintf(&args->str, "\t[%d]End", i++);
+        ksprintf(&args->str, "\t[%d]Length (bp)", i++);
+        ksprintf(&args->str, "\t[%d]Number of markers", i++);
+        ksprintf(&args->str, "\t[%d]Quality (average fwd-bwd phred score)", i++);
+        ksprintf(&args->str, "\n");
+    }
+    if ( args->output_type & OUTPUT_ST )
+    {
+        i = 2;
+        ksprintf(&args->str, "# ST");
+        ksprintf(&args->str, "\t[%d]Sample", i++);
+        ksprintf(&args->str, "\t[%d]Chromosome", i++);
+        ksprintf(&args->str, "\t[%d]Position", i++);
+        ksprintf(&args->str, "\t[%d]State (0:HW, 1:AZ)", i++);
+        ksprintf(&args->str, "\t[%d]Quality (fwd-bwd phred score)", i++);
+        ksprintf(&args->str, "\n");
+    }
+    if ( args->vi_training)
+    {
+        i = 2;
+        ksprintf(&args->str, "# VT, Viterbi Training");
+        ksprintf(&args->str, "\t[%d]Sample", i++);
+        ksprintf(&args->str, "\t[%d]Iteration", i++);
+        ksprintf(&args->str, "\t[%d]dAZ", i++);
+        ksprintf(&args->str, "\t[%d]dHW", i++);
+        ksprintf(&args->str, "\t[%d]1 - P(HW|HW)", i++);
+        ksprintf(&args->str, "\t[%d]P(AZ|HW)", i++);
+        ksprintf(&args->str, "\t[%d]1 - P(AZ|AZ)", i++);
+        ksprintf(&args->str, "\t[%d]P(HW|AZ)", i++);
+        ksprintf(&args->str, "\n");
+    }
+    if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l )
+        error("Error writing %s: %s\n", args->output_fname, strerror(errno));
 }
 
 static void destroy_data(args_t *args)
 {
-    free(args->sites);
-    free(args->eprob);
-    free(args->sample);
+    if ( bgzf_close(args->out)!=0 ) error("Error: close failed .. %s\n", args->output_fname);
+    int i;
+    for (i=0; i<args->roh_smpl->n; i++)
+    {
+        free(args->smpl[i].eprob);
+        free(args->smpl[i].sites);
+        free(args->smpl[i].rid);
+        free(args->smpl[i].rid_off);
+        free(args->smpl[i].snapshot);
+    }
+    free(args->str.s);
+    free(args->smpl);
+    if ( args->af_smpl ) smpl_ilist_destroy(args->af_smpl);
+    smpl_ilist_destroy(args->roh_smpl);
     free(args->rids);
     free(args->rid_offs);
     hmm_destroy(args->hmm);
     bcf_sr_destroy(args->files);
-    free(args->itmp); free(args->AFs); free(args->pdg);
+    free(args->AFs); free(args->pdg);
     free(args->genmap);
+    free(args->itmp);
+    free(args->samples);
 }
 
 static int load_genmap(args_t *args, bcf1_t *line)
@@ -222,21 +378,22 @@ static int load_genmap(args_t *args, bcf1_t *line)
         hts_expand(genmap_t,args->ngenmap,args->mgenmap,args->genmap);
         genmap_t *gm = &args->genmap[args->ngenmap-1];
 
+        // position, convert to 0-based
         char *tmp, *end;
         gm->pos = strtol(str.s, &tmp, 10);
         if ( str.s==tmp ) error("Could not parse %s: %s\n", fname, str.s);
+        gm->pos -= 1;
 
         // skip second column
         tmp++;
         while ( *tmp && !isspace(*tmp) ) tmp++;
 
-        // read the genetic map in cM
+        // read the genetic map in cM, scale from % to likelihood
         gm->rate = strtod(tmp+1, &end);
         if ( tmp+1==end ) error("Could not parse %s: %s\n", fname, str.s);
+        gm->rate *= 0.01;
     }
     if ( !args->ngenmap ) error("Genetic map empty?\n");
-    int i;
-    for (i=0; i<args->ngenmap; i++) args->genmap[i].rate /= args->genmap[args->ngenmap-1].rate; // scale to 1
     if ( hts_close(fp) ) error("Close failed\n");
     free(str.s);
     return 0;
@@ -257,7 +414,6 @@ static double get_genmap_rate(args_t *args, int start, int end)
     // position j to be equal or larger than end
     int j = i;
     while ( j+1<args->ngenmap && args->genmap[j].pos < end ) j++;
-
     if ( i==j )
     {
         args->igenmap = i;
@@ -274,17 +430,20 @@ static double get_genmap_rate(args_t *args, int start, int end)
 void set_tprob_genmap(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob)
 {
     args_t *args = (args_t*) data;
-    double ci = get_genmap_rate(args, pos - prev_pos, pos);
+    double ci = get_genmap_rate(args, prev_pos, pos);
+    if ( args->rec_rate ) ci *= args->rec_rate;
+    if ( ci > 1 ) ci = 1;
     MAT(tprob,2,STATE_HW,STATE_AZ) *= ci;
     MAT(tprob,2,STATE_AZ,STATE_HW) *= ci;
     MAT(tprob,2,STATE_AZ,STATE_AZ)  = 1 - MAT(tprob,2,STATE_HW,STATE_AZ);
     MAT(tprob,2,STATE_HW,STATE_HW)  = 1 - MAT(tprob,2,STATE_AZ,STATE_HW);
 }
 
-void set_tprob_recrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob)
+void set_tprob_rrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data, double *tprob)
 {
     args_t *args = (args_t*) data;
     double ci = (pos - prev_pos) * args->rec_rate;
+    if ( ci > 1 ) ci = 1;
     MAT(tprob,2,STATE_HW,STATE_AZ) *= ci;
     MAT(tprob,2,STATE_AZ,STATE_HW) *= ci;
     MAT(tprob,2,STATE_AZ,STATE_AZ)  = 1 - MAT(tprob,2,STATE_HW,STATE_AZ);
@@ -317,132 +476,163 @@ void set_tprob_recrate(hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *data,
  *
  */
 
-static void flush_viterbi(args_t *args)
+static void flush_viterbi(args_t *args, int ismpl)
 {
-    int i,j;
+    smpl_t *smpl = &args->smpl[ismpl];
+    if ( !smpl->nsites ) return;
 
-    if ( !args->nsites ) return; 
+    const char *name = args->hdr->samples[ args->roh_smpl->idx[ismpl] ];
 
-    if ( !args->vi_training )
+    int i,j,k;
+
+    if ( !args->vi_training ) // single viterbi pass
     {
-        // single viterbi pass, one chromsome
-        hmm_run_viterbi(args->hmm, args->nsites, args->eprob, args->sites);
-        hmm_run_fwd_bwd(args->hmm, args->nsites, args->eprob, args->sites);
+        hmm_restore(args->hmm, smpl->snapshot); 
+        int end = (args->nbuf_max && smpl->nsites >= args->nbuf_max && smpl->nsites > args->nbuf_olap) ? smpl->nsites - args->nbuf_olap : smpl->nsites;
+        if ( end < smpl->nsites )
+            smpl->snapshot = hmm_snapshot(args->hmm, smpl->snapshot, smpl->nsites - args->nbuf_olap - 1);
+
+        args->igenmap = smpl->igenmap;
+        hmm_run_viterbi(args->hmm, smpl->nsites, smpl->eprob, smpl->sites);
+        hmm_run_fwd_bwd(args->hmm, smpl->nsites, smpl->eprob, smpl->sites);
         double *fwd = hmm_get_fwd_bwd_prob(args->hmm);
 
-        const char *chr = bcf_hdr_id2name(args->hdr,args->prev_rid);
-        uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
-        for (i=0; i<args->nsites; i++)
+        const char *chr  = bcf_hdr_id2name(args->hdr,args->prev_rid);
+        uint8_t *vpath   = hmm_get_viterbi_path(args->hmm);
+
+        for (i=0; i<end; i++)
         {
             int state = vpath[i*2]==STATE_AZ ? 1 : 0;
-            double *pval = fwd + i*2;
-            fprintf(pysam_stdout, "%s\t%d\t%d\t%.1f\n", chr,args->sites[i]+1, state, phred_score(1.0-pval[state]));
-        }
-        return;
-    }
+            double qual = phred_score(1.0 - fwd[i*2 + state]);
+            if ( args->output_type & OUTPUT_ST )
+            {
+                args->str.l = 0;
+                ksprintf(&args->str, "ST\t%s\t%s\t%d\t%d\t%.1f\n", name,chr,smpl->sites[i]+1, state, qual);
+                if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
+            }
 
-    // viterbi training, multiple chromosomes
-    double t2az_prev, t2hw_prev;
-    double deltaz, delthw;
-    int niter = 0;
-    do
-    {
-        double *tprob_arr = hmm_get_tprob(args->hmm);
-        t2az_prev = MAT(tprob_arr,2,1,0); //args->t2AZ;
-        t2hw_prev = MAT(tprob_arr,2,0,1); //args->t2HW;
-        double tcounts[] = { 0,0,0,0 };
-        for (i=0; i<args->nrids; i++)
-        {
-            // run viterbi for each chromosomes. eprob and sites contain
-            // multiple chromosomes, rid_offs mark the boundaries
-            int ioff = args->rid_offs[i];
-            int nsites = (i+1==args->nrids ? args->nsites : args->rid_offs[i+1]) - ioff;
-            hmm_run_viterbi(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
-
-            // what transitions were observed: add to the total counts
-            uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
-            for (j=1; j<nsites; j++)
+            if ( args->output_type & OUTPUT_RG )
             {
-                // count the number of transitions
-                int prev_state = vpath[2*(j-1)];
-                int curr_state = vpath[2*j];
-                MAT(tcounts,2,curr_state,prev_state) += 1;
+                if ( state!=smpl->rg.state ) 
+                {
+                    if ( !state )   // the region ends, flush
+                    {
+                        args->str.l = 0;
+                        ksprintf(&args->str, "RG\t%s\t%s\t%d\t%d\t%d\t%d\t%.1f\n",name,bcf_hdr_id2name(args->hdr,smpl->rg.rid),
+                                smpl->rg.beg+1,smpl->rg.end+1,smpl->rg.end-smpl->rg.beg+1,smpl->rg.nqual,smpl->rg.qual/smpl->rg.nqual);
+                        if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
+                        smpl->rg.state = 0;
+                    }
+                    else
+                    {
+                        smpl->rg.state = 1;
+                        smpl->rg.beg = smpl->sites[i];
+                        smpl->rg.rid = args->prev_rid;
+                    }
+                }
+                else if ( state )
+                {
+                    smpl->rg.nqual++;
+                    smpl->rg.qual += qual;
+                    smpl->rg.end  = smpl->sites[i];
+                }
             }
         }
 
-        // update the transition matrix
-        int n = 1;
-        for (i=0; i<2; i++)
+        if ( end < smpl->nsites )
         {
-            for (j=0; j<2; j++) n += MAT(tcounts,2,i,j);
+            end = smpl->nsites - args->nbuf_olap;
+            memmove(smpl->sites, smpl->sites + end, sizeof(*smpl->sites)*args->nbuf_olap);
+            memmove(smpl->eprob, smpl->eprob + end*2, sizeof(*smpl->eprob)*args->nbuf_olap*2);
+            smpl->nsites  = args->nbuf_olap;
+            smpl->igenmap = args->igenmap;
         }
-        for (i=0; i<2; i++)
+        else
         {
-            for (j=0; j<2; j++)
+            smpl->nsites  = 0;
+            smpl->igenmap = 0;
+
+            if ( smpl->rg.state )
             {
-                // no transition to i-th state was observed, set to a small number
-                if ( !MAT(tcounts,2,i,j) ) MAT(tcounts,2,i,j) = 0.1/n;
-                else MAT(tcounts,2,i,j) /= n;
+                args->str.l = 0;
+                ksprintf(&args->str, "RG\t%s\t%s\t%d\t%d\t%d\t%d\t%.1f\n",name,bcf_hdr_id2name(args->hdr,smpl->rg.rid),
+                        smpl->rg.beg+1,smpl->rg.end+1,smpl->rg.end-smpl->rg.beg+1,smpl->rg.nqual,smpl->rg.qual/smpl->rg.nqual);
+                if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
+                smpl->rg.state = 0;
             }
         }
 
-        // normalize
-        for (i=0; i<2; i++)
+        return;
+    }
+
+
+    // viterbi training, multiple chromosomes
+    double t2az_prev, t2hw_prev;
+    double deltaz, delthw;
+
+    double *tprob_arr = hmm_get_tprob(args->hmm);
+    MAT(tprob_arr,2,STATE_HW,STATE_HW) = 1 - args->t2AZ;
+    MAT(tprob_arr,2,STATE_HW,STATE_AZ) = args->t2HW;
+    MAT(tprob_arr,2,STATE_AZ,STATE_HW) = args->t2AZ;
+    MAT(tprob_arr,2,STATE_AZ,STATE_AZ) = 1 - args->t2HW; 
+    hmm_set_tprob(args->hmm, tprob_arr, 10000);
+
+    int niter = 0;
+    do
+    {
+        tprob_arr = hmm_get_tprob(args->hmm);
+        t2az_prev = MAT(tprob_arr,2,STATE_AZ,STATE_HW); //args->t2AZ;
+        t2hw_prev = MAT(tprob_arr,2,STATE_HW,STATE_AZ); //args->t2HW;
+        double tprob_new[] = { 0,0,0,0 };
+        for (i=0; i<smpl->nrid; i++)
         {
-            double norm = 0;
-            for (j=0; j<2; j++) norm += MAT(tcounts,2,j,i);
-            assert( norm!=0 );
-            for (j=0; j<2; j++) MAT(tcounts,2,j,i) /= norm;
+            int ioff = smpl->rid_off[i];
+            int nsites = (i+1==smpl->nrid ? smpl->nsites : smpl->rid_off[i+1]) - ioff;
+            args->igenmap = 0;
+            tprob_arr = hmm_run_baum_welch(args->hmm, nsites, smpl->eprob+ioff*2, smpl->sites+ioff);
+            for (j=0; j<2; j++)
+                for (k=0; k<2; k++) MAT(tprob_new,2,j,k) += MAT(tprob_arr,2,j,k);
         }
+        for (j=0; j<2; j++)
+            for (k=0; k<2; k++) MAT(tprob_new,2,j,k) /= smpl->nrid;
 
-        if ( args->genmap_fname || args->rec_rate > 0 )
-            hmm_set_tprob(args->hmm, tcounts, 0);
-        else
-            hmm_set_tprob(args->hmm, tcounts, 10000);
+        hmm_set_tprob(args->hmm, tprob_new, 10000);
 
-        tprob_arr = hmm_get_tprob(args->hmm);
-        deltaz = fabs(MAT(tprob_arr,2,1,0)-t2az_prev);
-        delthw = fabs(MAT(tprob_arr,2,0,1)-t2hw_prev);
+        deltaz = fabs(MAT(tprob_new,2,STATE_AZ,STATE_HW)-t2az_prev);
+        delthw = fabs(MAT(tprob_new,2,STATE_HW,STATE_AZ)-t2hw_prev);
         niter++;
-        fprintf(pysam_stderr,"Viterbi training, iteration %d: dAZ=%e dHW=%e\tP(HW|HW)=%e  P(AZ|HW)=%e  P(AZ|AZ)=%e  P(HW|AZ)=%e\n", 
-            niter,deltaz,delthw,
-            MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
-            MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
+        args->str.l = 0;
+        ksprintf(&args->str, "VT\t%s\t%d\t%e\t%e\t%e\t%e\t%e\t%e\n", 
+            name,niter,deltaz,delthw,
+            1-MAT(tprob_new,2,STATE_HW,STATE_HW),MAT(tprob_new,2,STATE_AZ,STATE_HW),
+            1-MAT(tprob_new,2,STATE_AZ,STATE_AZ),MAT(tprob_new,2,STATE_HW,STATE_AZ));
+        if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
     }
-    while ( deltaz > 0.0 || delthw > 0.0 );
-    double *tprob_arr = hmm_get_tprob(args->hmm);
-    fprintf(pysam_stderr, "Viterbi training converged in %d iterations to P(HW|HW)=%e  P(AZ|HW)=%e  P(AZ|AZ)=%e  P(HW|AZ)=%e\n", niter,
-            MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
-            MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
+    while ( deltaz > args->baum_welch_th || delthw > args->baum_welch_th );
     
     // output the results
-    for (i=0; i<args->nrids; i++)
+    for (i=0; i<smpl->nrid; i++)
     {
-        int ioff = args->rid_offs[i];
-        int nsites = (i+1==args->nrids ? args->nsites : args->rid_offs[i+1]) - ioff;
-        hmm_run_viterbi(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
-        hmm_run_fwd_bwd(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
+        int ioff = smpl->rid_off[i];
+        int nsites = (i+1==smpl->nrid ? smpl->nsites : smpl->rid_off[i+1]) - ioff;
+        args->igenmap = 0;
+        hmm_run_viterbi(args->hmm, nsites, smpl->eprob+ioff*2, smpl->sites+ioff);
+        hmm_run_fwd_bwd(args->hmm, nsites, smpl->eprob+ioff*2, smpl->sites+ioff);
         uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
         double  *fwd   = hmm_get_fwd_bwd_prob(args->hmm);
 
-        const char *chr = bcf_hdr_id2name(args->hdr,args->rids[i]);
+        const char *chr = bcf_hdr_id2name(args->hdr,smpl->rid[i]);
         for (j=0; j<nsites; j++)
         {
-            int state = vpath[j*2];
-            double pval = fwd[j*2 + state];
-            fprintf(pysam_stdout, "%s\t%d\t%d\t%e\n", chr,args->sites[ioff+j]+1,state==STATE_AZ ? 1 : 0, pval);
+            int state = vpath[j*2]==STATE_AZ ? 1 : 0;
+            double *pval = fwd + j*2;
+            args->str.l = 0;
+            ksprintf(&args->str, "ROH\t%s\t%s\t%d\t%d\t%.1f\n", name,chr,smpl->sites[ioff+j]+1, state, phred_score(1.0-pval[state]));
+            if ( bgzf_write(args->out, args->str.s, args->str.l) != args->str.l ) error("Error writing %s: %s\n", args->output_fname, strerror(errno));
         }
     }
 }
 
-static void push_rid(args_t *args, int rid)
-{
-    args->nrids++;
-    args->rids = (int*) realloc(args->rids, args->nrids*sizeof(int));
-    args->rid_offs = (int*) realloc(args->rid_offs, args->nrids*sizeof(int));
-    args->rids[ args->nrids-1 ] = rid;
-    args->rid_offs[ args->nrids-1 ] = args->nsites;
-}
 
 int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq)
 {
@@ -470,27 +660,52 @@ int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq)
     return 0;
 }
 
-int estimate_AF(args_t *args, bcf1_t *line, double *alt_freq)
+int8_t *get_GT(args_t *args, bcf1_t *line)
 {
-    if ( !args->nitmp )
-    {
-        args->nitmp = bcf_get_genotypes(args->hdr, line, &args->itmp, &args->mitmp);
-        if ( args->nitmp != 2*args->nsmpl ) return -1;     // not diploid?
-        args->nitmp /= args->nsmpl;
-    }
+    int i;
+    for (i=0; i<line->n_fmt; i++)
+        if ( line->d.fmt[i].id==args->gt_hdr_id ) break;
+    if ( i==line->n_fmt ) return NULL;        // the tag is not present in this record
+
+    bcf_fmt_t *fmt = &line->d.fmt[i];
+    if ( fmt->n!=2 ) return NULL;             // not diploid
 
+    if ( fmt->type!=BCF_BT_INT8 ) error("This is unexpected, GT type is %d\n", fmt->type);
+    return (int8_t*) fmt->p;
+}
+
+int estimate_AF_from_GT(args_t *args, int8_t *gt, double *alt_freq)
+{
     int i, nalt = 0, nref = 0;
-    for (i=0; i<args->nsmpl; i++)
+    if ( args->af_smpl )        // subset samples for AF estimate
     {
-        int32_t *gt = &args->itmp[i*args->nitmp];
+        for (i=0; i<args->af_smpl->n; i++)
+        {
+            int ismpl = args->af_smpl->idx[i];
+            if ( bcf_gt_is_missing(gt[2*ismpl]) || bcf_gt_is_missing(gt[2*ismpl+1]) ) continue;
 
-        if ( bcf_gt_is_missing(gt[0]) || bcf_gt_is_missing(gt[1]) ) continue;
+            if ( bcf_gt_allele(gt[2*ismpl]) ) nalt++;
+            else nref++;
 
-        if ( bcf_gt_allele(gt[0]) ) nalt++;
-        else nref++;
+            if ( bcf_gt_allele(gt[2*ismpl+1]) ) nalt++;
+            else nref++;
+        }
+    }
+    else                        // all samples used in AF estimate
+    {
+        int8_t *end = gt + 2*bcf_hdr_nsamples(args->hdr);
+        while ( gt < end )
+        {
+            if ( bcf_gt_is_missing(gt[0]) || bcf_gt_is_missing(gt[1]) ) continue;
+
+            if ( bcf_gt_allele(gt[0]) ) nalt++;
+            else nref++;
+
+            if ( bcf_gt_allele(gt[1]) ) nalt++;
+            else nref++;
 
-        if ( bcf_gt_allele(gt[1]) ) nalt++;
-        else nref++;
+            gt += 2;
+        }
     }
     if ( !nalt && !nref ) return -1;
 
@@ -498,105 +713,249 @@ int estimate_AF(args_t *args, bcf1_t *line, double *alt_freq)
     return 0;
 }
 
+int estimate_AF_from_PL(args_t *args, bcf_fmt_t *fmt_pl, int ial, double *alt_freq)
+{
+    double af = 0;
+    int i, j, naf = 0;
+
+    int irr = bcf_alleles2gt(0,0), ira = bcf_alleles2gt(0,ial), iaa = bcf_alleles2gt(ial,ial);
+    if ( iaa >= fmt_pl->n ) return -1;  // not diploid or wrong number of fields
+    
+    if ( args->af_smpl )        // subset samples for AF estimate
+    {
+        #define BRANCH(type_t) \
+        { \
+            for (i=0; i<args->af_smpl->n; i++) \
+            { \
+                int ismpl = args->af_smpl->idx[i]; \
+                type_t *p = (type_t*)fmt_pl->p + fmt_pl->n*ismpl; \
+                if ( p[irr]<0 || p[ira]<0 || p[iaa]<0 ) continue;    /* missing value */ \
+                if ( p[irr]==p[ira] && p[irr]==p[iaa] ) continue;    /* all values are the same */ \
+                double prob[3], norm = 0; \
+                prob[0] = p[irr] < (type_t)256 ? args->pl2p[ p[irr] ] : args->pl2p[255]; \
+                prob[1] = p[ira] < (type_t)256 ? args->pl2p[ p[ira] ] : args->pl2p[255]; \
+                prob[2] = p[iaa] < (type_t)256 ? args->pl2p[ p[iaa] ] : args->pl2p[255]; \
+                for (j=0; j<3; j++) norm += prob[j]; \
+                for (j=0; j<3; j++) prob[j] /= norm; \
+                af += 0.5*prob[1] + prob[2]; \
+                naf++; \
+            } \
+        }
+        switch (fmt_pl->type) {
+            case BCF_BT_INT8:  BRANCH(int8_t); break;
+            case BCF_BT_INT16: BRANCH(int16_t); break;
+            case BCF_BT_INT32: BRANCH(int32_t); break;
+            default: fprintf(pysam_stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); exit(1);
+        }
+        #undef BRANCH
+    }
+    else                        // all samples used in AF estimate
+    {
+        int nsmpl = bcf_hdr_nsamples(args->hdr);
+        #define BRANCH(type_t) \
+        { \
+            type_t *p = (type_t*)fmt_pl->p; \
+            p -= fmt_pl->n; \
+            for (i=0; i<nsmpl; i++) \
+            { \
+                p += fmt_pl->n; \
+                if ( p[irr]<0 || p[ira]<0 || p[iaa]<0 ) continue;    /* missing value */ \
+                if ( p[irr]==p[ira] && p[irr]==p[iaa] ) continue;    /* all values are the same */ \
+                double prob[3], norm = 0; \
+                prob[0] = p[irr] < (type_t)256 ? args->pl2p[ p[irr] ] : args->pl2p[255]; \
+                prob[1] = p[ira] < (type_t)256 ? args->pl2p[ p[ira] ] : args->pl2p[255]; \
+                prob[2] = p[iaa] < (type_t)256 ? args->pl2p[ p[iaa] ] : args->pl2p[255]; \
+                for (j=0; j<3; j++) norm += prob[j]; \
+                for (j=0; j<3; j++) prob[j] /= norm; \
+                af += 0.5*prob[1] + prob[2]; \
+                naf++; \
+            } \
+        }
+        switch (fmt_pl->type) {
+            case BCF_BT_INT8:  BRANCH(int8_t); break;
+            case BCF_BT_INT16: BRANCH(int16_t); break;
+            case BCF_BT_INT32: BRANCH(int32_t); break;
+            default: fprintf(pysam_stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); exit(1);
+        }
+        #undef BRANCH
+    }
+    if ( !naf ) return -1;
+
+    *alt_freq = af / naf;
+    return 0;
+}
+
+bcf_fmt_t *get_PL(args_t *args, bcf1_t *line)
+{
+    int i;
+    for (i=0; i<line->n_fmt; i++)
+        if ( line->d.fmt[i].id==args->pl_hdr_id ) return &line->d.fmt[i];
+    return NULL;
+}
 
-int parse_line(args_t *args, bcf1_t *line, double *alt_freq, double *pdg)
+int process_line(args_t *args, bcf1_t *line, int ial)
 {
-    args->nitmp = 0;
+    if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT);
+
+    double alt_freq;
+    int8_t *GTs = NULL;
+    bcf_fmt_t *fmt_pl = NULL;
 
     // Set allele frequency
-    int ret;
+    int ret = 0, i,j;
     if ( args->af_tag )
     {
         // Use an INFO tag provided by the user
         ret = bcf_get_info_float(args->hdr, line, args->af_tag, &args->AFs, &args->mAFs);
-        if ( ret==1 )
-            *alt_freq = args->AFs[0];
+        if ( ret>0 )
+            alt_freq = args->AFs[ial-1];
         if ( ret==-2 )
             error("Type mismatch for INFO/%s tag at %s:%d\n", args->af_tag, bcf_seqname(args->hdr,line), line->pos+1);
     }
     else if ( args->af_fname ) 
     {
         // Read AF from a file
-        ret = read_AF(args->files->targets, line, alt_freq);
+        ret = read_AF(args->files->targets, line, &alt_freq);
+    }
+    else if ( args->dflt_AF > 0 )
+    {
+        alt_freq = args->dflt_AF;
+    }
+    else if ( args->estimate_AF )
+    {
+        // Estimate AF from GTs or PLs of all samples or samples listed in a file
+        if ( args->af_from_PL )
+        {
+            fmt_pl = get_PL(args, line);
+            if ( !fmt_pl ) return -1;
+            ret = estimate_AF_from_PL(args, fmt_pl, ial, &alt_freq);
+        }
+        else
+        {
+            GTs = get_GT(args, line);
+            if ( !GTs ) return -1;
+            ret = estimate_AF_from_GT(args, GTs, &alt_freq);
+        }
     }
     else
     {
-        // Use GTs or AC/AN: GTs when AC/AN not present or when GTs explicitly requested by --estimate-AF
-        ret = -1;
-        if ( !args->estimate_AF )
+        // Use AC/AN
+        int AC = -1, AN = 0;
+        ret = bcf_get_info_int32(args->hdr, line, "AN", &args->itmp, &args->mitmp);
+        if ( ret==1 )
         {
-            int AC = -1, AN = 0;
-            ret = bcf_get_info_int32(args->hdr, line, "AN", &args->itmp, &args->mitmp);
-            if ( ret==1 )
-            {
-                AN = args->itmp[0];
-                ret = bcf_get_info_int32(args->hdr, line, "AC", &args->itmp, &args->mitmp);
-                if ( ret>0 )
-                    AC = args->itmp[0];
-            }
-            if ( AN<=0 || AC<0 ) 
-                ret = -1;
-            else 
-                *alt_freq = (double) AC/AN;
+            AN = args->itmp[0];
+            ret = bcf_get_info_int32(args->hdr, line, "AC", &args->itmp, &args->mitmp);
+            if ( ret>0 )
+                AC = args->itmp[0];
         }
-        if ( ret==-1 )
-            ret = estimate_AF(args, line, alt_freq);    // reads GTs into args->itmp
+        if ( AN<=0 || AC<0 ) 
+            ret = -1;
+        else 
+            alt_freq = (double) AC/AN;
     }
 
     if ( ret<0 ) return ret;
-    if ( *alt_freq==0.0 )
-    {
-        if ( args->dflt_AF==0 ) return -1;       // we skip sites with AF=0
-        *alt_freq = args->dflt_AF;
-    }
+    if ( alt_freq==0.0 ) return -1;
 
-    // Set P(D|G)
+    int irr = bcf_alleles2gt(0,0), ira = bcf_alleles2gt(0,ial), iaa = bcf_alleles2gt(ial,ial);
     if ( args->fake_PLs )
     {
-        if ( !args->nitmp )
-        {
-            args->nitmp = bcf_get_genotypes(args->hdr, line, &args->itmp, &args->mitmp);
-            if ( args->nitmp != 2*args->nsmpl ) return -1;     // not diploid?
-            args->nitmp /= args->nsmpl;
-        }
+        if ( !GTs ) GTs = get_GT(args, line);
+    }
+    else
+    {
+        fmt_pl = get_PL(args, line);
+        if ( !fmt_pl ) return -1;
+        if ( iaa >= fmt_pl->n ) return -1;  // not diploid or wrong number of fields
+    }
 
-        int32_t *gt = &args->itmp[args->ismpl*args->nitmp];
-        if ( bcf_gt_is_missing(gt[0]) || bcf_gt_is_missing(gt[1]) ) return -1;
+    for (i=0; i<args->roh_smpl->n; i++)
+    {
+        int ismpl = args->roh_smpl->idx[i];
 
-        int a = bcf_gt_allele(gt[0]);
-        int b = bcf_gt_allele(gt[1]);
-        if ( a!=b )
-        {
-            pdg[0] = pdg[2] = args->unseen_PL;
-            pdg[1] = 1 - 2*args->unseen_PL;
-        }
-        else if ( a==0 )
+        // set P(D|G)
+        double pdg[3];
+        if ( args->fake_PLs )
         {
-            pdg[0] = 1 - 2*args->unseen_PL;
-            pdg[1] = pdg[2] = args->unseen_PL;
+            int8_t *gt = GTs + 2*ismpl;
+            if ( bcf_gt_is_missing(gt[0]) || bcf_gt_is_missing(gt[1]) ) continue;
+
+            int a = bcf_gt_allele(gt[0]);
+            int b = bcf_gt_allele(gt[1]);
+            if ( a!=b )
+            {
+                pdg[0] = pdg[2] = args->unseen_PL;
+                pdg[1] = 1 - 2*args->unseen_PL;
+            }
+            else if ( a==0 )
+            {
+                pdg[0] = 1 - args->unseen_PL - args->unseen_PL*args->unseen_PL;
+                pdg[1] = args->unseen_PL;
+                pdg[2] = args->unseen_PL*args->unseen_PL;
+            }
+            else
+            {
+                pdg[0] = args->unseen_PL*args->unseen_PL;
+                pdg[1] = args->unseen_PL;
+                pdg[2] = 1 - args->unseen_PL - args->unseen_PL*args->unseen_PL;
+            }
         }
         else
         {
-            pdg[0] = pdg[1] = args->unseen_PL;
-            pdg[2] = 1 - 2*args->unseen_PL;
+            #define BRANCH(type_t) \
+            { \
+                type_t *p = (type_t*)fmt_pl->p + fmt_pl->n*ismpl; \
+                if ( p[irr]<0 || p[ira]<0 || p[iaa]<0 ) continue;    /* missing value */ \
+                if ( p[irr]==p[ira] && p[irr]==p[iaa] ) continue;    /* all values are the same */ \
+                pdg[0] = p[irr] < (type_t)256 ? args->pl2p[ p[irr] ] : args->pl2p[255]; \
+                pdg[1] = p[ira] < (type_t)256 ? args->pl2p[ p[ira] ] : args->pl2p[255]; \
+                pdg[2] = p[iaa] < (type_t)256 ? args->pl2p[ p[iaa] ] : args->pl2p[255]; \
+            }
+            switch (fmt_pl->type) {
+                case BCF_BT_INT8:  BRANCH(int8_t); break;
+                case BCF_BT_INT16: BRANCH(int16_t); break;
+                case BCF_BT_INT32: BRANCH(int32_t); break;
+                default: fprintf(pysam_stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); exit(1);
+            }
+            #undef BRANCH
         }
-    }
-    else
-    {
-        args->nitmp = bcf_get_format_int32(args->hdr, line, "PL", &args->itmp, &args->mitmp);
-        if ( args->nitmp != args->nsmpl*line->n_allele*(line->n_allele+1)/2. ) return -1;     // not diploid?
-        args->nitmp /= args->nsmpl;
-
-        int32_t *pl = &args->itmp[args->ismpl*args->nitmp];
-        pdg[0] = pl[0] < 256 ? args->pl2p[ pl[0] ] : 1.0;
-        pdg[1] = pl[1] < 256 ? args->pl2p[ pl[1] ] : 1.0;
-        pdg[2] = pl[2] < 256 ? args->pl2p[ pl[2] ] : 1.0;
 
         double sum = pdg[0] + pdg[1] + pdg[2];
-        if ( !sum ) return -1;
-        pdg[0] /= sum;
-        pdg[1] /= sum;
-        pdg[2] /= sum;
+        if ( !sum ) continue;
+        for (j=0; j<3; j++) pdg[j] /= sum;
+        if ( args->skip_homref && pdg[0]>0.99 ) continue;
+
+        smpl_t *smpl = &args->smpl[i];
+        smpl->nused++;
+
+        if ( smpl->nsites >= smpl->msites )
+        {
+            hts_expand(uint32_t,smpl->nsites+1,smpl->msites,smpl->sites);
+            smpl->eprob = (double*) realloc(smpl->eprob,sizeof(*smpl->eprob)*smpl->msites*2);
+            if ( !smpl->eprob ) error("Error: failed to alloc %d bytes\n", sizeof(*smpl->eprob)*smpl->msites*2);
+        }
+        
+        // Calculate emission probabilities P(D|AZ) and P(D|HW)
+        double *eprob = &smpl->eprob[2*smpl->nsites];
+        eprob[STATE_AZ] = pdg[0]*(1-alt_freq) + pdg[2]*alt_freq;
+        eprob[STATE_HW] = pdg[0]*(1-alt_freq)*(1-alt_freq) + 2*pdg[1]*(1-alt_freq)*alt_freq + pdg[2]*alt_freq*alt_freq;
+        
+        smpl->sites[smpl->nsites] = line->pos;
+        smpl->nsites++;
+
+        if ( args->vi_training )
+        {
+            if ( !smpl->nrid || line->rid!=smpl->rid[smpl->nrid-1] )
+            {
+                smpl->nrid++;
+                smpl->rid = (int*) realloc(smpl->rid,sizeof(*smpl->rid)*smpl->nrid);
+                smpl->rid[smpl->nrid-1] = line->rid;
+                smpl->rid_off = (int*) realloc(smpl->rid_off,sizeof(*smpl->rid_off)*smpl->nrid);
+                smpl->rid_off[smpl->nrid-1] = smpl->nsites - 1;
+            }
+        }
+        else if ( args->nbuf_max && smpl->nsites >= args->nbuf_max ) flush_viterbi(args, i);
     }
 
     return 0;
@@ -604,18 +963,35 @@ int parse_line(args_t *args, bcf1_t *line, double *alt_freq, double *pdg)
 
 static void vcfroh(args_t *args, bcf1_t *line)
 {
+    int i;
+
     // Are we done?
     if ( !line )
     { 
-        flush_viterbi(args);
+        for (i=0; i<args->roh_smpl->n; i++) flush_viterbi(args, i);
         return; 
     }
     args->ntot++;
 
-    // Skip unwanted lines
+    // Skip unwanted lines, for simplicity we consider only biallelic sites 
     if ( line->rid == args->skip_rid ) return;
     if ( line->n_allele==1 ) return;    // no ALT allele
-    if ( line->n_allele!=2 ) return;    // only biallelic sites
+    if ( line->n_allele > 3 ) return;   // cannot be bi-allelic, even with <*>
+
+    // This can be raw callable VCF with the symbolic unseen allele <*>
+    int ial = 0;
+    for (i=1; i<line->n_allele; i++)
+        if ( !strcmp("<*>",line->d.allele[i]) ) { ial = i; break; }
+    if ( ial==0 )    // normal VCF, the symbolic allele is not present
+    {
+        if ( line->n_allele!=2 ) return;    // not biallelic
+        ial = 1;
+    }
+    else
+    {
+        if ( line->n_allele!=3 ) return;    // not biallelic
+        ial = ial==1 ? 2 : 1;               // <*> can come in any order
+    }
     if ( args->snps_only && !bcf_is_snp(line) ) return;
 
     // Initialize genetic map
@@ -625,21 +1001,15 @@ static void vcfroh(args_t *args, bcf1_t *line)
         args->prev_rid = line->rid;
         args->prev_pos = line->pos;
         skip_rid = load_genmap(args, line);
-        if ( !skip_rid && args->vi_training ) push_rid(args, line->rid);
     }
 
     // New chromosome?
     if ( args->prev_rid!=line->rid )
     {
         skip_rid = load_genmap(args, line);
-        if ( args->vi_training )
-        {
-            if ( !skip_rid ) push_rid(args, line->rid);
-        }
-        else
+        if ( !args->vi_training )
         {
-            flush_viterbi(args);
-            args->nsites = 0;
+            for (i=0; i<args->roh_smpl->n; i++) flush_viterbi(args, i);
         }
         args->prev_rid = line->rid;
         args->prev_pos = line->pos;
@@ -657,25 +1027,8 @@ static void vcfroh(args_t *args, bcf1_t *line)
     args->prev_pos = line->pos;
 
 
-    // Ready for the new site
-    int m = args->msites;
-    hts_expand(uint32_t,args->nsites+1,args->msites,args->sites);
-    if ( args->msites!=m )
-        args->eprob = (double*) realloc(args->eprob,sizeof(double)*args->msites*2);
-
-    // Set likelihoods and alternate allele frequencies
-    double alt_freq, pdg[3];
-    if ( parse_line(args, line, &alt_freq, pdg)<0 ) return; // something went wrong
-
-    args->nused++;
-
-    // Calculate emission probabilities P(D|AZ) and P(D|HW)
-    double *eprob = &args->eprob[2*args->nsites];
-    eprob[STATE_AZ] = pdg[0]*(1-alt_freq) + pdg[2]*alt_freq;
-    eprob[STATE_HW] = pdg[0]*(1-alt_freq)*(1-alt_freq) + 2*pdg[1]*(1-alt_freq)*alt_freq + pdg[2]*alt_freq*alt_freq;
-
-    args->sites[args->nsites] = line->pos;
-    args->nsites++;
+    // parse the new line
+    process_line(args, line, ial);
 }
 
 static void usage(args_t *args)
@@ -688,21 +1041,32 @@ static void usage(args_t *args)
     fprintf(pysam_stderr, "        --AF-dflt <float>              if AF is not known, use this allele frequency [skip]\n");
     fprintf(pysam_stderr, "        --AF-tag <TAG>                 use TAG for allele frequency\n");
     fprintf(pysam_stderr, "        --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
-    fprintf(pysam_stderr, "    -e, --estimate-AF <file>           calculate AC,AN counts on the fly, using either all samples (\"-\") or samples listed in <file>\n");
-    fprintf(pysam_stderr, "    -G, --GTs-only <float>             use GTs, ignore PLs, use <float> for PL of unseen genotypes. Safe value to use is 30 to account for GT errors.\n");
+    fprintf(pysam_stderr, "    -b  --buffer-size <int[,int]>      buffer size and the number of overlapping sites, 0 for unlimited [0]\n");
+    fprintf(pysam_stderr, "                                           If the first number is negative, it is interpreted as the maximum memory to\n");
+    fprintf(pysam_stderr, "                                           use, in MB. The default overlap is set to roughly 1%% of the buffer size.\n");
+    fprintf(pysam_stderr, "    -e, --estimate-AF [TAG],<file>     estimate AF from FORMAT/TAG (GT or PL) of all samples (\"-\") or samples listed\n");
+    fprintf(pysam_stderr, "                                            in <file>. If TAG is not given, the frequency is estimated from GT by default\n");
+    fprintf(pysam_stderr, "    -G, --GTs-only <float>             use GTs and ignore PLs, instead using <float> for PL of the two least likely genotypes.\n");
+    fprintf(pysam_stderr, "                                           Safe value to use is 30 to account for GT errors.\n");
+    fprintf(pysam_stderr, "    -i, --ignore-homref                skip hom-ref genotypes (0/0)\n");
     fprintf(pysam_stderr, "    -I, --skip-indels                  skip indels as their genotypes are enriched for errors\n");
-    fprintf(pysam_stderr, "    -m, --genetic-map <file>           genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\" is replaced with chromosome name\n");
+    fprintf(pysam_stderr, "    -m, --genetic-map <file>           genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\"\n");
+    fprintf(pysam_stderr, "                                           is replaced with chromosome name\n");
     fprintf(pysam_stderr, "    -M, --rec-rate <float>             constant recombination rate per bp\n");
+    fprintf(pysam_stderr, "    -o, --output <file>                write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "    -O, --output-type [srz]            output s:per-site, r:regions, z:compressed [sr]\n");
     fprintf(pysam_stderr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
     fprintf(pysam_stderr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
-    fprintf(pysam_stderr, "    -s, --sample <sample>              sample to analyze\n");
+    fprintf(pysam_stderr, "    -s, --samples <list>               list of samples to analyze [all samples]\n");
+    fprintf(pysam_stderr, "    -S, --samples-file <file>          file of samples to analyze [all samples]\n");
     fprintf(pysam_stderr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
     fprintf(pysam_stderr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "        --threads <int>                number of extra decompression threads [0]\n");
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "HMM Options:\n");
     fprintf(pysam_stderr, "    -a, --hw-to-az <float>             P(AZ|HW) transition probability from HW (Hardy-Weinberg) to AZ (autozygous) state [6.7e-8]\n");
     fprintf(pysam_stderr, "    -H, --az-to-hw <float>             P(HW|AZ) transition probability from AZ to HW state [5e-9]\n");
-    fprintf(pysam_stderr, "    -V, --viterbi-training             perform Viterbi training to estimate transition probabilities\n");
+    fprintf(pysam_stderr, "    -V, --viterbi-training <float>     estimate HMM parameters, <float> is the convergence threshold, e.g. 1e-10 (experimental)\n");
     fprintf(pysam_stderr, "\n");
     exit(1);
 }
@@ -723,12 +1087,17 @@ int main_vcfroh(int argc, char *argv[])
         {"AF-tag",1,0,0},
         {"AF-file",1,0,1},
         {"AF-dflt",1,0,2},
+        {"buffer-size",1,0,'b'},
+        {"ignore-homref",0,0,'i'},
         {"estimate-AF",1,0,'e'},
+        {"output",1,0,'o'},
+        {"output-type",1,0,'O'},
         {"GTs-only",1,0,'G'},
-        {"sample",1,0,'s'},
+        {"samples",1,0,'s'},
+        {"samples-file",1,0,'S'},
         {"hw-to-az",1,0,'a'},
         {"az-to-hw",1,0,'H'},
-        {"viterbi-training",0,0,'V'},
+        {"viterbi-training",1,0,'V'},
         {"targets",1,0,'t'},
         {"targets-file",1,0,'T'},
         {"regions",1,0,'r'},
@@ -736,12 +1105,13 @@ int main_vcfroh(int argc, char *argv[])
         {"genetic-map",1,0,'m'},
         {"rec-rate",1,0,'M'},
         {"skip-indels",0,0,'I'},
+        {"threads",1,0,9},
         {0,0,0,0}
     };
 
     int naf_opts = 0;
     char *tmp;
-    while ((c = getopt_long(argc, argv, "h?r:R:t:T:H:a:s:m:M:G:Ia:e:V",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "h?r:R:t:T:H:a:s:S:m:M:G:Ia:e:V:b:O:o:i",loptions,NULL)) >= 0) {
         switch (c) {
             case 0: args->af_tag = optarg; naf_opts++; break;
             case 1: args->af_fname = optarg; naf_opts++; break;
@@ -749,7 +1119,15 @@ int main_vcfroh(int argc, char *argv[])
                 args->dflt_AF = strtod(optarg,&tmp);
                 if ( *tmp ) error("Could not parse: --AF-dflt %s\n", optarg);
                 break;
+            case 'o': args->output_fname = optarg; break;
+            case 'O': 
+                if ( strchr(optarg,'s') || strchr(optarg,'S') ) args->output_type |= OUTPUT_ST;
+                if ( strchr(optarg,'r') || strchr(optarg,'R') ) args->output_type |= OUTPUT_RG;
+                if ( strchr(optarg,'z') || strchr(optarg,'z') ) args->output_type |= OUTPUT_GZ;
+                break;
             case 'e': args->estimate_AF = optarg; naf_opts++; break;
+            case 'b': args->buffer_size = optarg; break;
+            case 'i': args->skip_homref = 1; break;
             case 'I': args->snps_only = 1; break;
             case 'G':
                 args->fake_PLs = 1; 
@@ -762,7 +1140,8 @@ int main_vcfroh(int argc, char *argv[])
                 args->rec_rate = strtod(optarg,&tmp);
                 if ( *tmp ) error("Could not parse: -M %s\n", optarg);
                 break;
-            case 's': args->sample = strdup(optarg); break;
+            case 's': args->samples = strdup(optarg); break;
+            case 'S': args->samples = strdup(optarg); args->samples_is_file = 1; break;
             case 'a':
                 args->t2AZ = strtod(optarg,&tmp);
                 if ( *tmp ) error("Could not parse: -a %s\n", optarg);
@@ -775,14 +1154,28 @@ int main_vcfroh(int argc, char *argv[])
             case 'T': args->targets_list = optarg; targets_is_file = 1; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
-            case 'V': args->vi_training = 1; break;
+            case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case 'V': 
+                args->vi_training = 1; 
+                args->baum_welch_th = strtod(optarg,&tmp); 
+                if ( *tmp ) error("Could not parse: --viterbi-training %s\n", optarg);
+                break;
             case 'h': 
             case '?': usage(args); break;
             default: error("Unknown argument: %s\n", optarg);
         }
     }
+    if ( !args->output_fname ) args->output_fname = "pysam_stdout";
+    if ( !args->output_type ) args->output_type = OUTPUT_ST|OUTPUT_RG;
+    char *fname = NULL;
+    if ( optind==argc )
+    {
+        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        else usage(args);
+    }
+    else fname = argv[optind];
 
-    if ( argc<optind+1 ) usage(args);
+    if ( args->vi_training && args->buffer_size ) error("Error: cannot use -b with -V\n");
     if ( args->t2AZ<0 || args->t2AZ>1 ) error("Error: The parameter --hw-to-az is not in [0,1]\n", args->t2AZ);
     if ( args->t2HW<0 || args->t2HW>1 ) error("Error: The parameter --az-to-hw is not in [0,1]\n", args->t2HW);
     if ( naf_opts>1 ) error("Error: The options --AF-tag, --AF-file and -e are mutually exclusive\n");
@@ -802,7 +1195,9 @@ int main_vcfroh(int argc, char *argv[])
         if ( bcf_sr_set_targets(args->files, args->af_fname, 1, 3)<0 )
             error("Failed to read the targets: %s\n", args->af_fname);
     }
-    if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open %s: %s\n", argv[optind],bcf_sr_strerror(args->files->errnum));
+    if ( args->n_threads && bcf_sr_set_threads(args->files, args->n_threads)<0)
+        error("Failed to create threads\n");
+    if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
 
     init_data(args);
     while ( bcf_sr_next_line(args->files) )
@@ -810,7 +1205,15 @@ int main_vcfroh(int argc, char *argv[])
         vcfroh(args, args->files->readers[0].buffer[0]);
     }
     vcfroh(args, NULL);
-    fprintf(pysam_stderr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
+    int i, nmin = 0;
+    for (i=0; i<args->roh_smpl->n; i++)
+        if ( !i || args->smpl[i].nused < nmin ) nmin = args->smpl[i].nused;
+    fprintf(pysam_stderr,"Number of lines total/processed: %d/%d\n", args->ntot,nmin);
+    if ( nmin==0 )
+    {
+        fprintf(pysam_stderr,"No usable sites were found.");
+        if ( !naf_opts && !args->dflt_AF ) fprintf(pysam_stderr, " Consider using one of the AF options.\n");
+    }
     destroy_data(args);
     free(args);
     return 0;
diff --git a/bcftools/vcfstats.c b/bcftools/vcfstats.c
index 1032bf8..4041a5a 100644
--- a/bcftools/vcfstats.c
+++ b/bcftools/vcfstats.c
@@ -1,6 +1,6 @@
 /*  vcfstats.c -- Produces stats which can be plotted using plot-vcfstats.
 
-    Copyright (C) 2012-2015 Genome Research Ltd.
+    Copyright (C) 2012-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -39,6 +39,7 @@ THE SOFTWARE.  */
 #include <inttypes.h>
 #include "bcftools.h"
 #include "filter.h"
+#include "bin.h"
 
 // Logic of the filters: include or exclude sites which match the filters?
 #define FLT_INCLUDE 1
@@ -69,17 +70,6 @@ idist_t;
 
 typedef struct
 {
-    double x;
-    double x2;
-    double y;
-    double y2;
-    double xy;
-    double n;
-}
-smpl_r_t;
-
-typedef struct
-{
     int n_snps, n_indels, n_mnps, n_others, n_mals, n_snp_mals, n_records, n_noalts;
     int *af_ts, *af_tv, *af_snps;   // first bin of af_* stats are singletons
     #if HWE_STATS
@@ -108,9 +98,14 @@ stats_t;
 
 typedef struct
 {
-    uint64_t m[3], mm[3];        // number of hom, het and non-ref hom matches and mismatches
-    float r2sum;
-    uint32_t r2n;
+    uint64_t gt2gt[5][5];   // number of RR->RR, RR->RA, etc. matches/mismatches; see type2stats
+    /*
+        Pearson's R^2 is used for aggregate R^2 
+        y, yy .. sum of dosage and squared dosage in the query VCF (second file)
+        x, xx .. sum of squared dosage in the truth VCF (first file)
+        n     .. number of genotypes
+     */
+    double y, yy, x, xx, yx, n;
 }
 gtcmp_t;
 
@@ -135,7 +130,11 @@ typedef struct
     int *tmp_iaf, ntmp_iaf, m_af, m_qual, naf_hwe, mtmp_frm;
     uint8_t *tmp_frm;
     int dp_min, dp_max, dp_step;
-    gtcmp_t *af_gts_snps, *af_gts_indels, *smpl_gts_snps, *smpl_gts_indels; // first bin of af_* stats are singletons
+    gtcmp_t *smpl_gts_snps, *smpl_gts_indels;
+    gtcmp_t *af_gts_snps, *af_gts_indels; // first bin of af_* stats are singletons
+    bin_t *af_bins;
+    float *farr;
+    int mfarr;
 
     // indel context
     indel_ctx_t *indel_ctx;
@@ -148,21 +147,18 @@ typedef struct
     // other
     bcf_srs_t *files;
     bcf_sr_regions_t *exons;
-    char **argv, *exons_fname, *regions_list, *samples_list, *targets_list;
+    char **argv, *exons_fname, *regions_list, *samples_list, *targets_list, *af_bins_list, *af_tag;
     int argc, verbose_sites, first_allele_only, samples_is_file;
     int split_by_id, nstats;
 
     filter_t *filter[2];
     char *filter_str;
     int filter_logic;   // include or exclude sites which match the filters? One of FLT_INCLUDE/FLT_EXCLUDE
-
-    // Per Sample r working data arrays of size equal to number of samples
-    smpl_r_t* smpl_r_snps;
-    smpl_r_t* smpl_r_indels;
+    int n_threads;
 }
 args_t;
 
-static int type2dosage[6], type2ploidy[6], type2stats[6];
+static int type2dosage[6], type2ploidy[6], type2stats[7];
 
 static void idist_init(idist_t *d, int min, int max, int step)
 {
@@ -187,6 +183,12 @@ static inline int idist_i2bin(idist_t *d, int i)
     return i-1+d->min;
 }
 
+static inline int clip_nonnegative(float x, int limit)
+{
+    if (x >= limit || isnan(x)) return limit - 1;
+    else if (x <= 0.0) return 0;
+    else return (int) x;
+}
 
 #define IC_DBG 0
 #if IC_DBG
@@ -403,13 +405,30 @@ static void init_stats(args_t *args)
         args->filter[0] = filter_init(bcf_sr_get_header(args->files,0), args->filter_str);
         if ( args->files->nreaders==2 )
             args->filter[1] = filter_init(bcf_sr_get_header(args->files,1), args->filter_str);
+        args->files->max_unpack |= filter_max_unpack(args->filter[0]);
+    }
+
+    // AF corresponds to AC but is more robust to mixtures of haploid and diploid GTs
+    if ( !args->af_bins_list )
+    {
+        args->m_af = 101;
+        for (i=0; i<args->files->nreaders; i++)
+            if ( bcf_hdr_nsamples(args->files->readers[i].header) + 1> args->m_af )
+                args->m_af = bcf_hdr_nsamples(args->files->readers[i].header) + 1;
+    }
+    else
+    {
+        args->af_bins = bin_init(args->af_bins_list,0,1);
+    
+        // m_af is used also for other af arrays, where the first bin is for
+        // singletons. However, since the last element is unused in af_bins
+        // (n boundaries form n-1 intervals), the m_af count is good for both.
+        args->m_af = bin_get_size(args->af_bins);
     }
 
-    // AF corresponds to AC but is more robust for mixture of haploid and diploid GTs
-    args->m_af = 101;
-    for (i=0; i<args->files->nreaders; i++)
-        if ( bcf_hdr_nsamples(args->files->readers[i].header) + 1> args->m_af )
-            args->m_af = bcf_hdr_nsamples(args->files->readers[i].header) + 1;
+    bcf_hdr_t *hdr = bcf_sr_get_header(args->files,0);
+    if ( args->af_tag && !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,bcf_hdr_id2int(hdr,BCF_DT_ID,args->af_tag)) )
+        error("No such INFO tag: %s\n", args->af_tag);
 
     #if QUAL_STATS
         args->m_qual = 999;
@@ -430,8 +449,6 @@ static void init_stats(args_t *args)
         args->af_gts_indels   = (gtcmp_t *) calloc(args->m_af,sizeof(gtcmp_t));
         args->smpl_gts_snps   = (gtcmp_t *) calloc(args->files->n_smpl,sizeof(gtcmp_t));
         args->smpl_gts_indels = (gtcmp_t *) calloc(args->files->n_smpl,sizeof(gtcmp_t));
-        args->smpl_r_snps = (smpl_r_t*) calloc(args->files->n_smpl, sizeof(smpl_r_t));
-        args->smpl_r_indels = (smpl_r_t*) calloc(args->files->n_smpl, sizeof(smpl_r_t));
     }
     for (i=0; i<args->nstats; i++)
     {
@@ -503,9 +520,10 @@ static void init_stats(args_t *args)
     type2stats[GT_HOM_RR] = 0;
     type2stats[GT_HET_RA] = 1;
     type2stats[GT_HOM_AA] = 2;
-    type2stats[GT_HET_AA] = 1;
+    type2stats[GT_HET_AA] = 3;
     type2stats[GT_HAPL_R] = 0;
     type2stats[GT_HAPL_A] = 2;
+    type2stats[GT_UNKN]   = 4;
 
 }
 static void destroy_stats(args_t *args)
@@ -526,7 +544,6 @@ static void destroy_stats(args_t *args)
             if (stats->qual_indels) free(stats->qual_indels);
         #endif
         #if HWE_STATS
-            //if ( args->files->n_smpl ) free(stats->af_hwe);
             free(stats->af_hwe);
         #endif
         free(stats->insertions);
@@ -554,6 +571,8 @@ static void destroy_stats(args_t *args)
         if ( args->exons ) free(stats->smpl_frm_shifts);
     }
     for (j=0; j<args->nusr; j++) free(args->usr[j].tag);
+    if ( args->af_bins ) bin_destroy(args->af_bins);
+    free(args->farr);
     free(args->usr);
     free(args->tmp_frm);
     free(args->tmp_iaf);
@@ -562,8 +581,6 @@ static void destroy_stats(args_t *args)
     free(args->af_gts_indels);
     free(args->smpl_gts_snps);
     free(args->smpl_gts_indels);
-    free(args->smpl_r_snps);
-    free(args->smpl_r_indels);
     if (args->indel_ctx) indel_ctx_destroy(args->indel_ctx);
     if (args->filter[0]) filter_destroy(args->filter[0]);
     if (args->filter[1]) filter_destroy(args->filter[1]);
@@ -572,36 +589,59 @@ static void destroy_stats(args_t *args)
 static void init_iaf(args_t *args, bcf_sr_t *reader)
 {
     bcf1_t *line = reader->buffer[0];
-    if ( args->ntmp_iaf < line->n_allele )
+    hts_expand(int32_t,line->n_allele,args->ntmp_iaf,args->tmp_iaf);
+
+    int i, ret;
+    if ( args->af_tag )
     {
-        args->tmp_iaf = (int*)realloc(args->tmp_iaf, line->n_allele*sizeof(int));
-        args->ntmp_iaf = line->n_allele;
+        ret = bcf_get_info_float(reader->header, line, args->af_tag, &args->farr, &args->mfarr);
+        if ( ret<=0 || ret!=line->n_allele-1 )
+        {
+            // the AF tag is not present or wrong number of values, put in the singletons/unknown bin
+            for (i=0; i<line->n_allele; i++) args->tmp_iaf[i] = 0;
+            return;
+        }
+        args->tmp_iaf[0] = 0;
+        for (i=1; i<line->n_allele; i++)
+        {
+            float af = args->farr[i-1];
+            if ( af<0 ) af = 0;
+            else if ( af>1 ) af = 1;
+            int iaf = args->af_bins ? bin_get_idx(args->af_bins,af) : af*(args->m_af-2);
+            args->tmp_iaf[i] = iaf + 1;     // the first tmp_iaf bin is reserved for singletons
+        }
+        return;
     }
+
     // tmp_iaf is first filled with AC counts in calc_ac and then transformed to
     //  an index to af_gts_snps
-    int i, ret = bcf_calc_ac(reader->header, line, args->tmp_iaf, args->samples_list ? BCF_UN_INFO|BCF_UN_FMT : BCF_UN_INFO);
-    if ( ret )
+    ret = bcf_calc_ac(reader->header, line, args->tmp_iaf, args->samples_list ? BCF_UN_INFO|BCF_UN_FMT : BCF_UN_INFO);
+    if ( !ret )
     {
-        int an=0;
-        for (i=0; i<line->n_allele; i++)
-            an += args->tmp_iaf[i];
+        for (i=0; i<line->n_allele; i++) args->tmp_iaf[i] = 0;      // singletons/unknown bin
+        return;
+    }
 
-        args->tmp_iaf[0] = 0;
-        for (i=1; i<line->n_allele; i++)
+    int an = 0;
+    for (i=0; i<line->n_allele; i++)
+        an += args->tmp_iaf[i];
+
+    args->tmp_iaf[0] = 0;
+    for (i=1; i<line->n_allele; i++)
+    {
+        if ( args->tmp_iaf[i]==1 )
+            args->tmp_iaf[i] = 0;   // singletons into the first bin
+        else if ( !an )
+            args->tmp_iaf[i] = 1;   // no genotype at all, put to the AF=0 bin
+        else
         {
-            if ( args->tmp_iaf[i]==1 )
-                args->tmp_iaf[i] = 0; // singletons into the first bin
-            else if ( !an )
-                args->tmp_iaf[i] = 1;   // no genotype at all, put to the AF=0 bin
-            else
-                args->tmp_iaf[i] = 1 + args->tmp_iaf[i] * (args->m_af-2.0) / an;
+            float af = (float) args->tmp_iaf[i] / an;
+            if ( af<0 ) af = 0;
+            else if ( af>1 ) af = 1;
+            int iaf = args->af_bins ? bin_get_idx(args->af_bins,af) : af*(args->m_af-2);
+            args->tmp_iaf[i] = iaf + 1;
         }
     }
-    else
-        for (i=0; i<line->n_allele; i++)
-            args->tmp_iaf[i] = 0;
-
-    // todo: otherwise use AF
 }
 
 static inline void do_mnp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
@@ -621,7 +661,7 @@ static void do_indel_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
     bcf1_t *line = reader->buffer[0];
 
     #if QUAL_STATS
-        int iqual = line->qual >= args->m_qual || isnan(line->qual) ? args->m_qual - 1 : line->qual;
+        int iqual = clip_nonnegative(line->qual, args->m_qual);
         stats->qual_indels[iqual]++;
     #endif
 
@@ -756,7 +796,7 @@ static void do_snp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
     if ( ref<0 ) return;
 
     #if QUAL_STATS
-        int iqual = line->qual >= args->m_qual || isnan(line->qual) ? args->m_qual - 1 : line->qual;
+        int iqual = clip_nonnegative(line->qual, args->m_qual);
         stats->qual_snps[iqual]++;
     #endif
 
@@ -873,6 +913,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
         {
             float het_frac = (float)nhet_tot/(nhet_tot + nref_tot + nalt_tot);
             int idx = het_frac*(args->naf_hwe - 1);
+//check me: what is this?
             if ( line->n_allele>1 ) idx += args->naf_hwe*args->tmp_iaf[1];
             stats->af_hwe[idx]++;
         }
@@ -911,88 +952,42 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
         fmt1 = bcf_get_fmt(files->readers[1].header,files->readers[1].buffer[0],"GT"); if ( !fmt1 ) return;
 
         // only the first ALT allele is considered
-        int iaf = line->n_allele>1 ? args->tmp_iaf[1] : 1;
+        int iaf = args->tmp_iaf[1];
         int line_type = bcf_get_variant_types(files->readers[0].buffer[0]);
         gtcmp_t *af_stats = line_type&VCF_SNP ? args->af_gts_snps : args->af_gts_indels;
         gtcmp_t *smpl_stats = line_type&VCF_SNP ? args->smpl_gts_snps : args->smpl_gts_indels;
 
-        //
-        // Calculates r squared
-        // x is mean dosage of x at given site
-        // x2 is mean squared dosage of x at given site
-        // y is mean dosage of x at given site
-        // y2 is mean squared dosage of x at given site
-        // xy is mean dosage of x*y at given site
-        // r2sum += (xy - x*y)^2 / ( (x2 - x^2) * (y2 - y^2) )
-        // r2n is number of sites considered
-        // output as r2sum/r2n for each AF bin
-        int r2n = 0;
-        float x = 0, y = 0, xy = 0, x2 = 0, y2 = 0;
-        // Select smpl_r
-        smpl_r_t *smpl_r = NULL;
-        if (line_type&VCF_SNP)
-        {
-            smpl_r = args->smpl_r_snps;
-        }
-        else if (line_type&VCF_INDEL)
-        {
-            smpl_r = args->smpl_r_indels;
-        }
         for (is=0; is<files->n_smpl; is++)
         {
             // Simplified comparison: only 0/0, 0/1, 1/1 is looked at as the identity of
             //  actual alleles can be enforced by running without the -c option.
             int gt0 = bcf_gt_type(fmt0, files->readers[0].samples[is], NULL, NULL);
-            if ( gt0 == GT_UNKN ) continue;
-
             int gt1 = bcf_gt_type(fmt1, files->readers[1].samples[is], NULL, NULL);
-            if ( gt1 == GT_UNKN ) continue;
 
-            if ( type2ploidy[gt0]*type2ploidy[gt1] == -1 ) continue;   // cannot compare diploid and haploid genotypes
+            int idx0 = type2stats[gt0];
+            int idx1 = type2stats[gt1];
+            af_stats[iaf].gt2gt[idx0][idx1]++;
+            smpl_stats[is].gt2gt[idx0][idx1]++;
 
-            int dsg0 = type2dosage[gt0];
-            int dsg1 = type2dosage[gt1];
-            x   += dsg0;
-            x2  += dsg0*dsg0;
-            y   += dsg1;
-            y2  += dsg1*dsg1;
-            xy  += dsg0*dsg1;
-            r2n++;
-
-            int idx = type2stats[gt0];
-            if ( gt0==gt1 )
-            {
-                af_stats[iaf].m[idx]++;
-                smpl_stats[is].m[idx]++;
-            }
-            else
-            {
-                af_stats[iaf].mm[idx]++;
-                smpl_stats[is].mm[idx]++;
-            }
-
-            // Now do it across samples
+            if ( gt0 == GT_UNKN || gt1 == GT_UNKN ) continue;
+            if ( type2ploidy[gt0]*type2ploidy[gt1] == -1 ) continue;   // cannot compare diploid and haploid genotypes
 
-            if (smpl_r) {
-                smpl_r[is].xy += dsg0*dsg1;
-                smpl_r[is].x += dsg0;
-                smpl_r[is].x2 += dsg0*dsg0;
-                smpl_r[is].y += dsg1;
-                smpl_r[is].y2 += dsg1*dsg1;
-                ++(smpl_r[is].n);
-            }
-        }
-
-        if ( r2n )
-        {
-            x /= r2n; y /= r2n; x2 /= r2n; y2 /= r2n; xy /= r2n;
-            float cov  = xy - x*y;
-            float var2 = (x2 - x*x) * (y2 - y*y);
-            if ( var2!=0 )
-            {
-                af_stats[iaf].r2sum += cov*cov/var2;
-                af_stats[iaf].r2n++;
-            }
+            float y = type2dosage[gt0];
+            float x = type2dosage[gt1];
+
+            smpl_stats[is].yx += y*x;
+            smpl_stats[is].x  += x;
+            smpl_stats[is].xx += x*x;
+            smpl_stats[is].y  += y;
+            smpl_stats[is].yy += y*y;
+            smpl_stats[is].n  += 1;
+
+            af_stats[iaf].yx += y*x;
+            af_stats[iaf].x  += x;
+            af_stats[iaf].xx += x*x;
+            af_stats[iaf].y  += y;
+            af_stats[iaf].yy += y*y;
+            af_stats[iaf].n  += 1;
         }
 
         if ( args->verbose_sites )
@@ -1129,7 +1124,7 @@ static void print_header(args_t *args)
 #define T2S(x) type2stats[x]
 static void print_stats(args_t *args)
 {
-    int i, id;
+    int i, j,k, id;
     printf("# SN, Summary numbers:\n# SN\t[2]id\t[3]key\t[4]value\n");
     for (id=0; id<args->files->nreaders; id++)
         printf("SN\t%d\tnumber of samples:\t%d\n", id, bcf_hdr_nsamples(args->files->readers[id].header));
@@ -1202,6 +1197,24 @@ static void print_stats(args_t *args)
         stats->af_repeats[1][1] += stats->af_repeats[1][0];
         stats->af_repeats[2][1] += stats->af_repeats[2][0];
     }
+    // move the singletons stats into the first AF bin, singleton stats was collected separately because of init_iaf
+    if ( args->af_gts_snps )
+    {
+        args->af_gts_snps[1].y    += args->af_gts_snps[0].y;
+        args->af_gts_snps[1].yy   += args->af_gts_snps[0].yy;
+        args->af_gts_snps[1].xx   += args->af_gts_snps[0].xx;
+        args->af_gts_snps[1].yx   += args->af_gts_snps[0].yx;
+        args->af_gts_snps[1].n    += args->af_gts_snps[0].n;
+    }
+    if ( args->af_gts_indels )
+    {
+        args->af_gts_indels[1].y  += args->af_gts_indels[0].y;
+        args->af_gts_indels[1].yy += args->af_gts_indels[0].yy;
+        args->af_gts_indels[1].xx += args->af_gts_indels[0].xx;
+        args->af_gts_indels[1].yx += args->af_gts_indels[0].yx;
+        args->af_gts_indels[1].n  += args->af_gts_indels[0].n;
+    }
+
     printf("# AF, Stats by non-reference allele frequency:\n# AF\t[2]id\t[3]allele frequency\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
     for (id=0; id<args->nstats; id++)
     {
@@ -1209,7 +1222,8 @@ static void print_stats(args_t *args)
         for (i=1; i<args->m_af; i++) // note that af[1] now contains also af[0], see SiS stats output above
         {
             if ( stats->af_snps[i]+stats->af_ts[i]+stats->af_tv[i]+stats->af_repeats[0][i]+stats->af_repeats[1][i]+stats->af_repeats[2][i] == 0  ) continue;
-            printf("AF\t%d\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,100.*(i-1)/(args->m_af-1),stats->af_snps[i],stats->af_ts[i],stats->af_tv[i],
+            double af = args->af_bins ? (bin_get_value(args->af_bins,i)+bin_get_value(args->af_bins,i-1))*0.5 : (double)(i-1)/(args->m_af-1);
+            printf("AF\t%d\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,af,stats->af_snps[i],stats->af_ts[i],stats->af_tv[i],
                 stats->af_repeats[0][i]+stats->af_repeats[1][i]+stats->af_repeats[2][i],stats->af_repeats[0][i],stats->af_repeats[1][i],stats->af_repeats[2][i]);
         }
     }
@@ -1266,34 +1280,56 @@ static void print_stats(args_t *args)
         printf("SN\t%d\tnumber of samples:\t%d\n", 2, args->files->n_smpl);
 
         int x;
-        for (x=0; x<2; x++)
+        for (x=0; x<2; x++)     // x=0: snps, x=1: indels
         {
             gtcmp_t *stats;
             if ( x==0 )
             {
-                printf("# GCsAF, Genotype concordance by non-reference allele frequency (SNPs)\n# GCsAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
+                printf("# GCsAF, Genotype concordance by non-reference allele frequency (SNPs)\n# GCsAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of genotypes\n");
                 stats = args->af_gts_snps;
             }
             else
             {
-                printf("# GCiAF, Genotype concordance by non-reference allele frequency (indels)\n# GCiAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
+                printf("# GCiAF, Genotype concordance by non-reference allele frequency (indels)\n# GCiAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of genotypes\n");
                 stats = args->af_gts_indels;
             }
-            uint64_t nrd_m[3] = {0,0,0}, nrd_mm[3] = {0,0,0};
+            uint64_t nrd_m[4] = {0,0,0,0}, nrd_mm[4] = {0,0,0,0};   // across all bins
             for (i=0; i<args->m_af; i++)
             {
-                int j, n = 0;
-                for (j=0; j<3; j++)
+                int n = 0;
+                uint64_t m[4] = {0,0,0,0}, mm[4] = {0,0,0,0};    // in i-th AF bin
+                for (j=0; j<4; j++)     // rr, ra, aa hom, aa het, ./.
+                    for (k=0; k<4; k++)
+                    {
+                        n += stats[i].gt2gt[j][k];
+                        if ( j==k ) 
+                        {
+                            nrd_m[j] += stats[i].gt2gt[j][k];
+                            m[j]     += stats[i].gt2gt[j][k];
+                        }
+                        else
+                        {
+                            nrd_mm[j] += stats[i].gt2gt[j][k];
+                            mm[j]     += stats[i].gt2gt[j][k];
+                        }
+                    }
+                if ( !i || !n ) continue;   // skip singleton stats and empty bins
+
+                // Pearson's r2
+                double r2 = 0;
+                if ( stats[i].n )
                 {
-                    n += stats[i].m[j] + stats[i].mm[j];
-                    nrd_m[j]  += stats[i].m[j];
-                    nrd_mm[j] += stats[i].mm[j];
+                    r2  = (stats[i].yx - stats[i].x*stats[i].y/stats[i].n);
+                    r2 /= sqrt((stats[i].xx - stats[i].x*stats[i].x/stats[i].n) * (stats[i].yy - stats[i].y*stats[i].y/stats[i].n));
+                    r2 *= r2;
                 }
-                if ( !i || !n ) continue;   // skip singleton stats and empty bins
-                printf("GC%cAF\t2\t%f", x==0 ? 's' : 'i', 100.*(i-1)/(args->m_af-1));
-                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
-                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
-                printf("\t%f\t%"PRId32"\n", stats[i].r2n ? stats[i].r2sum/stats[i].r2n : -1.0, stats[i].r2n);
+                double af = args->af_bins ? (bin_get_value(args->af_bins,i)+bin_get_value(args->af_bins,i-1))*0.5 : (double)(i-1)/(args->m_af-1);
+                printf("GC%cAF\t2\t%f", x==0 ? 's' : 'i', af);
+                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", m[T2S(GT_HOM_RR)],m[T2S(GT_HET_RA)],m[T2S(GT_HOM_AA)]);
+                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", mm[T2S(GT_HOM_RR)],mm[T2S(GT_HET_RA)],mm[T2S(GT_HOM_AA)]);
+                if ( stats[i].n && !isnan(r2) ) printf("\t%f", r2);
+                else printf("\t"NA_STRING);
+                printf("\t%.0f\n", stats[i].n);
             }
 
             if ( x==0 )
@@ -1309,8 +1345,8 @@ static void print_stats(args_t *args)
             }
             else
                 printf("# Non-Reference Discordance (NRD), indels\n# NRDi\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
-            uint64_t m  = nrd_m[T2S(GT_HET_RA)] + nrd_m[T2S(GT_HOM_AA)];
-            uint64_t mm = nrd_mm[T2S(GT_HOM_RR)] + nrd_mm[T2S(GT_HET_RA)] + nrd_mm[T2S(GT_HOM_AA)];
+            uint64_t m  = nrd_m[T2S(GT_HET_RA)] + nrd_m[T2S(GT_HOM_AA)] + nrd_m[T2S(GT_HET_AA)];
+            uint64_t mm = nrd_mm[T2S(GT_HOM_RR)] + nrd_mm[T2S(GT_HET_RA)] + nrd_mm[T2S(GT_HOM_AA)] + nrd_mm[T2S(GT_HET_AA)];
             printf("NRD%c\t2\t%f\t%f\t%f\t%f\n", x==0 ? 's' : 'i',
                     m+mm ? mm*100.0/(m+mm) : 0,
                     nrd_m[T2S(GT_HOM_RR)]+nrd_mm[T2S(GT_HOM_RR)] ? nrd_mm[T2S(GT_HOM_RR)]*100.0/(nrd_m[T2S(GT_HOM_RR)]+nrd_mm[T2S(GT_HOM_RR)]) : 0,
@@ -1319,42 +1355,99 @@ static void print_stats(args_t *args)
                   );
         }
 
-        for (x=0; x<2; x++)
+        for (x=0; x<2; x++) // x=0: snps, x=1: indels
         {
             gtcmp_t *stats;
-            smpl_r_t *smpl_r_array;
             if ( x==0 )
             {
                 printf("# GCsS, Genotype concordance by sample (SNPs)\n# GCsS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
                 stats = args->smpl_gts_snps;
-                smpl_r_array = args->smpl_r_snps;
             }
             else
             {
                 printf("# GCiS, Genotype concordance by sample (indels)\n# GCiS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
                 stats = args->smpl_gts_indels;
-                smpl_r_array = args->smpl_r_indels;
             }
             for (i=0; i<args->files->n_smpl; i++)
             {
-                uint64_t m  = stats[i].m[T2S(GT_HET_RA)] + stats[i].m[T2S(GT_HOM_AA)];
-                uint64_t mm = stats[i].mm[T2S(GT_HOM_RR)] + stats[i].mm[T2S(GT_HET_RA)] + stats[i].mm[T2S(GT_HOM_AA)];
-                // Calculate r by formula 19.2 - Biostatistical Analysis 4th edition - Jerrold H. Zar
-                smpl_r_t *smpl_r = smpl_r_array + i;
-                double r = 0.0;
-                if (smpl_r->n) {
-                    double sum_crossprod = smpl_r->xy-(smpl_r->x*smpl_r->y)/smpl_r->n;//per 17.3 machine formula
-                    double x2_xx = smpl_r->x2-(smpl_r->x*smpl_r->x)/smpl_r->n;
-                    double y2_yy = smpl_r->y2-(smpl_r->y*smpl_r->y)/smpl_r->n;
-                    r = (sum_crossprod)/sqrt(x2_xx*y2_yy);
+                uint64_t mm = 0, m = stats[i].gt2gt[T2S(GT_HET_RA)][T2S(GT_HET_RA)] + stats[i].gt2gt[T2S(GT_HOM_AA)][T2S(GT_HOM_AA)];
+                for (j=0; j<3; j++)
+                    for (k=0; k<3; k++)
+                        if ( j!=k ) mm += stats[i].gt2gt[j][k];
+
+                // Pearson's r2
+                double r2 = 0;
+                if ( stats[i].n )
+                {
+                    r2  = (stats[i].yx - stats[i].x*stats[i].y/stats[i].n);
+                    r2 /= sqrt((stats[i].xx - stats[i].x*stats[i].x/stats[i].n) * (stats[i].yy - stats[i].y*stats[i].y/stats[i].n));
+                    r2 *= r2;
                 }
                 printf("GC%cS\t2\t%s\t%.3f",  x==0 ? 's' : 'i', args->files->samples[i], m+mm ? mm*100.0/(m+mm) : 0);
-                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
-                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
-                if (smpl_r->n && !isnan(r)) printf("\t%f\n", r*r);
+                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", 
+                    stats[i].gt2gt[T2S(GT_HOM_RR)][T2S(GT_HOM_RR)],
+                    stats[i].gt2gt[T2S(GT_HET_RA)][T2S(GT_HET_RA)],
+                    stats[i].gt2gt[T2S(GT_HOM_AA)][T2S(GT_HOM_AA)]);
+                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"",
+                    stats[i].gt2gt[T2S(GT_HOM_RR)][T2S(GT_HET_RA)] + stats[i].gt2gt[T2S(GT_HOM_RR)][T2S(GT_HOM_AA)],
+                    stats[i].gt2gt[T2S(GT_HET_RA)][T2S(GT_HOM_RR)] + stats[i].gt2gt[T2S(GT_HET_RA)][T2S(GT_HOM_AA)],
+                    stats[i].gt2gt[T2S(GT_HOM_AA)][T2S(GT_HOM_RR)] + stats[i].gt2gt[T2S(GT_HOM_AA)][T2S(GT_HET_RA)]);
+                if ( stats[i].n && !isnan(r2) ) printf("\t%f\n", r2);
                 else printf("\t"NA_STRING"\n");
             }
         }
+        for (x=0; x<2; x++) // x=0: snps, x=1: indels
+        {
+                //printf("# GCiS, Genotype concordance by sample (indels)\n# GCiS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
+
+            gtcmp_t *stats;
+            if ( x==0 )
+            {
+                printf("# GCTs, Genotype concordance table (SNPs)\n# GCTs");
+                stats = args->smpl_gts_snps;
+            }
+            else
+            {
+                printf("# GCTi, Genotype concordance table (indels)\n# GCTi");
+                stats = args->smpl_gts_indels;
+            }
+            i = 1;
+            printf("\t[%d]sample", ++i);
+            printf("\t[%d]RR Hom -> RR Hom", ++i);
+            printf("\t[%d]RR Hom -> RA Het", ++i);
+            printf("\t[%d]RR Hom -> AA Hom", ++i);
+            printf("\t[%d]RR Hom -> AA Het", ++i);
+            printf("\t[%d]RR Hom -> missing", ++i);
+            printf("\t[%d]RA Het -> RR Hom", ++i);
+            printf("\t[%d]RA Het -> RA Het", ++i);
+            printf("\t[%d]RA Het -> AA Hom", ++i);
+            printf("\t[%d]RA Het -> AA Het", ++i);
+            printf("\t[%d]RA Het -> missing", ++i);
+            printf("\t[%d]AA Hom -> RR Hom", ++i);
+            printf("\t[%d]AA Hom -> RA Het", ++i);
+            printf("\t[%d]AA Hom -> AA Hom", ++i);
+            printf("\t[%d]AA Hom -> AA Het", ++i);
+            printf("\t[%d]AA Hom -> missing", ++i);
+            printf("\t[%d]AA Het -> RR Hom", ++i);
+            printf("\t[%d]AA Het -> RA Het", ++i);
+            printf("\t[%d]AA Het -> AA Hom", ++i);
+            printf("\t[%d]AA Het -> AA Het", ++i);
+            printf("\t[%d]AA Het -> missing", ++i);
+            printf("\t[%d]missing -> RR Hom", ++i);
+            printf("\t[%d]missing -> RA Het", ++i);
+            printf("\t[%d]missing -> AA Hom", ++i);
+            printf("\t[%d]missing -> AA Het", ++i);
+            printf("\t[%d]missing -> missing\n", ++i);
+
+            for (i=0; i<args->files->n_smpl; i++)
+            {
+                printf("GCT%c\t%s",  x==0 ? 's' : 'i', args->files->samples[i]);
+                for (j=0; j<5; j++)
+                    for (k=0; k<5; k++)
+                        printf("\t%"PRId64, stats[i].gt2gt[j][k]);
+                printf("\n");
+            }
+        }
     }
 
     printf("# DP, Depth distribution\n# DP\t[2]id\t[3]bin\t[4]number of genotypes\t[5]fraction of genotypes (%%)\t[6]number of sites\t[7]fraction of sites (%%)\n");
@@ -1423,8 +1516,10 @@ static void print_stats(args_t *args)
                 for (j=0; j<args->naf_hwe; j++) sum_tot += ptr[j];
                 if ( !sum_tot ) continue;
 
+                double af = args->af_bins ? (bin_get_value(args->af_bins,i)+bin_get_value(args->af_bins,i-1))*0.5 : (double)(i-1)/(args->m_af-1);
+
                 int nprn = 3;
-                printf("HWE\t%d\t%f\t%d",id,100.*(i-1)/(args->m_af-1),sum_tot);
+                printf("HWE\t%d\t%f\t%d",id,af,sum_tot);
                 for (j=0; j<args->naf_hwe; j++)
                 {
                     sum_tmp += ptr[j];
@@ -1462,6 +1557,8 @@ static void usage(void)
     fprintf(stderr, "Usage:   bcftools stats [options] <A.vcf.gz> [<B.vcf.gz>]\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
+    fprintf(stderr, "        --af-bins <list>               allele frequency bins, a list (0.1,0.5,1) or a file (0.1\\n0.5\\n1)\n");
+    fprintf(stderr, "        --af-tag <string>              allele frequency tag to use, by default estimated from AN,AC or GT\n");
     fprintf(stderr, "    -1, --1st-allele-only              include only 1st allele at multiallelic sites\n");
     fprintf(stderr, "    -c, --collapse <string>            treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
     fprintf(stderr, "    -d, --depth <int,int,int>          depth distribution: min,max,bin size [0,500,1]\n");
@@ -1478,6 +1575,7 @@ static void usage(void)
     fprintf(stderr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
     fprintf(stderr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
     fprintf(stderr, "    -u, --user-tstv <TAG[:min:max:n]>  collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
+    fprintf(stderr, "        --threads <int>                number of extra decompression threads [0]\n");
     fprintf(stderr, "    -v, --verbose                      produce verbose per-site and per-sample output\n");
     fprintf(stderr, "\n");
     exit(1);
@@ -1494,6 +1592,8 @@ int main_vcfstats(int argc, char *argv[])
 
     static struct option loptions[] =
     {
+        {"af-bins",1,0,1},
+        {"af-tag",1,0,2},
         {"1st-allele-only",0,0,'1'},
         {"include",1,0,'i'},
         {"exclude",1,0,'e'},
@@ -1512,10 +1612,13 @@ int main_vcfstats(int argc, char *argv[])
         {"targets-file",1,0,'T'},
         {"fasta-ref",1,0,'F'},
         {"user-tstv",1,0,'u'},
+        {"threads",1,0,9},
         {0,0,0,0}
     };
     while ((c = getopt_long(argc, argv, "hc:r:R:e:s:S:d:i:t:T:F:f:1u:vIE:",loptions,NULL)) >= 0) {
         switch (c) {
+            case  1 : args->af_bins_list = optarg; break;
+            case  2 : args->af_tag = optarg; break;
             case 'u': add_user_stats(args,optarg); break;
             case '1': args->first_allele_only = 1; break;
             case 'F': args->ref_fname = optarg; break;
@@ -1547,6 +1650,7 @@ int main_vcfstats(int argc, char *argv[])
             case 'I': args->split_by_id = 1; break;
             case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
             case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case 'h':
             case '?': usage();
             default: error("Unknown argument: %s\n", optarg);
@@ -1571,6 +1675,9 @@ int main_vcfstats(int argc, char *argv[])
         error("Failed to read the targets: %s\n", args->targets_list);
     if ( args->regions_list && bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
         error("Failed to read the regions: %s\n", args->regions_list);
+    if ( args->n_threads && bcf_sr_set_threads(args->files, args->n_threads)<0)
+        error("Failed to create threads\n");
+
     while (fname)
     {
         if ( !bcf_sr_add_reader(args->files, fname) )
diff --git a/bcftools/vcfstats.c.pysam.c b/bcftools/vcfstats.c.pysam.c
index 5653760..a5e5a9f 100644
--- a/bcftools/vcfstats.c.pysam.c
+++ b/bcftools/vcfstats.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfstats.c -- Produces stats which can be plotted using plot-vcfstats.
 
-    Copyright (C) 2012-2015 Genome Research Ltd.
+    Copyright (C) 2012-2016 Genome Research Ltd.
 
     Author: Petr Danecek <pd3 at sanger.ac.uk>
 
@@ -41,6 +41,7 @@ THE SOFTWARE.  */
 #include <inttypes.h>
 #include "bcftools.h"
 #include "filter.h"
+#include "bin.h"
 
 // Logic of the filters: include or exclude sites which match the filters?
 #define FLT_INCLUDE 1
@@ -71,17 +72,6 @@ idist_t;
 
 typedef struct
 {
-    double x;
-    double x2;
-    double y;
-    double y2;
-    double xy;
-    double n;
-}
-smpl_r_t;
-
-typedef struct
-{
     int n_snps, n_indels, n_mnps, n_others, n_mals, n_snp_mals, n_records, n_noalts;
     int *af_ts, *af_tv, *af_snps;   // first bin of af_* stats are singletons
     #if HWE_STATS
@@ -110,9 +100,14 @@ stats_t;
 
 typedef struct
 {
-    uint64_t m[3], mm[3];        // number of hom, het and non-ref hom matches and mismatches
-    float r2sum;
-    uint32_t r2n;
+    uint64_t gt2gt[5][5];   // number of RR->RR, RR->RA, etc. matches/mismatches; see type2stats
+    /*
+        Pearson's R^2 is used for aggregate R^2 
+        y, yy .. sum of dosage and squared dosage in the query VCF (second file)
+        x, xx .. sum of squared dosage in the truth VCF (first file)
+        n     .. number of genotypes
+     */
+    double y, yy, x, xx, yx, n;
 }
 gtcmp_t;
 
@@ -137,7 +132,11 @@ typedef struct
     int *tmp_iaf, ntmp_iaf, m_af, m_qual, naf_hwe, mtmp_frm;
     uint8_t *tmp_frm;
     int dp_min, dp_max, dp_step;
-    gtcmp_t *af_gts_snps, *af_gts_indels, *smpl_gts_snps, *smpl_gts_indels; // first bin of af_* stats are singletons
+    gtcmp_t *smpl_gts_snps, *smpl_gts_indels;
+    gtcmp_t *af_gts_snps, *af_gts_indels; // first bin of af_* stats are singletons
+    bin_t *af_bins;
+    float *farr;
+    int mfarr;
 
     // indel context
     indel_ctx_t *indel_ctx;
@@ -150,21 +149,18 @@ typedef struct
     // other
     bcf_srs_t *files;
     bcf_sr_regions_t *exons;
-    char **argv, *exons_fname, *regions_list, *samples_list, *targets_list;
+    char **argv, *exons_fname, *regions_list, *samples_list, *targets_list, *af_bins_list, *af_tag;
     int argc, verbose_sites, first_allele_only, samples_is_file;
     int split_by_id, nstats;
 
     filter_t *filter[2];
     char *filter_str;
     int filter_logic;   // include or exclude sites which match the filters? One of FLT_INCLUDE/FLT_EXCLUDE
-
-    // Per Sample r working data arrays of size equal to number of samples
-    smpl_r_t* smpl_r_snps;
-    smpl_r_t* smpl_r_indels;
+    int n_threads;
 }
 args_t;
 
-static int type2dosage[6], type2ploidy[6], type2stats[6];
+static int type2dosage[6], type2ploidy[6], type2stats[7];
 
 static void idist_init(idist_t *d, int min, int max, int step)
 {
@@ -189,6 +185,12 @@ static inline int idist_i2bin(idist_t *d, int i)
     return i-1+d->min;
 }
 
+static inline int clip_nonnegative(float x, int limit)
+{
+    if (x >= limit || isnan(x)) return limit - 1;
+    else if (x <= 0.0) return 0;
+    else return (int) x;
+}
 
 #define IC_DBG 0
 #if IC_DBG
@@ -405,13 +407,30 @@ static void init_stats(args_t *args)
         args->filter[0] = filter_init(bcf_sr_get_header(args->files,0), args->filter_str);
         if ( args->files->nreaders==2 )
             args->filter[1] = filter_init(bcf_sr_get_header(args->files,1), args->filter_str);
+        args->files->max_unpack |= filter_max_unpack(args->filter[0]);
+    }
+
+    // AF corresponds to AC but is more robust to mixtures of haploid and diploid GTs
+    if ( !args->af_bins_list )
+    {
+        args->m_af = 101;
+        for (i=0; i<args->files->nreaders; i++)
+            if ( bcf_hdr_nsamples(args->files->readers[i].header) + 1> args->m_af )
+                args->m_af = bcf_hdr_nsamples(args->files->readers[i].header) + 1;
+    }
+    else
+    {
+        args->af_bins = bin_init(args->af_bins_list,0,1);
+    
+        // m_af is used also for other af arrays, where the first bin is for
+        // singletons. However, since the last element is unused in af_bins
+        // (n boundaries form n-1 intervals), the m_af count is good for both.
+        args->m_af = bin_get_size(args->af_bins);
     }
 
-    // AF corresponds to AC but is more robust for mixture of haploid and diploid GTs
-    args->m_af = 101;
-    for (i=0; i<args->files->nreaders; i++)
-        if ( bcf_hdr_nsamples(args->files->readers[i].header) + 1> args->m_af )
-            args->m_af = bcf_hdr_nsamples(args->files->readers[i].header) + 1;
+    bcf_hdr_t *hdr = bcf_sr_get_header(args->files,0);
+    if ( args->af_tag && !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,bcf_hdr_id2int(hdr,BCF_DT_ID,args->af_tag)) )
+        error("No such INFO tag: %s\n", args->af_tag);
 
     #if QUAL_STATS
         args->m_qual = 999;
@@ -432,8 +451,6 @@ static void init_stats(args_t *args)
         args->af_gts_indels   = (gtcmp_t *) calloc(args->m_af,sizeof(gtcmp_t));
         args->smpl_gts_snps   = (gtcmp_t *) calloc(args->files->n_smpl,sizeof(gtcmp_t));
         args->smpl_gts_indels = (gtcmp_t *) calloc(args->files->n_smpl,sizeof(gtcmp_t));
-        args->smpl_r_snps = (smpl_r_t*) calloc(args->files->n_smpl, sizeof(smpl_r_t));
-        args->smpl_r_indels = (smpl_r_t*) calloc(args->files->n_smpl, sizeof(smpl_r_t));
     }
     for (i=0; i<args->nstats; i++)
     {
@@ -505,9 +522,10 @@ static void init_stats(args_t *args)
     type2stats[GT_HOM_RR] = 0;
     type2stats[GT_HET_RA] = 1;
     type2stats[GT_HOM_AA] = 2;
-    type2stats[GT_HET_AA] = 1;
+    type2stats[GT_HET_AA] = 3;
     type2stats[GT_HAPL_R] = 0;
     type2stats[GT_HAPL_A] = 2;
+    type2stats[GT_UNKN]   = 4;
 
 }
 static void destroy_stats(args_t *args)
@@ -528,7 +546,6 @@ static void destroy_stats(args_t *args)
             if (stats->qual_indels) free(stats->qual_indels);
         #endif
         #if HWE_STATS
-            //if ( args->files->n_smpl ) free(stats->af_hwe);
             free(stats->af_hwe);
         #endif
         free(stats->insertions);
@@ -556,6 +573,8 @@ static void destroy_stats(args_t *args)
         if ( args->exons ) free(stats->smpl_frm_shifts);
     }
     for (j=0; j<args->nusr; j++) free(args->usr[j].tag);
+    if ( args->af_bins ) bin_destroy(args->af_bins);
+    free(args->farr);
     free(args->usr);
     free(args->tmp_frm);
     free(args->tmp_iaf);
@@ -564,8 +583,6 @@ static void destroy_stats(args_t *args)
     free(args->af_gts_indels);
     free(args->smpl_gts_snps);
     free(args->smpl_gts_indels);
-    free(args->smpl_r_snps);
-    free(args->smpl_r_indels);
     if (args->indel_ctx) indel_ctx_destroy(args->indel_ctx);
     if (args->filter[0]) filter_destroy(args->filter[0]);
     if (args->filter[1]) filter_destroy(args->filter[1]);
@@ -574,36 +591,59 @@ static void destroy_stats(args_t *args)
 static void init_iaf(args_t *args, bcf_sr_t *reader)
 {
     bcf1_t *line = reader->buffer[0];
-    if ( args->ntmp_iaf < line->n_allele )
+    hts_expand(int32_t,line->n_allele,args->ntmp_iaf,args->tmp_iaf);
+
+    int i, ret;
+    if ( args->af_tag )
     {
-        args->tmp_iaf = (int*)realloc(args->tmp_iaf, line->n_allele*sizeof(int));
-        args->ntmp_iaf = line->n_allele;
+        ret = bcf_get_info_float(reader->header, line, args->af_tag, &args->farr, &args->mfarr);
+        if ( ret<=0 || ret!=line->n_allele-1 )
+        {
+            // the AF tag is not present or wrong number of values, put in the singletons/unknown bin
+            for (i=0; i<line->n_allele; i++) args->tmp_iaf[i] = 0;
+            return;
+        }
+        args->tmp_iaf[0] = 0;
+        for (i=1; i<line->n_allele; i++)
+        {
+            float af = args->farr[i-1];
+            if ( af<0 ) af = 0;
+            else if ( af>1 ) af = 1;
+            int iaf = args->af_bins ? bin_get_idx(args->af_bins,af) : af*(args->m_af-2);
+            args->tmp_iaf[i] = iaf + 1;     // the first tmp_iaf bin is reserved for singletons
+        }
+        return;
     }
+
     // tmp_iaf is first filled with AC counts in calc_ac and then transformed to
     //  an index to af_gts_snps
-    int i, ret = bcf_calc_ac(reader->header, line, args->tmp_iaf, args->samples_list ? BCF_UN_INFO|BCF_UN_FMT : BCF_UN_INFO);
-    if ( ret )
+    ret = bcf_calc_ac(reader->header, line, args->tmp_iaf, args->samples_list ? BCF_UN_INFO|BCF_UN_FMT : BCF_UN_INFO);
+    if ( !ret )
     {
-        int an=0;
-        for (i=0; i<line->n_allele; i++)
-            an += args->tmp_iaf[i];
+        for (i=0; i<line->n_allele; i++) args->tmp_iaf[i] = 0;      // singletons/unknown bin
+        return;
+    }
 
-        args->tmp_iaf[0] = 0;
-        for (i=1; i<line->n_allele; i++)
+    int an = 0;
+    for (i=0; i<line->n_allele; i++)
+        an += args->tmp_iaf[i];
+
+    args->tmp_iaf[0] = 0;
+    for (i=1; i<line->n_allele; i++)
+    {
+        if ( args->tmp_iaf[i]==1 )
+            args->tmp_iaf[i] = 0;   // singletons into the first bin
+        else if ( !an )
+            args->tmp_iaf[i] = 1;   // no genotype at all, put to the AF=0 bin
+        else
         {
-            if ( args->tmp_iaf[i]==1 )
-                args->tmp_iaf[i] = 0; // singletons into the first bin
-            else if ( !an )
-                args->tmp_iaf[i] = 1;   // no genotype at all, put to the AF=0 bin
-            else
-                args->tmp_iaf[i] = 1 + args->tmp_iaf[i] * (args->m_af-2.0) / an;
+            float af = (float) args->tmp_iaf[i] / an;
+            if ( af<0 ) af = 0;
+            else if ( af>1 ) af = 1;
+            int iaf = args->af_bins ? bin_get_idx(args->af_bins,af) : af*(args->m_af-2);
+            args->tmp_iaf[i] = iaf + 1;
         }
     }
-    else
-        for (i=0; i<line->n_allele; i++)
-            args->tmp_iaf[i] = 0;
-
-    // todo: otherwise use AF
 }
 
 static inline void do_mnp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
@@ -623,7 +663,7 @@ static void do_indel_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
     bcf1_t *line = reader->buffer[0];
 
     #if QUAL_STATS
-        int iqual = line->qual >= args->m_qual || isnan(line->qual) ? args->m_qual - 1 : line->qual;
+        int iqual = clip_nonnegative(line->qual, args->m_qual);
         stats->qual_indels[iqual]++;
     #endif
 
@@ -758,7 +798,7 @@ static void do_snp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
     if ( ref<0 ) return;
 
     #if QUAL_STATS
-        int iqual = line->qual >= args->m_qual || isnan(line->qual) ? args->m_qual - 1 : line->qual;
+        int iqual = clip_nonnegative(line->qual, args->m_qual);
         stats->qual_snps[iqual]++;
     #endif
 
@@ -875,6 +915,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
         {
             float het_frac = (float)nhet_tot/(nhet_tot + nref_tot + nalt_tot);
             int idx = het_frac*(args->naf_hwe - 1);
+//check me: what is this?
             if ( line->n_allele>1 ) idx += args->naf_hwe*args->tmp_iaf[1];
             stats->af_hwe[idx]++;
         }
@@ -913,88 +954,42 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
         fmt1 = bcf_get_fmt(files->readers[1].header,files->readers[1].buffer[0],"GT"); if ( !fmt1 ) return;
 
         // only the first ALT allele is considered
-        int iaf = line->n_allele>1 ? args->tmp_iaf[1] : 1;
+        int iaf = args->tmp_iaf[1];
         int line_type = bcf_get_variant_types(files->readers[0].buffer[0]);
         gtcmp_t *af_stats = line_type&VCF_SNP ? args->af_gts_snps : args->af_gts_indels;
         gtcmp_t *smpl_stats = line_type&VCF_SNP ? args->smpl_gts_snps : args->smpl_gts_indels;
 
-        //
-        // Calculates r squared
-        // x is mean dosage of x at given site
-        // x2 is mean squared dosage of x at given site
-        // y is mean dosage of x at given site
-        // y2 is mean squared dosage of x at given site
-        // xy is mean dosage of x*y at given site
-        // r2sum += (xy - x*y)^2 / ( (x2 - x^2) * (y2 - y^2) )
-        // r2n is number of sites considered
-        // output as r2sum/r2n for each AF bin
-        int r2n = 0;
-        float x = 0, y = 0, xy = 0, x2 = 0, y2 = 0;
-        // Select smpl_r
-        smpl_r_t *smpl_r = NULL;
-        if (line_type&VCF_SNP)
-        {
-            smpl_r = args->smpl_r_snps;
-        }
-        else if (line_type&VCF_INDEL)
-        {
-            smpl_r = args->smpl_r_indels;
-        }
         for (is=0; is<files->n_smpl; is++)
         {
             // Simplified comparison: only 0/0, 0/1, 1/1 is looked at as the identity of
             //  actual alleles can be enforced by running without the -c option.
             int gt0 = bcf_gt_type(fmt0, files->readers[0].samples[is], NULL, NULL);
-            if ( gt0 == GT_UNKN ) continue;
-
             int gt1 = bcf_gt_type(fmt1, files->readers[1].samples[is], NULL, NULL);
-            if ( gt1 == GT_UNKN ) continue;
 
-            if ( type2ploidy[gt0]*type2ploidy[gt1] == -1 ) continue;   // cannot compare diploid and haploid genotypes
+            int idx0 = type2stats[gt0];
+            int idx1 = type2stats[gt1];
+            af_stats[iaf].gt2gt[idx0][idx1]++;
+            smpl_stats[is].gt2gt[idx0][idx1]++;
 
-            int dsg0 = type2dosage[gt0];
-            int dsg1 = type2dosage[gt1];
-            x   += dsg0;
-            x2  += dsg0*dsg0;
-            y   += dsg1;
-            y2  += dsg1*dsg1;
-            xy  += dsg0*dsg1;
-            r2n++;
-
-            int idx = type2stats[gt0];
-            if ( gt0==gt1 )
-            {
-                af_stats[iaf].m[idx]++;
-                smpl_stats[is].m[idx]++;
-            }
-            else
-            {
-                af_stats[iaf].mm[idx]++;
-                smpl_stats[is].mm[idx]++;
-            }
-
-            // Now do it across samples
+            if ( gt0 == GT_UNKN || gt1 == GT_UNKN ) continue;
+            if ( type2ploidy[gt0]*type2ploidy[gt1] == -1 ) continue;   // cannot compare diploid and haploid genotypes
 
-            if (smpl_r) {
-                smpl_r[is].xy += dsg0*dsg1;
-                smpl_r[is].x += dsg0;
-                smpl_r[is].x2 += dsg0*dsg0;
-                smpl_r[is].y += dsg1;
-                smpl_r[is].y2 += dsg1*dsg1;
-                ++(smpl_r[is].n);
-            }
-        }
-
-        if ( r2n )
-        {
-            x /= r2n; y /= r2n; x2 /= r2n; y2 /= r2n; xy /= r2n;
-            float cov  = xy - x*y;
-            float var2 = (x2 - x*x) * (y2 - y*y);
-            if ( var2!=0 )
-            {
-                af_stats[iaf].r2sum += cov*cov/var2;
-                af_stats[iaf].r2n++;
-            }
+            float y = type2dosage[gt0];
+            float x = type2dosage[gt1];
+
+            smpl_stats[is].yx += y*x;
+            smpl_stats[is].x  += x;
+            smpl_stats[is].xx += x*x;
+            smpl_stats[is].y  += y;
+            smpl_stats[is].yy += y*y;
+            smpl_stats[is].n  += 1;
+
+            af_stats[iaf].yx += y*x;
+            af_stats[iaf].x  += x;
+            af_stats[iaf].xx += x*x;
+            af_stats[iaf].y  += y;
+            af_stats[iaf].yy += y*y;
+            af_stats[iaf].n  += 1;
         }
 
         if ( args->verbose_sites )
@@ -1131,7 +1126,7 @@ static void print_header(args_t *args)
 #define T2S(x) type2stats[x]
 static void print_stats(args_t *args)
 {
-    int i, id;
+    int i, j,k, id;
     fprintf(pysam_stdout, "# SN, Summary numbers:\n# SN\t[2]id\t[3]key\t[4]value\n");
     for (id=0; id<args->files->nreaders; id++)
         fprintf(pysam_stdout, "SN\t%d\tnumber of samples:\t%d\n", id, bcf_hdr_nsamples(args->files->readers[id].header));
@@ -1204,6 +1199,24 @@ static void print_stats(args_t *args)
         stats->af_repeats[1][1] += stats->af_repeats[1][0];
         stats->af_repeats[2][1] += stats->af_repeats[2][0];
     }
+    // move the singletons stats into the first AF bin, singleton stats was collected separately because of init_iaf
+    if ( args->af_gts_snps )
+    {
+        args->af_gts_snps[1].y    += args->af_gts_snps[0].y;
+        args->af_gts_snps[1].yy   += args->af_gts_snps[0].yy;
+        args->af_gts_snps[1].xx   += args->af_gts_snps[0].xx;
+        args->af_gts_snps[1].yx   += args->af_gts_snps[0].yx;
+        args->af_gts_snps[1].n    += args->af_gts_snps[0].n;
+    }
+    if ( args->af_gts_indels )
+    {
+        args->af_gts_indels[1].y  += args->af_gts_indels[0].y;
+        args->af_gts_indels[1].yy += args->af_gts_indels[0].yy;
+        args->af_gts_indels[1].xx += args->af_gts_indels[0].xx;
+        args->af_gts_indels[1].yx += args->af_gts_indels[0].yx;
+        args->af_gts_indels[1].n  += args->af_gts_indels[0].n;
+    }
+
     fprintf(pysam_stdout, "# AF, Stats by non-reference allele frequency:\n# AF\t[2]id\t[3]allele frequency\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
     for (id=0; id<args->nstats; id++)
     {
@@ -1211,7 +1224,8 @@ static void print_stats(args_t *args)
         for (i=1; i<args->m_af; i++) // note that af[1] now contains also af[0], see SiS stats output above
         {
             if ( stats->af_snps[i]+stats->af_ts[i]+stats->af_tv[i]+stats->af_repeats[0][i]+stats->af_repeats[1][i]+stats->af_repeats[2][i] == 0  ) continue;
-            fprintf(pysam_stdout, "AF\t%d\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,100.*(i-1)/(args->m_af-1),stats->af_snps[i],stats->af_ts[i],stats->af_tv[i],
+            double af = args->af_bins ? (bin_get_value(args->af_bins,i)+bin_get_value(args->af_bins,i-1))*0.5 : (double)(i-1)/(args->m_af-1);
+            fprintf(pysam_stdout, "AF\t%d\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,af,stats->af_snps[i],stats->af_ts[i],stats->af_tv[i],
                 stats->af_repeats[0][i]+stats->af_repeats[1][i]+stats->af_repeats[2][i],stats->af_repeats[0][i],stats->af_repeats[1][i],stats->af_repeats[2][i]);
         }
     }
@@ -1268,34 +1282,56 @@ static void print_stats(args_t *args)
         fprintf(pysam_stdout, "SN\t%d\tnumber of samples:\t%d\n", 2, args->files->n_smpl);
 
         int x;
-        for (x=0; x<2; x++)
+        for (x=0; x<2; x++)     // x=0: snps, x=1: indels
         {
             gtcmp_t *stats;
             if ( x==0 )
             {
-                fprintf(pysam_stdout, "# GCsAF, Genotype concordance by non-reference allele frequency (SNPs)\n# GCsAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
+                fprintf(pysam_stdout, "# GCsAF, Genotype concordance by non-reference allele frequency (SNPs)\n# GCsAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of genotypes\n");
                 stats = args->af_gts_snps;
             }
             else
             {
-                fprintf(pysam_stdout, "# GCiAF, Genotype concordance by non-reference allele frequency (indels)\n# GCiAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
+                fprintf(pysam_stdout, "# GCiAF, Genotype concordance by non-reference allele frequency (indels)\n# GCiAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of genotypes\n");
                 stats = args->af_gts_indels;
             }
-            uint64_t nrd_m[3] = {0,0,0}, nrd_mm[3] = {0,0,0};
+            uint64_t nrd_m[4] = {0,0,0,0}, nrd_mm[4] = {0,0,0,0};   // across all bins
             for (i=0; i<args->m_af; i++)
             {
-                int j, n = 0;
-                for (j=0; j<3; j++)
+                int n = 0;
+                uint64_t m[4] = {0,0,0,0}, mm[4] = {0,0,0,0};    // in i-th AF bin
+                for (j=0; j<4; j++)     // rr, ra, aa hom, aa het, ./.
+                    for (k=0; k<4; k++)
+                    {
+                        n += stats[i].gt2gt[j][k];
+                        if ( j==k ) 
+                        {
+                            nrd_m[j] += stats[i].gt2gt[j][k];
+                            m[j]     += stats[i].gt2gt[j][k];
+                        }
+                        else
+                        {
+                            nrd_mm[j] += stats[i].gt2gt[j][k];
+                            mm[j]     += stats[i].gt2gt[j][k];
+                        }
+                    }
+                if ( !i || !n ) continue;   // skip singleton stats and empty bins
+
+                // Pearson's r2
+                double r2 = 0;
+                if ( stats[i].n )
                 {
-                    n += stats[i].m[j] + stats[i].mm[j];
-                    nrd_m[j]  += stats[i].m[j];
-                    nrd_mm[j] += stats[i].mm[j];
+                    r2  = (stats[i].yx - stats[i].x*stats[i].y/stats[i].n);
+                    r2 /= sqrt((stats[i].xx - stats[i].x*stats[i].x/stats[i].n) * (stats[i].yy - stats[i].y*stats[i].y/stats[i].n));
+                    r2 *= r2;
                 }
-                if ( !i || !n ) continue;   // skip singleton stats and empty bins
-                fprintf(pysam_stdout, "GC%cAF\t2\t%f", x==0 ? 's' : 'i', 100.*(i-1)/(args->m_af-1));
-                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
-                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
-                fprintf(pysam_stdout, "\t%f\t%"PRId32"\n", stats[i].r2n ? stats[i].r2sum/stats[i].r2n : -1.0, stats[i].r2n);
+                double af = args->af_bins ? (bin_get_value(args->af_bins,i)+bin_get_value(args->af_bins,i-1))*0.5 : (double)(i-1)/(args->m_af-1);
+                fprintf(pysam_stdout, "GC%cAF\t2\t%f", x==0 ? 's' : 'i', af);
+                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", m[T2S(GT_HOM_RR)],m[T2S(GT_HET_RA)],m[T2S(GT_HOM_AA)]);
+                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", mm[T2S(GT_HOM_RR)],mm[T2S(GT_HET_RA)],mm[T2S(GT_HOM_AA)]);
+                if ( stats[i].n && !isnan(r2) ) fprintf(pysam_stdout, "\t%f", r2);
+                else fprintf(pysam_stdout, "\t"NA_STRING);
+                fprintf(pysam_stdout, "\t%.0f\n", stats[i].n);
             }
 
             if ( x==0 )
@@ -1311,8 +1347,8 @@ static void print_stats(args_t *args)
             }
             else
                 fprintf(pysam_stdout, "# Non-Reference Discordance (NRD), indels\n# NRDi\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
-            uint64_t m  = nrd_m[T2S(GT_HET_RA)] + nrd_m[T2S(GT_HOM_AA)];
-            uint64_t mm = nrd_mm[T2S(GT_HOM_RR)] + nrd_mm[T2S(GT_HET_RA)] + nrd_mm[T2S(GT_HOM_AA)];
+            uint64_t m  = nrd_m[T2S(GT_HET_RA)] + nrd_m[T2S(GT_HOM_AA)] + nrd_m[T2S(GT_HET_AA)];
+            uint64_t mm = nrd_mm[T2S(GT_HOM_RR)] + nrd_mm[T2S(GT_HET_RA)] + nrd_mm[T2S(GT_HOM_AA)] + nrd_mm[T2S(GT_HET_AA)];
             fprintf(pysam_stdout, "NRD%c\t2\t%f\t%f\t%f\t%f\n", x==0 ? 's' : 'i',
                     m+mm ? mm*100.0/(m+mm) : 0,
                     nrd_m[T2S(GT_HOM_RR)]+nrd_mm[T2S(GT_HOM_RR)] ? nrd_mm[T2S(GT_HOM_RR)]*100.0/(nrd_m[T2S(GT_HOM_RR)]+nrd_mm[T2S(GT_HOM_RR)]) : 0,
@@ -1321,42 +1357,99 @@ static void print_stats(args_t *args)
                   );
         }
 
-        for (x=0; x<2; x++)
+        for (x=0; x<2; x++) // x=0: snps, x=1: indels
         {
             gtcmp_t *stats;
-            smpl_r_t *smpl_r_array;
             if ( x==0 )
             {
                 fprintf(pysam_stdout, "# GCsS, Genotype concordance by sample (SNPs)\n# GCsS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
                 stats = args->smpl_gts_snps;
-                smpl_r_array = args->smpl_r_snps;
             }
             else
             {
                 fprintf(pysam_stdout, "# GCiS, Genotype concordance by sample (indels)\n# GCiS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
                 stats = args->smpl_gts_indels;
-                smpl_r_array = args->smpl_r_indels;
             }
             for (i=0; i<args->files->n_smpl; i++)
             {
-                uint64_t m  = stats[i].m[T2S(GT_HET_RA)] + stats[i].m[T2S(GT_HOM_AA)];
-                uint64_t mm = stats[i].mm[T2S(GT_HOM_RR)] + stats[i].mm[T2S(GT_HET_RA)] + stats[i].mm[T2S(GT_HOM_AA)];
-                // Calculate r by formula 19.2 - Biostatistical Analysis 4th edition - Jerrold H. Zar
-                smpl_r_t *smpl_r = smpl_r_array + i;
-                double r = 0.0;
-                if (smpl_r->n) {
-                    double sum_crossprod = smpl_r->xy-(smpl_r->x*smpl_r->y)/smpl_r->n;//per 17.3 machine formula
-                    double x2_xx = smpl_r->x2-(smpl_r->x*smpl_r->x)/smpl_r->n;
-                    double y2_yy = smpl_r->y2-(smpl_r->y*smpl_r->y)/smpl_r->n;
-                    r = (sum_crossprod)/sqrt(x2_xx*y2_yy);
+                uint64_t mm = 0, m = stats[i].gt2gt[T2S(GT_HET_RA)][T2S(GT_HET_RA)] + stats[i].gt2gt[T2S(GT_HOM_AA)][T2S(GT_HOM_AA)];
+                for (j=0; j<3; j++)
+                    for (k=0; k<3; k++)
+                        if ( j!=k ) mm += stats[i].gt2gt[j][k];
+
+                // Pearson's r2
+                double r2 = 0;
+                if ( stats[i].n )
+                {
+                    r2  = (stats[i].yx - stats[i].x*stats[i].y/stats[i].n);
+                    r2 /= sqrt((stats[i].xx - stats[i].x*stats[i].x/stats[i].n) * (stats[i].yy - stats[i].y*stats[i].y/stats[i].n));
+                    r2 *= r2;
                 }
                 fprintf(pysam_stdout, "GC%cS\t2\t%s\t%.3f",  x==0 ? 's' : 'i', args->files->samples[i], m+mm ? mm*100.0/(m+mm) : 0);
-                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
-                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
-                if (smpl_r->n && !isnan(r)) fprintf(pysam_stdout, "\t%f\n", r*r);
+                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", 
+                    stats[i].gt2gt[T2S(GT_HOM_RR)][T2S(GT_HOM_RR)],
+                    stats[i].gt2gt[T2S(GT_HET_RA)][T2S(GT_HET_RA)],
+                    stats[i].gt2gt[T2S(GT_HOM_AA)][T2S(GT_HOM_AA)]);
+                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"",
+                    stats[i].gt2gt[T2S(GT_HOM_RR)][T2S(GT_HET_RA)] + stats[i].gt2gt[T2S(GT_HOM_RR)][T2S(GT_HOM_AA)],
+                    stats[i].gt2gt[T2S(GT_HET_RA)][T2S(GT_HOM_RR)] + stats[i].gt2gt[T2S(GT_HET_RA)][T2S(GT_HOM_AA)],
+                    stats[i].gt2gt[T2S(GT_HOM_AA)][T2S(GT_HOM_RR)] + stats[i].gt2gt[T2S(GT_HOM_AA)][T2S(GT_HET_RA)]);
+                if ( stats[i].n && !isnan(r2) ) fprintf(pysam_stdout, "\t%f\n", r2);
                 else fprintf(pysam_stdout, "\t"NA_STRING"\n");
             }
         }
+        for (x=0; x<2; x++) // x=0: snps, x=1: indels
+        {
+                //printf("# GCiS, Genotype concordance by sample (indels)\n# GCiS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
+
+            gtcmp_t *stats;
+            if ( x==0 )
+            {
+                fprintf(pysam_stdout, "# GCTs, Genotype concordance table (SNPs)\n# GCTs");
+                stats = args->smpl_gts_snps;
+            }
+            else
+            {
+                fprintf(pysam_stdout, "# GCTi, Genotype concordance table (indels)\n# GCTi");
+                stats = args->smpl_gts_indels;
+            }
+            i = 1;
+            fprintf(pysam_stdout, "\t[%d]sample", ++i);
+            fprintf(pysam_stdout, "\t[%d]RR Hom -> RR Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]RR Hom -> RA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]RR Hom -> AA Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]RR Hom -> AA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]RR Hom -> missing", ++i);
+            fprintf(pysam_stdout, "\t[%d]RA Het -> RR Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]RA Het -> RA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]RA Het -> AA Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]RA Het -> AA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]RA Het -> missing", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Hom -> RR Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Hom -> RA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Hom -> AA Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Hom -> AA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Hom -> missing", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Het -> RR Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Het -> RA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Het -> AA Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Het -> AA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]AA Het -> missing", ++i);
+            fprintf(pysam_stdout, "\t[%d]missing -> RR Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]missing -> RA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]missing -> AA Hom", ++i);
+            fprintf(pysam_stdout, "\t[%d]missing -> AA Het", ++i);
+            fprintf(pysam_stdout, "\t[%d]missing -> missing\n", ++i);
+
+            for (i=0; i<args->files->n_smpl; i++)
+            {
+                fprintf(pysam_stdout, "GCT%c\t%s",  x==0 ? 's' : 'i', args->files->samples[i]);
+                for (j=0; j<5; j++)
+                    for (k=0; k<5; k++)
+                        fprintf(pysam_stdout, "\t%"PRId64, stats[i].gt2gt[j][k]);
+                fprintf(pysam_stdout, "\n");
+            }
+        }
     }
 
     fprintf(pysam_stdout, "# DP, Depth distribution\n# DP\t[2]id\t[3]bin\t[4]number of genotypes\t[5]fraction of genotypes (%%)\t[6]number of sites\t[7]fraction of sites (%%)\n");
@@ -1425,8 +1518,10 @@ static void print_stats(args_t *args)
                 for (j=0; j<args->naf_hwe; j++) sum_tot += ptr[j];
                 if ( !sum_tot ) continue;
 
+                double af = args->af_bins ? (bin_get_value(args->af_bins,i)+bin_get_value(args->af_bins,i-1))*0.5 : (double)(i-1)/(args->m_af-1);
+
                 int nprn = 3;
-                fprintf(pysam_stdout, "HWE\t%d\t%f\t%d",id,100.*(i-1)/(args->m_af-1),sum_tot);
+                fprintf(pysam_stdout, "HWE\t%d\t%f\t%d",id,af,sum_tot);
                 for (j=0; j<args->naf_hwe; j++)
                 {
                     sum_tmp += ptr[j];
@@ -1464,6 +1559,8 @@ static void usage(void)
     fprintf(pysam_stderr, "Usage:   bcftools stats [options] <A.vcf.gz> [<B.vcf.gz>]\n");
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "        --af-bins <list>               allele frequency bins, a list (0.1,0.5,1) or a file (0.1\\n0.5\\n1)\n");
+    fprintf(pysam_stderr, "        --af-tag <string>              allele frequency tag to use, by default estimated from AN,AC or GT\n");
     fprintf(pysam_stderr, "    -1, --1st-allele-only              include only 1st allele at multiallelic sites\n");
     fprintf(pysam_stderr, "    -c, --collapse <string>            treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
     fprintf(pysam_stderr, "    -d, --depth <int,int,int>          depth distribution: min,max,bin size [0,500,1]\n");
@@ -1480,6 +1577,7 @@ static void usage(void)
     fprintf(pysam_stderr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
     fprintf(pysam_stderr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
     fprintf(pysam_stderr, "    -u, --user-tstv <TAG[:min:max:n]>  collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
+    fprintf(pysam_stderr, "        --threads <int>                number of extra decompression threads [0]\n");
     fprintf(pysam_stderr, "    -v, --verbose                      produce verbose per-site and per-sample output\n");
     fprintf(pysam_stderr, "\n");
     exit(1);
@@ -1496,6 +1594,8 @@ int main_vcfstats(int argc, char *argv[])
 
     static struct option loptions[] =
     {
+        {"af-bins",1,0,1},
+        {"af-tag",1,0,2},
         {"1st-allele-only",0,0,'1'},
         {"include",1,0,'i'},
         {"exclude",1,0,'e'},
@@ -1514,10 +1614,13 @@ int main_vcfstats(int argc, char *argv[])
         {"targets-file",1,0,'T'},
         {"fasta-ref",1,0,'F'},
         {"user-tstv",1,0,'u'},
+        {"threads",1,0,9},
         {0,0,0,0}
     };
     while ((c = getopt_long(argc, argv, "hc:r:R:e:s:S:d:i:t:T:F:f:1u:vIE:",loptions,NULL)) >= 0) {
         switch (c) {
+            case  1 : args->af_bins_list = optarg; break;
+            case  2 : args->af_tag = optarg; break;
             case 'u': add_user_stats(args,optarg); break;
             case '1': args->first_allele_only = 1; break;
             case 'F': args->ref_fname = optarg; break;
@@ -1549,6 +1652,7 @@ int main_vcfstats(int argc, char *argv[])
             case 'I': args->split_by_id = 1; break;
             case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
             case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case 'h':
             case '?': usage();
             default: error("Unknown argument: %s\n", optarg);
@@ -1573,6 +1677,9 @@ int main_vcfstats(int argc, char *argv[])
         error("Failed to read the targets: %s\n", args->targets_list);
     if ( args->regions_list && bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
         error("Failed to read the regions: %s\n", args->regions_list);
+    if ( args->n_threads && bcf_sr_set_threads(args->files, args->n_threads)<0)
+        error("Failed to create threads\n");
+
     while (fname)
     {
         if ( !bcf_sr_add_reader(args->files, fname) )
diff --git a/bcftools/vcfview.c b/bcftools/vcfview.c
index c14075d..645cc8a 100644
--- a/bcftools/vcfview.c
+++ b/bcftools/vcfview.c
@@ -23,6 +23,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -181,10 +182,12 @@ static void init_data(args_t *args)
         if (args->include_types) {
             args->include = 0;
             for (i = 0; i < n; ++i) {
-                if (strcmp(type_list[i], "snps") == 0) args->include |= VCF_SNP;
-                else if (strcmp(type_list[i], "indels") == 0) args->include |= VCF_INDEL;
-                else if (strcmp(type_list[i], "mnps") == 0) args->include |= VCF_MNP;
-                else if (strcmp(type_list[i], "other") == 0) args->include |= VCF_OTHER;
+                if (strcmp(type_list[i], "snps") == 0) args->include |= VCF_SNP<<1;
+                else if (strcmp(type_list[i], "indels") == 0) args->include |= VCF_INDEL<<1;
+                else if (strcmp(type_list[i], "mnps") == 0) args->include |= VCF_MNP<<1;
+                else if (strcmp(type_list[i], "other") == 0) args->include |= VCF_OTHER<<1;
+                else if (strcmp(type_list[i], "ref") == 0) args->include |= VCF_OTHER<<1;
+                else if (strcmp(type_list[i], "bnd") == 0) args->include |= VCF_BND<<1;
                 else {
                     fprintf(stderr, "[E::%s] unknown type\n", type_list[i]);
                     fprintf(stderr, "Accepted types are snps, indels, mnps, other\n");
@@ -195,10 +198,12 @@ static void init_data(args_t *args)
         if (args->exclude_types) {
             args->exclude = 0;
             for (i = 0; i < n; ++i) {
-                if (strcmp(type_list[i], "snps") == 0) args->exclude |= VCF_SNP;
-                else if (strcmp(type_list[i], "indels") == 0) args->exclude |= VCF_INDEL;
-                else if (strcmp(type_list[i], "mnps") == 0) args->exclude |= VCF_MNP;
-                else if (strcmp(type_list[i], "other") == 0) args->exclude |= VCF_OTHER;
+                if (strcmp(type_list[i], "snps") == 0) args->exclude |= VCF_SNP<<1;
+                else if (strcmp(type_list[i], "indels") == 0) args->exclude |= VCF_INDEL<<1;
+                else if (strcmp(type_list[i], "mnps") == 0) args->exclude |= VCF_MNP<<1;
+                else if (strcmp(type_list[i], "other") == 0) args->exclude |= VCF_OTHER<<1;
+                else if (strcmp(type_list[i], "ref") == 0) args->exclude |= VCF_OTHER<<1;
+                else if (strcmp(type_list[i], "bnd") == 0) args->exclude |= VCF_BND<<1;
                 else {
                     fprintf(stderr, "[E::%s] unknown type\n", type_list[i]);
                     fprintf(stderr, "Accepted types are snps, indels, mnps, other\n");
@@ -220,7 +225,8 @@ static void init_data(args_t *args)
     else if (args->output_type & FT_GZ) strcat(modew,"z");      // compressed VCF
     args->out = hts_open(args->fn_out ? args->fn_out : "-", modew);
     if ( !args->out ) error("%s: %s\n", args->fn_out,strerror(errno));
-    if ( args->n_threads ) hts_set_threads(args->out, args->n_threads);
+    if ( args->n_threads > 0)
+        hts_set_opt(args->out, HTS_OPT_THREAD_POOL, args->files->p);
 
     // headers: hdr=full header, hsub=subset header, hnull=sites only header
     if (args->sites_only){
@@ -315,8 +321,8 @@ int subset_vcf(args_t *args, bcf1_t *line)
     if (args->include || args->exclude)
     {
         int line_type = bcf_get_variant_types(line);
-        if ( args->include && !(line_type&args->include) ) return 0; // include only given variant types
-        if ( args->exclude &&   line_type&args->exclude  ) return 0; // exclude given variant types
+        if ( args->include && !((line_type<<1) & args->include) ) return 0; // include only given variant types
+        if ( args->exclude &&   (line_type<<1) & args->exclude  ) return 0; // exclude given variant types
     }
 
     if ( args->filter )
@@ -398,7 +404,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
         }
     }
 
-    if (args->min_ac)
+    if (args->min_ac!=-1)
     {
         if (args->min_ac_type == ALLELE_NONREF && args->min_ac>non_ref_ac) return 0; // min AC
         else if (args->min_ac_type == ALLELE_MINOR && args->min_ac>minor_ac) return 0; // min minor AC
@@ -406,7 +412,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
         else if (args->min_ac_type == ALLELE_MAJOR && args->min_ac > major_ac) return 0; // min major AC
         else if (args->min_ac_type == ALLELE_NONMAJOR && args->min_ac > an-major_ac) return 0; // min non-major AC
     }
-    if (args->max_ac)
+    if (args->max_ac!=-1)
     {
         if (args->max_ac_type == ALLELE_NONREF && args->max_ac<non_ref_ac) return 0; // max AC
         else if (args->max_ac_type == ALLELE_MINOR && args->max_ac<minor_ac) return 0; // max minor AC
@@ -414,7 +420,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
         else if (args->max_ac_type == ALLELE_MAJOR && args->max_ac < major_ac) return 0; // max major AC
         else if (args->max_ac_type == ALLELE_NONMAJOR && args->max_ac < an-major_ac) return 0; // max non-major AC
     }
-    if (args->min_af)
+    if (args->min_af!=-1)
     {
         if (an == 0) return 0; // freq not defined, skip site
         if (args->min_af_type == ALLELE_NONREF && args->min_af>non_ref_ac/(double)an) return 0; // min AF
@@ -423,7 +429,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
         else if (args->min_af_type == ALLELE_MAJOR && args->min_af > major_ac/(double)an) return 0; // min major AF
         else if (args->min_af_type == ALLELE_NONMAJOR && args->min_af > (an-major_ac)/(double)an) return 0; // min non-major AF
     }
-    if (args->max_af)
+    if (args->max_af!=-1)
     {
         if (an == 0) return 0; // freq not defined, skip site
         if (args->max_af_type == ALLELE_NONREF && args->max_af<non_ref_ac/(double)an) return 0; // max AF
@@ -443,7 +449,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
     if (args->trim_alts)
     {
         int ret = bcf_trim_alleles(args->hsub ? args->hsub : args->hdr, line);
-        if ( ret==-1 ) error("Error: some GT index is out of bounds at %s:%d\n", bcf_seqname(args->hsub ? args->hsub : args->hdr, line), line->pos+1);
+        if ( ret<0 ) error("Error: Could not trim alleles at %s:%d\n", bcf_seqname(args->hsub ? args->hsub : args->hdr, line), line->pos+1);
     }
     if (args->phased) {
         int phased = bcf_all_phased(args->hdr, line);
@@ -494,7 +500,7 @@ static void usage(args_t *args)
     fprintf(stderr, "    -R, --regions-file <file>           restrict to regions listed in a file\n");
     fprintf(stderr, "    -t, --targets [^]<region>           similar to -r but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
     fprintf(stderr, "    -T, --targets-file [^]<file>        similar to -R but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
-    fprintf(stderr, "        --threads <int>                 number of extra output compression threads [0]\n");
+    fprintf(stderr, "        --threads <int>                 number of extra (de)compression threads [0]\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Subset options:\n");
     fprintf(stderr, "    -a, --trim-alt-alleles        trim alternate alleles not seen in the subset\n");
@@ -515,7 +521,7 @@ static void usage(args_t *args)
     fprintf(stderr, "    -q/Q, --min-af/--max-af <float>[:<type>]    minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent\n");
     fprintf(stderr, "                                                   (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
     fprintf(stderr, "    -u/U, --uncalled/--exclude-uncalled         select/exclude sites without a called genotype\n");
-    fprintf(stderr, "    -v/V, --types/--exclude-types <list>        select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]\n");
+    fprintf(stderr, "    -v/V, --types/--exclude-types <list>        select/exclude comma-separated list of variant types: snps,indels,mnps,ref,bnd,other [null]\n");
     fprintf(stderr, "    -x/X, --private/--exclude-private           select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples\n");
     fprintf(stderr, "\n");
     exit(1);
@@ -533,6 +539,7 @@ int main_vcfview(int argc, char *argv[])
     args->output_type = FT_VCF;
     args->n_threads = 0;
     args->record_cmd_line = 1;
+    args->min_ac = args->max_ac = args->min_af = args->max_af = -1;
     int targets_is_file = 0, regions_is_file = 0;
 
     static struct option loptions[] =
@@ -726,6 +733,7 @@ int main_vcfview(int argc, char *argv[])
             error("Failed to read the targets: %s\n", args->targets_list);
     }
 
+    if ( bcf_sr_set_threads(args->files, args->n_threads)<0 ) error("Failed to create threads\n");
     if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
 
     init_data(args);
@@ -734,6 +742,8 @@ int main_vcfview(int argc, char *argv[])
         bcf_hdr_write(args->out, out_hdr);
     else if ( args->output_type & FT_BCF )
         error("BCF output requires header, cannot proceed with -H\n");
+
+    int ret = 0;
     if (!args->header_only)
     {
         while ( bcf_sr_next_line(args->files) )
@@ -743,10 +753,12 @@ int main_vcfview(int argc, char *argv[])
             if ( subset_vcf(args, line) )
                 bcf_write1(args->out, out_hdr, line);
         }
+        ret = args->files->errnum;
+        if ( ret ) fprintf(stderr,"Error: %s\n", bcf_sr_strerror(args->files->errnum));
     }
     hts_close(args->out);
     destroy_data(args);
     bcf_sr_destroy(args->files);
     free(args);
-    return 0;
+    return ret;
 }
diff --git a/bcftools/vcfview.c.pysam.c b/bcftools/vcfview.c.pysam.c
index 53b7c53..a471f37 100644
--- a/bcftools/vcfview.c.pysam.c
+++ b/bcftools/vcfview.c.pysam.c
@@ -25,6 +25,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <stdio.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <ctype.h>
@@ -183,10 +184,12 @@ static void init_data(args_t *args)
         if (args->include_types) {
             args->include = 0;
             for (i = 0; i < n; ++i) {
-                if (strcmp(type_list[i], "snps") == 0) args->include |= VCF_SNP;
-                else if (strcmp(type_list[i], "indels") == 0) args->include |= VCF_INDEL;
-                else if (strcmp(type_list[i], "mnps") == 0) args->include |= VCF_MNP;
-                else if (strcmp(type_list[i], "other") == 0) args->include |= VCF_OTHER;
+                if (strcmp(type_list[i], "snps") == 0) args->include |= VCF_SNP<<1;
+                else if (strcmp(type_list[i], "indels") == 0) args->include |= VCF_INDEL<<1;
+                else if (strcmp(type_list[i], "mnps") == 0) args->include |= VCF_MNP<<1;
+                else if (strcmp(type_list[i], "other") == 0) args->include |= VCF_OTHER<<1;
+                else if (strcmp(type_list[i], "ref") == 0) args->include |= VCF_OTHER<<1;
+                else if (strcmp(type_list[i], "bnd") == 0) args->include |= VCF_BND<<1;
                 else {
                     fprintf(pysam_stderr, "[E::%s] unknown type\n", type_list[i]);
                     fprintf(pysam_stderr, "Accepted types are snps, indels, mnps, other\n");
@@ -197,10 +200,12 @@ static void init_data(args_t *args)
         if (args->exclude_types) {
             args->exclude = 0;
             for (i = 0; i < n; ++i) {
-                if (strcmp(type_list[i], "snps") == 0) args->exclude |= VCF_SNP;
-                else if (strcmp(type_list[i], "indels") == 0) args->exclude |= VCF_INDEL;
-                else if (strcmp(type_list[i], "mnps") == 0) args->exclude |= VCF_MNP;
-                else if (strcmp(type_list[i], "other") == 0) args->exclude |= VCF_OTHER;
+                if (strcmp(type_list[i], "snps") == 0) args->exclude |= VCF_SNP<<1;
+                else if (strcmp(type_list[i], "indels") == 0) args->exclude |= VCF_INDEL<<1;
+                else if (strcmp(type_list[i], "mnps") == 0) args->exclude |= VCF_MNP<<1;
+                else if (strcmp(type_list[i], "other") == 0) args->exclude |= VCF_OTHER<<1;
+                else if (strcmp(type_list[i], "ref") == 0) args->exclude |= VCF_OTHER<<1;
+                else if (strcmp(type_list[i], "bnd") == 0) args->exclude |= VCF_BND<<1;
                 else {
                     fprintf(pysam_stderr, "[E::%s] unknown type\n", type_list[i]);
                     fprintf(pysam_stderr, "Accepted types are snps, indels, mnps, other\n");
@@ -222,7 +227,8 @@ static void init_data(args_t *args)
     else if (args->output_type & FT_GZ) strcat(modew,"z");      // compressed VCF
     args->out = hts_open(args->fn_out ? args->fn_out : "-", modew);
     if ( !args->out ) error("%s: %s\n", args->fn_out,strerror(errno));
-    if ( args->n_threads ) hts_set_threads(args->out, args->n_threads);
+    if ( args->n_threads > 0)
+        hts_set_opt(args->out, HTS_OPT_THREAD_POOL, args->files->p);
 
     // headers: hdr=full header, hsub=subset header, hnull=sites only header
     if (args->sites_only){
@@ -317,8 +323,8 @@ int subset_vcf(args_t *args, bcf1_t *line)
     if (args->include || args->exclude)
     {
         int line_type = bcf_get_variant_types(line);
-        if ( args->include && !(line_type&args->include) ) return 0; // include only given variant types
-        if ( args->exclude &&   line_type&args->exclude  ) return 0; // exclude given variant types
+        if ( args->include && !((line_type<<1) & args->include) ) return 0; // include only given variant types
+        if ( args->exclude &&   (line_type<<1) & args->exclude  ) return 0; // exclude given variant types
     }
 
     if ( args->filter )
@@ -400,7 +406,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
         }
     }
 
-    if (args->min_ac)
+    if (args->min_ac!=-1)
     {
         if (args->min_ac_type == ALLELE_NONREF && args->min_ac>non_ref_ac) return 0; // min AC
         else if (args->min_ac_type == ALLELE_MINOR && args->min_ac>minor_ac) return 0; // min minor AC
@@ -408,7 +414,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
         else if (args->min_ac_type == ALLELE_MAJOR && args->min_ac > major_ac) return 0; // min major AC
         else if (args->min_ac_type == ALLELE_NONMAJOR && args->min_ac > an-major_ac) return 0; // min non-major AC
     }
-    if (args->max_ac)
+    if (args->max_ac!=-1)
     {
         if (args->max_ac_type == ALLELE_NONREF && args->max_ac<non_ref_ac) return 0; // max AC
         else if (args->max_ac_type == ALLELE_MINOR && args->max_ac<minor_ac) return 0; // max minor AC
@@ -416,7 +422,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
         else if (args->max_ac_type == ALLELE_MAJOR && args->max_ac < major_ac) return 0; // max major AC
         else if (args->max_ac_type == ALLELE_NONMAJOR && args->max_ac < an-major_ac) return 0; // max non-major AC
     }
-    if (args->min_af)
+    if (args->min_af!=-1)
     {
         if (an == 0) return 0; // freq not defined, skip site
         if (args->min_af_type == ALLELE_NONREF && args->min_af>non_ref_ac/(double)an) return 0; // min AF
@@ -425,7 +431,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
         else if (args->min_af_type == ALLELE_MAJOR && args->min_af > major_ac/(double)an) return 0; // min major AF
         else if (args->min_af_type == ALLELE_NONMAJOR && args->min_af > (an-major_ac)/(double)an) return 0; // min non-major AF
     }
-    if (args->max_af)
+    if (args->max_af!=-1)
     {
         if (an == 0) return 0; // freq not defined, skip site
         if (args->max_af_type == ALLELE_NONREF && args->max_af<non_ref_ac/(double)an) return 0; // max AF
@@ -445,7 +451,7 @@ int subset_vcf(args_t *args, bcf1_t *line)
     if (args->trim_alts)
     {
         int ret = bcf_trim_alleles(args->hsub ? args->hsub : args->hdr, line);
-        if ( ret==-1 ) error("Error: some GT index is out of bounds at %s:%d\n", bcf_seqname(args->hsub ? args->hsub : args->hdr, line), line->pos+1);
+        if ( ret<0 ) error("Error: Could not trim alleles at %s:%d\n", bcf_seqname(args->hsub ? args->hsub : args->hdr, line), line->pos+1);
     }
     if (args->phased) {
         int phased = bcf_all_phased(args->hdr, line);
@@ -496,7 +502,7 @@ static void usage(args_t *args)
     fprintf(pysam_stderr, "    -R, --regions-file <file>           restrict to regions listed in a file\n");
     fprintf(pysam_stderr, "    -t, --targets [^]<region>           similar to -r but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
     fprintf(pysam_stderr, "    -T, --targets-file [^]<file>        similar to -R but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
-    fprintf(pysam_stderr, "        --threads <int>                 number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "        --threads <int>                 number of extra (de)compression threads [0]\n");
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "Subset options:\n");
     fprintf(pysam_stderr, "    -a, --trim-alt-alleles        trim alternate alleles not seen in the subset\n");
@@ -517,7 +523,7 @@ static void usage(args_t *args)
     fprintf(pysam_stderr, "    -q/Q, --min-af/--max-af <float>[:<type>]    minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent\n");
     fprintf(pysam_stderr, "                                                   (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
     fprintf(pysam_stderr, "    -u/U, --uncalled/--exclude-uncalled         select/exclude sites without a called genotype\n");
-    fprintf(pysam_stderr, "    -v/V, --types/--exclude-types <list>        select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]\n");
+    fprintf(pysam_stderr, "    -v/V, --types/--exclude-types <list>        select/exclude comma-separated list of variant types: snps,indels,mnps,ref,bnd,other [null]\n");
     fprintf(pysam_stderr, "    -x/X, --private/--exclude-private           select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples\n");
     fprintf(pysam_stderr, "\n");
     exit(1);
@@ -535,6 +541,7 @@ int main_vcfview(int argc, char *argv[])
     args->output_type = FT_VCF;
     args->n_threads = 0;
     args->record_cmd_line = 1;
+    args->min_ac = args->max_ac = args->min_af = args->max_af = -1;
     int targets_is_file = 0, regions_is_file = 0;
 
     static struct option loptions[] =
@@ -728,6 +735,7 @@ int main_vcfview(int argc, char *argv[])
             error("Failed to read the targets: %s\n", args->targets_list);
     }
 
+    if ( bcf_sr_set_threads(args->files, args->n_threads)<0 ) error("Failed to create threads\n");
     if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
 
     init_data(args);
@@ -736,6 +744,8 @@ int main_vcfview(int argc, char *argv[])
         bcf_hdr_write(args->out, out_hdr);
     else if ( args->output_type & FT_BCF )
         error("BCF output requires header, cannot proceed with -H\n");
+
+    int ret = 0;
     if (!args->header_only)
     {
         while ( bcf_sr_next_line(args->files) )
@@ -745,10 +755,12 @@ int main_vcfview(int argc, char *argv[])
             if ( subset_vcf(args, line) )
                 bcf_write1(args->out, out_hdr, line);
         }
+        ret = args->files->errnum;
+        if ( ret ) fprintf(pysam_stderr,"Error: %s\n", bcf_sr_strerror(args->files->errnum));
     }
     hts_close(args->out);
     destroy_data(args);
     bcf_sr_destroy(args->files);
     free(args);
-    return 0;
+    return ret;
 }
diff --git a/bcftools/version.h b/bcftools/version.h
index 05929f5..84247e7 100644
--- a/bcftools/version.h
+++ b/bcftools/version.h
@@ -1 +1 @@
-#define BCFTOOLS_VERSION "1.3.1"
+#define BCFTOOLS_VERSION "1.4.1"
diff --git a/buildwheels.sh b/buildwheels.sh
index a5987f1..ae0d953 100755
--- a/buildwheels.sh
+++ b/buildwheels.sh
@@ -22,7 +22,7 @@ if ! grep -q docker /proc/1/cgroup; then
   exec docker run --rm -v $(pwd):/io quay.io/pypa/manylinux1_x86_64 /io/$0
 fi
 
-yum install -y zlib-devel
+yum install -y zlib-devel bzip2-devel xz-devel
 
 # Python 2.6 is not supported
 rm -r /opt/python/cp26*
diff --git a/doc/api.rst b/doc/api.rst
index 686c60d..8e76686 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -88,11 +88,11 @@ The above code outputs::
 Commands available in :term:`csamtools` are available as simple
 function calls. For example::
 
-   pysam.sort("ex1.bam", "output")
+   pysam.sort("-o", "output.bam", "ex1.bam")
 
 corresponds to the command line::
 
-   samtools sort ex1.bam output
+   samtools sort -o output.bam ex1.bam 
 
 Analogous to :class:`~pysam.AlignmentFile`, a
 :class:`~pysam.TabixFile` allows fast random access to compressed and
diff --git a/doc/release.rst b/doc/release.rst
index 1d378f3..3874856 100644
--- a/doc/release.rst
+++ b/doc/release.rst
@@ -2,6 +2,81 @@
 Release notes
 =============
 
+Release 0.11.2.2
+================
+
+Bugfix release to address two issues:
+
+* Changes in 0.11.2.1 broke the GTF/GFF3 parser. Corrected and
+  more tests have been added.
+* [#479] Correct VariantRecord edge cases described in issue
+
+
+Release 0.11.2.1
+================
+
+Release to fix release tar-ball containing 0.11.1 pre-compiled
+C-files.
+
+
+Release 0.11.2
+==============
+
+This release wraps htslib/samtools/bcfools versions 1.4.1 in response
+to a security fix in these libraries. Additionaly the following
+issues have been fixed:
+
+* [#452] add GFF3 support for tabix parsers
+* [#461] Multiple fixes related to VariantRecordInfo and handling of INFO/END
+* [#447] limit query name to 251 characters (only partially addresses issue)
+
+VariantFile and related object fixes
+
+* Restore VariantFile.\_\_dealloc\_\_
+* Correct handling of bcf_str_missing in bcf_array_to_object and
+  bcf_object_to_array
+* Added update() and pop() methods to some dict-like proxy objects
+* scalar INFO entries could not be set again after being deleted
+* VariantRecordInfo.__delitem__ now allows unset flags to be deleted without
+  raising a KeyError
+* Multiple other fixes for VariantRecordInfo methods
+* INFO/END is now accessible only via VariantRecord.stop and
+  VariantRecord.rlen.  Even if present behind the scenes, it is no longer
+  accessible via VariantRecordInfo.
+* Add argument to issue a warning instead of an exception if input appears
+  to be truncated
+
+Other features and fixes:
+
+* Make AlignmentFile \_\_dealloc\_\_ and close more
+  stringent
+* Add argument AlignmentFile to issue a warning instead of an
+  exception if input appears to be truncated
+
+Release 0.11.1
+==============
+
+Bugfix release
+
+* [#440] add deprecated 'always' option to infer_query_length for backwards compatibility.
+
+Release 0.11.0
+==============
+
+This release wraps the latest versions of htslib/samtools/bcftools and
+implements a few bugfixes.
+
+* [#413] Wrap HTSlib/Samtools/BCFtools 1.4 
+* [#422] Fix missing pysam.sort.usage() message
+* [#411] Fix BGZfile initialization bug
+* [#412] Add seek support for BGZFile
+* [#395] Make BGZfile iterable
+* [#433] Correct getQueryEnd
+* [#419] Export SAM enums such as pysam.CMATCH
+* [#415] Fix access by tid in AlignmentFile.fetch()
+* [#405] Writing SAM now outputs a header by default.
+* [#332] split infer_query_length(always) into infer_query_length and infer_read_length
+
 Release 0.10.0
 ==============
 
diff --git a/doc/usage.rst b/doc/usage.rst
index 936f3bd..6172329 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -123,26 +123,23 @@ Note that the file open mode needs to changed from ``r`` to ``rb``.
 Using samtools commands within python
 =====================================
 
-Commands available in :term:`csamtools` are available
-as simple function calls. For example::
+Commands available in :term:`csamtools` are available as simple
+function calls. Command line options are provided as arguments. For
+example::
 
-   pysam.sort("ex1.bam", "output")
+   pysam.sort("-o", "output.bam", "ex1.bam")
 
 corresponds to the command line::
 
-   samtools sort ex1.bam output
+   samtools sort -o output.bam ex1.bam
 
-Command line options can be provided as arguments::
-   
-   pysam.sort("-n", "ex1.bam", "output")
-
-or::
+Or for example::
 
-   pysam.sort("-m", "1000000", "ex1.bam", "output")
+   pysam.sort("-m", "1000000", "-o", "output.bam", "ex1.bam")
 
 In order to get usage information, try::
 
-   print pysam.sort.usage()
+   print(pysam.sort.usage())
 
 Argument errors raise a :class:`pysam.SamtoolsError`::
 
diff --git a/import.py b/import.py
index 12d2016..b8eab01 100644
--- a/import.py
+++ b/import.py
@@ -31,10 +31,22 @@ import hashlib
 
 EXCLUDE = {
     "samtools": (
-        "razip.c", "bgzip.c", "main.c",
-        "calDepth.c", "bam2bed.c", "wgsim.c",
-        "md5fa.c", "md5sum-lite.c", "maq2sam.c",
-        "bamcheck.c", "chk_indel.c", "vcf-miniview.c",
+        "razip.c",
+        "bgzip.c",
+        "main.c",
+        "calDepth.c",
+        "bam2bed.c",
+        "wgsim.c",
+        "bam_tview.c",
+        "bam_tview.h",
+        "bam_tview_html.c",
+        "bam_tview_curses.c",
+        "md5fa.c",
+        "md5sum-lite.c",
+        "maq2sam.c",
+        "bamcheck.c",
+        "chk_indel.c",
+        "vcf-miniview.c",
         "htslib-1.3",   # do not import twice
         "hfile_irods.c",  # requires irods library
     ),
@@ -73,9 +85,10 @@ def _update_pysam_files(cf, destdir):
         if not filename:
             continue
         dest = filename + ".pysam.c"
-        with open(filename) as infile:
+        with open(filename, encoding="utf-8") as infile:
             lines = "".join(infile.readlines())
-            with open(dest, "w") as outfile:
+
+            with open(dest, "w", encoding="utf-8") as outfile:
                 outfile.write('#include "pysam.h"\n\n')
                 subname, _ = os.path.splitext(os.path.basename(filename))
                 if subname in MAIN.get(basename, []):
@@ -161,9 +174,9 @@ if len(sys.argv) >= 1:
         old_file = os.path.join(targetdir, f)
         if os.path.exists(old_file):
             md5_old = hashlib.md5(
-                "".join(open(old_file, "r").readlines())).digest()
+                "".join(open(old_file, "r", encoding="utf-8").readlines()).encode()).digest()
             md5_new = hashlib.md5(
-                "".join(open(src, "r").readlines())).digest()
+                "".join(open(src, "r", encoding="utf-8").readlines()).encode()).digest()
             if md5_old != md5_new:
                 raise ValueError(
                     "incompatible files for %s and %s" %
diff --git a/pysam/__init__.py b/pysam/__init__.py
index ed17e04..c142c6c 100644
--- a/pysam/__init__.py
+++ b/pysam/__init__.py
@@ -3,6 +3,8 @@ import sys
 import sysconfig
 
 from pysam.libchtslib import *
+from pysam.libcsamtools import *
+from pysam.libcbcftools import *
 from pysam.libcutils import *
 import pysam.libcutils as libcutils
 import pysam.libcfaidx as libcfaidx
diff --git a/pysam/cbcftools_util.h b/pysam/cbcftools_util.h
new file mode 100644
index 0000000..4a9f2e9
--- /dev/null
+++ b/pysam/cbcftools_util.h
@@ -0,0 +1,6 @@
+#ifndef CBCFTOOLS_UTIL_H
+#define CBCFTOOLS_UTIL_H
+
+int bcftools_main(int argc, char *argv[]);
+
+#endif
diff --git a/pysam/csamtools_util.h b/pysam/csamtools_util.h
new file mode 100644
index 0000000..0a03c13
--- /dev/null
+++ b/pysam/csamtools_util.h
@@ -0,0 +1,6 @@
+#ifndef CSAMTOOLS_UTIL_H
+#define CSAMTOOLS_UTIL_H
+
+int samtools_main(int argc, char *argv[]);
+
+#endif
diff --git a/pysam/htslib_util.h b/pysam/htslib_util.h
index f0d582c..c714986 100644
--- a/pysam/htslib_util.h
+++ b/pysam/htslib_util.h
@@ -92,36 +92,16 @@ static inline int pysam_bam_get_l_aux(bam1_t * b) {
 static inline char pysam_bam_seqi(uint8_t * s, int i) {
   return bam_seqi(s,i);}
 
-// Wrapping bit field access in bam1_core_t
-// bit fields not supported in cython and due
-// to endian-ness it is not clear which part
-// of the bit-field is in the higher or lower bytes.
-static inline uint16_t pysam_get_bin(bam1_t * b) {
-  return b->core.bin;}
-
 static inline uint8_t pysam_get_qual(bam1_t * b) {
   return b->core.qual;}
 
-static inline uint8_t pysam_get_l_qname(bam1_t * b) {
-  return b->core.l_qname;}
-
-static inline uint16_t pysam_get_flag(bam1_t * b) {
-  return b->core.flag;}
 
 static inline uint16_t pysam_get_n_cigar(bam1_t * b) {
   return b->core.n_cigar;}
 
-static inline void pysam_set_bin(bam1_t * b, uint16_t v) {
-  b->core.bin=v;}
-
 static inline void pysam_set_qual(bam1_t * b, uint8_t v) {
   b->core.qual=v;}
 
-static inline void pysam_set_l_qname(bam1_t * b, uint8_t v) {
-  b->core.l_qname=v;}
-
-static inline void pysam_set_flag(bam1_t * b, uint16_t v) {
-  b->core.flag=v;}
 
 static inline void pysam_set_n_cigar(bam1_t * b, uint16_t v) {
   b->core.n_cigar=v;}
diff --git a/pysam/libcalignedsegment.pxd b/pysam/libcalignedsegment.pxd
index f1d59d1..8441313 100644
--- a/pysam/libcalignedsegment.pxd
+++ b/pysam/libcalignedsegment.pxd
@@ -19,15 +19,9 @@ cdef extern from "htslib_util.h":
     int pysam_bam_get_l_aux(bam1_t * b)
     char pysam_bam_seqi(uint8_t * s, int i)
 
-    uint16_t pysam_get_bin(bam1_t * b)
     uint8_t pysam_get_qual(bam1_t * b)
-    uint8_t pysam_get_l_qname(bam1_t * b)
-    uint16_t pysam_get_flag(bam1_t * b)
     uint16_t pysam_get_n_cigar(bam1_t * b)
-    void pysam_set_bin(bam1_t * b, uint16_t v)
     void pysam_set_qual(bam1_t * b, uint8_t v)
-    void pysam_set_l_qname(bam1_t * b, uint8_t v)
-    void pysam_set_flag(bam1_t * b, uint16_t v)
     void pysam_set_n_cigar(bam1_t * b, uint16_t v)
     void pysam_update_flag(bam1_t * b, uint16_t v, uint16_t flag)
 
diff --git a/pysam/libcalignedsegment.pyx b/pysam/libcalignedsegment.pyx
index c95bb13..73d426a 100644
--- a/pysam/libcalignedsegment.pyx
+++ b/pysam/libcalignedsegment.pyx
@@ -61,7 +61,7 @@ import struct
 cimport cython
 from cpython cimport array as c_array
 from cpython.version cimport PY_MAJOR_VERSION
-from cpython cimport PyErr_SetString, PyBytes_FromStringAndSize
+from cpython cimport PyBytes_FromStringAndSize
 from libc.string cimport strchr
 from cpython cimport array as c_array
 
@@ -281,6 +281,9 @@ cdef inline packTags(tags):
                          len(value)] + list(value))
 
         elif isinstance(value, array.array):
+            valuetype = value.typecode
+            if valuetype not in datatype2format:
+                valuetype = None
             # binary tags from arrays
             if valuetype is None:
                 array_typecode = map_typecode_python_to_htslib(ord(value.typecode))
@@ -325,9 +328,41 @@ cdef inline packTags(tags):
     return "".join(fmts), args
 
 
-cdef inline int32_t calculateQueryLength(bam1_t * src):
+cdef inline int32_t calculateQueryLengthWithoutHardClipping(bam1_t * src):
     """return query length computed from CIGAR alignment.
 
+    Length ignores hard-clipped bases.
+
+    Return 0 if there is no CIGAR alignment.
+    """
+
+    cdef uint32_t * cigar_p = pysam_bam_get_cigar(src)
+
+    if cigar_p == NULL:
+        return 0
+
+    cdef uint32_t k, qpos
+    cdef int op
+    qpos = 0
+
+    for k from 0 <= k < pysam_get_n_cigar(src):
+        op = cigar_p[k] & BAM_CIGAR_MASK
+
+        if op == BAM_CMATCH or \
+           op == BAM_CINS or \
+           op == BAM_CSOFT_CLIP or \
+           op == BAM_CEQUAL or \
+           op == BAM_CDIFF:
+            qpos += cigar_p[k] >> BAM_CIGAR_SHIFT
+
+    return qpos
+
+
+cdef inline int32_t calculateQueryLengthWithHardClipping(bam1_t * src):
+    """return query length computed from CIGAR alignment.
+
+    Length includes hard-clipped bases.
+
     Return 0 if there is no CIGAR alignment.
     """
 
@@ -356,44 +391,45 @@ cdef inline int32_t calculateQueryLength(bam1_t * src):
 
 cdef inline int32_t getQueryStart(bam1_t *src) except -1:
     cdef uint32_t * cigar_p
-    cdef uint32_t k, op
     cdef uint32_t start_offset = 0
+    cdef uint32_t k, op
 
-    if pysam_get_n_cigar(src):
-        cigar_p = pysam_bam_get_cigar(src);
-        for k from 0 <= k < pysam_get_n_cigar(src):
-            op = cigar_p[k] & BAM_CIGAR_MASK
-            if op == BAM_CHARD_CLIP:
-                if start_offset != 0 and start_offset != src.core.l_qseq:
-                    PyErr_SetString(ValueError, 'Invalid clipping in CIGAR string')
-                    return -1
-            elif op == BAM_CSOFT_CLIP:
-                start_offset += cigar_p[k] >> BAM_CIGAR_SHIFT
-            else:
-                break
+    cigar_p = pysam_bam_get_cigar(src);
+    for k from 0 <= k < pysam_get_n_cigar(src):
+        op = cigar_p[k] & BAM_CIGAR_MASK
+        if op == BAM_CHARD_CLIP:
+            if start_offset != 0 and start_offset != src.core.l_qseq:
+                raise ValueError('Invalid clipping in CIGAR string')
+        elif op == BAM_CSOFT_CLIP:
+            start_offset += cigar_p[k] >> BAM_CIGAR_SHIFT
+        else:
+            break
 
     return start_offset
 
 
 cdef inline int32_t getQueryEnd(bam1_t *src) except -1:
-    cdef uint32_t * cigar_p
-    cdef uint32_t k, op
+    cdef uint32_t * cigar_p = pysam_bam_get_cigar(src)
     cdef uint32_t end_offset = src.core.l_qseq
+    cdef uint32_t k, op
 
     # if there is no sequence, compute length from cigar string
     if end_offset == 0:
-        end_offset = calculateQueryLength(src)
-
-    # walk backwards in cigar string
-    if pysam_get_n_cigar(src) > 1:
-        cigar_p = pysam_bam_get_cigar(src);
+        for k from 0 <= k < pysam_get_n_cigar(src):
+            op = cigar_p[k] & BAM_CIGAR_MASK
+            if op == BAM_CMATCH or \
+               op == BAM_CINS or \
+               op == BAM_CEQUAL or \
+               op == BAM_CDIFF or \
+              (op == BAM_CSOFT_CLIP and end_offset == 0):
+                end_offset += cigar_p[k] >> BAM_CIGAR_SHIFT
+    else:
+        # walk backwards in cigar string
         for k from pysam_get_n_cigar(src) > k >= 1:
             op = cigar_p[k] & BAM_CIGAR_MASK
             if op == BAM_CHARD_CLIP:
-                if end_offset != 0 and end_offset != src.core.l_qseq:
-                    PyErr_SetString(ValueError,
-                                    'Invalid clipping in CIGAR string')
-                    return -1
+                if end_offset != src.core.l_qseq:
+                    raise ValueError('Invalid clipping in CIGAR string')
             elif op == BAM_CSOFT_CLIP:
                 end_offset -= cigar_p[k] >> BAM_CIGAR_SHIFT
             else:
@@ -748,10 +784,13 @@ cdef class AlignedSegment:
         if t == o:
             return 0
 
+        cdef uint8_t *a = <uint8_t*>&t.core
+        cdef uint8_t *b = <uint8_t*>&o.core
+        
         retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t))
-
         if retval:
             return retval
+
         # cmp(t.l_data, o.l_data)
         retval = (t.l_data > o.l_data) - (t.l_data < o.l_data)
         if retval:
@@ -819,49 +858,60 @@ cdef class AlignedSegment:
     property query_name:
         """the query template name (None if not present)"""
         def __get__(self):
-            cdef bam1_t * src
-            src = self._delegate
-            if pysam_get_l_qname(src) == 0:
+
+            cdef bam1_t * src = self._delegate
+            if src.core.l_qname == 0:
                 return None
+
             return charptr_to_str(<char *>pysam_bam_get_qname(src))
 
         def __set__(self, qname):
+
             if qname is None or len(qname) == 0:
                 return
 
-            if len(qname) >= 255:
-                raise ValueError("query length out of range {} > 254".format(
+            # See issue #447
+            # (The threshold is 252 chars, but this includes a \0 byte.
+            if len(qname) > 251:
+                raise ValueError("query length out of range {} > 251".format(
                     len(qname)))
 
             qname = force_bytes(qname)
-            cdef bam1_t * src
-            cdef int l
-            cdef char * p
+            cdef bam1_t * src = self._delegate
+            # the qname is \0 terminated
+            cdef uint8_t l = len(qname) + 1
 
-            src = self._delegate
-            p = pysam_bam_get_qname(src)
+            cdef char * p = pysam_bam_get_qname(src)
+            cdef uint8_t l_extranul = 0
+
+            if l % 4 != 0:
+                l_extranul = 4 - l % 4
 
-            # the qname is \0 terminated
-            l = len(qname) + 1
             pysam_bam_update(src,
-                             pysam_get_l_qname(src),
-                             l,
+                             src.core.l_qname,
+                             l + l_extranul,
                              <uint8_t*>p)
 
-            pysam_set_l_qname(src, l)
-
+            src.core.l_extranul = l_extranul
+            src.core.l_qname = l + l_extranul
+            
             # re-acquire pointer to location in memory
             # as it might have moved
             p = pysam_bam_get_qname(src)
 
             strncpy(p, qname, l)
+            # x might be > 255
+            cdef uint16_t x = 0
+
+            for x from l <= x < l + l_extranul:
+                p[x] = '\0'
 
     property flag:
         """properties flag"""
         def __get__(self):
-            return pysam_get_flag(self._delegate)
+            return self._delegate.core.flag
         def __set__(self, flag):
-            pysam_set_flag(self._delegate, flag)
+            self._delegate.core.flag = flag
 
     property reference_name:
         """:term:`reference` name (None if no AlignmentFile is associated)"""
@@ -893,19 +943,17 @@ cdef class AlignedSegment:
             src = self._delegate
             src.core.pos = pos
             if pysam_get_n_cigar(src):
-                pysam_set_bin(src,
-                              hts_reg2bin(
-                                  src.core.pos,
-                                  bam_endpos(src),
-                                  14,
-                                  5))
+                src.core.bin = hts_reg2bin(
+                    src.core.pos,
+                    bam_endpos(src),
+                    14,
+                    5)
             else:
-                pysam_set_bin(src,
-                              hts_reg2bin(
-                                  src.core.pos,
-                                  src.core.pos + 1,
-                                  14,
-                                  5))
+                src.core.bin = hts_reg2bin(
+                    src.core.pos,
+                    src.core.pos + 1,
+                    14,
+                    5)
 
     property mapping_quality:
         """mapping quality"""
@@ -1156,9 +1204,9 @@ cdef class AlignedSegment:
     property bin:
         """properties bin"""
         def __get__(self):
-            return pysam_get_bin(self._delegate)
+            return self._delegate.core.bin
         def __set__(self, bin):
-            pysam_set_bin(self._delegate, bin)
+            self._delegate.core.bin = bin
 
 
     ##########################################################
@@ -1344,14 +1392,17 @@ cdef class AlignedSegment:
 
         This the index of the first base in :attr:`seq` that is not
         soft-clipped.
-
         """
         def __get__(self):
             return getQueryStart(self._delegate)
 
     property query_alignment_end:
         """end index of the aligned query portion of the sequence (0-based,
-        exclusive)"""
+        exclusive)
+
+        This the index just past the last base in :attr:`seq` that is not
+        soft-clipped.
+        """
         def __get__(self):
             return getQueryEnd(self._delegate)
 
@@ -1408,26 +1459,30 @@ cdef class AlignedSegment:
 
         return result
 
-    def infer_query_length(self, always=True):
-        """inferred read length from CIGAR string.
+    def infer_query_length(self, always=False):
+        """infer query length from sequence or CIGAR alignment.
 
-        If *always* is set to True, the read length
-        will be always inferred. If set to False, the length
-        of the read sequence will be returned if it is
-        available.
+        This method deduces the query length from the CIGAR alignment
+        but does not include hard-clipped bases.
 
-        Returns None if CIGAR string is not present.
-        """
+        Returns None if CIGAR alignment is not present.
 
-        cdef uint32_t * cigar_p
-        cdef bam1_t * src
+        If *always* is set to True, `infer_read_length` is used instead.
+        This is deprecated and only present for backward compatibility.
+        """
+        if always is True:
+            return self.infer_read_length()
+        return calculateQueryLengthWithoutHardClipping(self._delegate)
 
-        src = self._delegate
+    def infer_read_length(self):
+        """infer read length from CIGAR alignment.
 
-        if not always and src.core.l_qseq:
-            return src.core.l_qseq
+        This method deduces the read length from the CIGAR alignment
+        including hard-clipped bases.
 
-        return calculateQueryLength(src)
+        Returns None if CIGAR alignment is not present.
+        """
+        return calculateQueryLengthWithHardClipping(self._delegate)
 
     def get_reference_sequence(self):
         """return the reference sequence.
@@ -1677,7 +1732,9 @@ cdef class AlignedSegment:
         +-----+--------------+-----+
         |X    |BAM_CDIFF     |8    |
         +-----+--------------+-----+
-        |NM   |NM tag        |9    |
+        |B    |BAM_CBACK     |9    |
+        +-----+--------------+-----+
+        |NM   |NM tag        |10   |
         +-----+--------------+-----+
 
         If no cigar string is present, empty arrays will be returned.
@@ -1756,6 +1813,8 @@ cdef class AlignedSegment:
         +-----+--------------+-----+
         |X    |BAM_CDIFF     |8    |
         +-----+--------------+-----+
+        |B    |BAM_CBACK     |9    |
+        +-----+--------------+-----+
 
         .. note::
             The output is a list of (operation, length) tuples, such as
@@ -1823,12 +1882,11 @@ cdef class AlignedSegment:
                 k += 1
 
             ## setting the cigar string requires updating the bin
-            pysam_set_bin(src,
-                          hts_reg2bin(
-                              src.core.pos,
-                              bam_endpos(src),
-                              14,
-                              5))
+            src.core.bin = hts_reg2bin(
+                src.core.pos,
+                bam_endpos(src),
+                14,
+                5)
 
 
     cpdef set_tag(self,
@@ -2477,7 +2535,71 @@ cdef class PileupRead:
         def __get__(self):
             return self._is_refskip
 
+
+cpdef enum CIGAR_OPS:
+    CMATCH = 0
+    CINS = 1
+    CDEL = 2
+    CREF_SKIP = 3
+    CSOFT_CLIP = 4
+    CHARD_CLIP = 5
+    CPAD = 6
+    CEQUAL = 7
+    CDIFF = 8
+    CBACK = 9
+
+
+cpdef enum SAM_FLAGS:
+    # the read is paired in sequencing, no matter whether it is mapped in a pair 
+    FPAIRED = 1
+    # the read is mapped in a proper pair 
+    FPROPER_PAIR = 2
+    # the read itself is unmapped; conflictive with FPROPER_PAIR 
+    FUNMAP = 4
+    # the mate is unmapped 
+    FMUNMAP = 8
+    # the read is mapped to the reverse strand 
+    FREVERSE = 16
+    # the mate is mapped to the reverse strand 
+    FMREVERSE = 32
+    # this is read1 
+    FREAD1 = 64
+    # this is read2 
+    FREAD2 = 128
+    # not primary alignment 
+    FSECONDARY = 256
+    # QC failure 
+    FQCFAIL = 512
+    # optical or PCR duplicate 
+    FDUP = 1024
+    # supplementary alignment 
+    FSUPPLEMENTARY = 2048      
+
+
 __all__ = [
     "AlignedSegment",
     "PileupColumn",
-    "PileupRead"]
+    "PileupRead",
+    "CMATCH",
+    "CINS",
+    "CDEL",
+    "CREF_SKIP",
+    "CSOFT_CLIP",
+    "CHARD_CLIP",
+    "CPAD",
+    "CEQUAL",
+    "CDIFF",
+    "CBACK",
+    "FPAIRED",
+    "FPROPER_PAIR",
+    "FUNMAP",
+    "FMUNMAP",
+    "FREVERSE",
+    "FMREVERSE",
+    "FREAD1",
+    "FREAD2",
+    "FSECONDARY",
+    "FQCFAIL",
+    "FDUP",
+    "FSUPPLEMENTARY"]
+
diff --git a/pysam/libcalignmentfile.pyx b/pysam/libcalignmentfile.pyx
index 2161f87..0b248c1 100644
--- a/pysam/libcalignmentfile.pyx
+++ b/pysam/libcalignmentfile.pyx
@@ -7,16 +7,16 @@
 # The principal classes defined in this module are:
 #
 # class AlignmentFile   read/write access to SAM/BAM/CRAM formatted files
-# 
+#
 # class IndexedReads    index a SAM/BAM/CRAM file by query name while keeping
 #                       the original sort order intact
-# 
+#
 # Additionally this module defines numerous additional classes that
 # are part of the internal API. These are:
-# 
+#
 # Various iterator classes to iterate over alignments in sequential
 # (IteratorRow) or in a stacked fashion (IteratorColumn):
-# 
+#
 # class IteratorRow
 # class IteratorRowRegion
 # class IteratorRowHead
@@ -76,15 +76,9 @@ else:
 cimport cython
 
 ########################################################
-## Constants and global variables
-
-# defines imported from samtools
-DEF SEEK_SET = 0
-DEF SEEK_CUR = 1
-DEF SEEK_END = 2
-
+## global variables
 # maximum genomic coordinace
-cdef int MAX_POS = 2 << 29
+cdef int  MAX_POS = 2 << 29
 
 # valid types for SAM headers
 VALID_HEADER_TYPES = {"HD" : dict,
@@ -98,7 +92,7 @@ VALID_HEADERS = ("HD", "SQ", "RG", "PG", "CO")
 
 # default type conversions within SAM header records
 KNOWN_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str},
-                       "SQ" : {"SN" : str, "LN" : int, "AS" : str, 
+                       "SQ" : {"SN" : str, "LN" : int, "AS" : str,
                                "M5" : str, "SP" : str, "UR" : str,
                                "AH" : str,},
                        "RG" : {"ID" : str, "CN" : str, "DS" : str,
@@ -106,7 +100,7 @@ KNOWN_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str},
                                "LB" : str, "PG" : str, "PI" : str,
                                "PL" : str, "PM" : str, "PU" : str,
                                "SM" : str,},
-                       "PG" : {"ID" : str, "PN" : str, "CL" : str, 
+                       "PG" : {"ID" : str, "PN" : str, "CL" : str,
                                "PP" : str, "DS" : str, "VN" : str,},}
 
 # output order of fields within records. Ensure that CL is at
@@ -147,20 +141,15 @@ def build_header_line(fields, record):
 
     return "\t".join(line)
 
-cdef bam_hdr_t * build_header(new_header):
+cdef bam_hdr_t * build_header_from_dict(new_header):
     '''return a new header built from a dictionary in `new_header`.
 
     This method inserts the text field, target_name and target_len.
     '''
-
-    lines = []
-
-    # check if hash exists
+    cdef list lines = []
 
     # create new header and copy old data
-    cdef bam_hdr_t * dest
-
-    dest = bam_hdr_init()
+    cdef bam_hdr_t * dest = bam_hdr_init()
 
     # first: defined tags
     for record in VALID_HEADERS:
@@ -219,13 +208,63 @@ cdef bam_hdr_t * build_header(new_header):
     return dest
 
 
+cdef bam_hdr_t * build_header_from_list(reference_names,
+                                        reference_lengths,
+                                        add_sq_text=True,
+                                        text=None):
+
+    assert len(reference_names) == len(reference_lengths), \
+        "unequal names and lengths of reference sequences"
+
+    cdef bam_hdr_t * dest = bam_hdr_init()
+
+    # allocate and fill header
+    reference_names = [force_bytes(ref) for ref in reference_names]
+    dest.n_targets = len(reference_names)
+    n = 0
+    for x in reference_names:
+        n += len(x) + 1
+    dest.target_name = <char**>calloc(n, sizeof(char*))
+    dest.target_len = <uint32_t*>calloc(n, sizeof(uint32_t))
+    for x from 0 <= x < dest.n_targets:
+        dest.target_len[x] = reference_lengths[x]
+        name = reference_names[x]
+        dest.target_name[x] = <char*>calloc(
+            len(name) + 1, sizeof(char))
+        strncpy(dest.target_name[x], name, len(name))
+
+    # Optionally, if there is no text, add a SAM
+    # compatible header to output file.
+    if text is None and add_sq_text:
+        text = []
+        for x from 0 <= x < dest.n_targets:
+            text.append("@SQ\tSN:%s\tLN:%s\n" % \
+                        (force_str(reference_names[x]),
+                         reference_lengths[x]))
+        text = ''.join(text)
+
+    cdef char * ctext = NULL
+
+    if text is not None:
+        # copy without \0
+        text = force_bytes(text)
+        ctext = text
+        dest.l_text = strlen(ctext)
+        dest.text = <char*>calloc(
+            strlen(ctext), sizeof(char))
+        memcpy(dest.text, ctext, strlen(ctext))
+
+    return dest
+
+
 cdef class AlignmentFile(HTSFile):
     """AlignmentFile(filepath_or_object, mode=None, template=None,
     reference_names=None, reference_lengths=None, text=NULL,
     header=None, add_sq_text=False, check_header=True, check_sq=True,
-    reference_filename=None, filename=None, duplicate_filehandle=True)
+    reference_filename=None, filename=None, duplicate_filehandle=True,
+    ignore_truncation=False)
 
-    A :term:`SAM`/:term:`BAM` formatted file. 
+    A :term:`SAM`/:term:`BAM`/:term:`CRAM` formatted file.
 
     If `filepath_or_object` is a string, the file is automatically
     opened. If `filepath_or_object` is a python File object, the
@@ -245,7 +284,7 @@ cdef class AlignmentFile(HTSFile):
            :class:`~pysam.AlignmentFile`).
 
         2. If `header` is given, the header is built from a
-           multi-level dictionary. 
+           multi-level dictionary.
 
         3. If `text` is given, new header text is copied from raw
            text.
@@ -297,20 +336,27 @@ cdef class AlignmentFile(HTSFile):
         when writing, use the string provided as the header
 
     reference_names : list
-        see referece_lengths
+        see reference_lengths
 
     reference_lengths : list
-        when writing, build header from list of chromosome names and
-        lengths.  By default, 'SQ' and 'LN' tags will be added to the
-        header text. This option can be changed by unsetting the flag
-        `add_sq_text`.
+        when writing or opening a SAM file without header build header
+        from list of chromosome names and lengths.  By default, 'SQ'
+        and 'LN' tags will be added to the header text. This option
+        can be changed by unsetting the flag `add_sq_text`.
 
     add_sq_text : bool
         do not add 'SQ' and 'LN' tags to header. This option permits
         construction :term:`SAM` formatted files without a header.
 
+    add_sam_header : bool
+        when outputting SAM the default is to output a header. This is
+        equivalent to opening the file in 'wh' mode. If this option is
+        set to False, no header will be output. To read such a file,
+        set `check_header=False`.
+
     check_header : bool
-        when reading, check if header is present (default=True)
+        obsolete: when reading a SAM file, check if header is present
+        (default=True)
 
     check_sq : bool
         when reading, check if SQ entries are present in header
@@ -326,7 +372,7 @@ cdef class AlignmentFile(HTSFile):
         Alternative to filepath_or_object. Filename of the file
         to be opened.
 
-    duplicate_filehandle: bool 
+    duplicate_filehandle: bool
         By default, file handles passed either directly or through
         File-like objects will be duplicated before passing them to
         htslib. The duplication prevents issues where the same stream
@@ -334,6 +380,10 @@ cdef class AlignmentFile(HTSFile):
         high-level python object. Set to False to turn off
         duplication.
 
+    ignore_truncation: bool
+        Issue a warning, instead of raising an error if the current file
+        appears to be truncated due to a missing EOF marker.  Only applies
+        to bgzipped formats. (Default=False)
     """
 
     def __cinit__(self, *args, **kwargs):
@@ -393,16 +443,19 @@ cdef class AlignmentFile(HTSFile):
               header=None,
               port=None,
               add_sq_text=True,
+              add_sam_header=True,
               check_header=True,
               check_sq=True,
               filepath_index=None,
               referencenames=None,
               referencelengths=None,
-              duplicate_filehandle=True):
+              duplicate_filehandle=True,
+              ignore_truncation=False):
         '''open a sam, bam or cram formatted file.
 
         If _open is called on an existing file, the current file
         will be closed and a new file will be opened.
+
         '''
         cdef char *cfilename = NULL
         cdef char *creference_filename = NULL
@@ -423,6 +476,9 @@ cdef class AlignmentFile(HTSFile):
         if mode is None:
             mode = "r"
 
+        if add_sam_header and mode == "w":
+            mode = "wh"
+
         assert mode in ("r", "w", "rb", "wb", "wh",
                         "wbu", "rU", "wb0",
                         "rc", "wc"), \
@@ -468,10 +524,6 @@ cdef class AlignmentFile(HTSFile):
         self.reference_filename = reference_filename = encode_filename(
             reference_filename)
 
-        cdef char * ctext
-        cdef hFILE * fp
-        ctext = NULL
-
         if mode[0] == 'w':
             # open file for writing
 
@@ -479,50 +531,18 @@ cdef class AlignmentFile(HTSFile):
             if template:
                 self.header = bam_hdr_dup(template.header)
             elif header:
-                self.header = build_header(header)
+                self.header = build_header_from_dict(header)
             else:
-                # build header from a target names and lengths
                 assert reference_names and reference_lengths, \
                     ("either supply options `template`, `header` "
                      "or  both `reference_names` and `reference_lengths` "
                      "for writing")
-                assert len(reference_names) == len(reference_lengths), \
-                    "unequal names and lengths of reference sequences"
-
-                # allocate and fill header
-                reference_names = [force_bytes(ref) for ref in reference_names]
-                self.header = bam_hdr_init()
-                self.header.n_targets = len(reference_names)
-                n = 0
-                for x in reference_names:
-                    n += len(x) + 1
-                self.header.target_name = <char**>calloc(n, sizeof(char*))
-                self.header.target_len = <uint32_t*>calloc(n, sizeof(uint32_t))
-                for x from 0 <= x < self.header.n_targets:
-                    self.header.target_len[x] = reference_lengths[x]
-                    name = reference_names[x]
-                    self.header.target_name[x] = <char*>calloc(
-                        len(name) + 1, sizeof(char))
-                    strncpy(self.header.target_name[x], name, len(name))
-
-                # Optionally, if there is no text, add a SAM
-                # compatible header to output file.
-                if text is None and add_sq_text:
-                    text = []
-                    for x from 0 <= x < self.header.n_targets:
-                        text.append("@SQ\tSN:%s\tLN:%s\n" % \
-                                    (force_str(reference_names[x]), 
-                                     reference_lengths[x]))
-                    text = ''.join(text)
-
-                if text is not None:
-                    # copy without \0
-                    text = force_bytes(text)
-                    ctext = text
-                    self.header.l_text = strlen(ctext)
-                    self.header.text = <char*>calloc(
-                        strlen(ctext), sizeof(char))
-                    memcpy(self.header.text, ctext, strlen(ctext))
+                # build header from a target names and lengths
+                self.header = build_header_from_list(
+                    reference_names,
+                    reference_lengths,
+                    add_sq_text=add_sq_text,
+                    text=text)
 
             self.htsfile = self._open_htsfile()
 
@@ -542,7 +562,7 @@ cdef class AlignmentFile(HTSFile):
             # open file for reading
             if not self._exists():
                 raise IOError("file `%s` not found" % self.filename)
-                
+
             self.htsfile = self._open_htsfile()
 
             if self.htsfile == NULL:
@@ -553,6 +573,8 @@ cdef class AlignmentFile(HTSFile):
             if self.htsfile.format.category != sequence_data:
                 raise ValueError("file does not contain alignment data")
 
+            self.check_truncation(ignore_truncation)
+
             # bam files require a valid header
             if self.is_bam or self.is_cram:
                 with nogil:
@@ -562,16 +584,21 @@ cdef class AlignmentFile(HTSFile):
                         "file does not have valid header (mode='%s') "
                         "- is it BAM format?" % mode )
             else:
-                # in sam files it is optional (htsfile full of
-                # unmapped reads)
-                if check_header:
+                # in sam files a header is optional, but requires
+                # reference names and lengths
+                if reference_names and reference_lengths:
+                    self.header = build_header_from_list(
+                        reference_names,
+                        reference_lengths,
+                        add_sq_text=add_sq_text,
+                        text=text)
+                else:
                     with nogil:
                         self.header = sam_hdr_read(self.htsfile)
                     if self.header == NULL:
                         raise ValueError(
-                            "file does not have valid header (mode='%s') "
-                            "- is it SAM format?" % mode )
-                    # self.header.ignore_sam_err = True
+                            "file does not have valid header (mode='%s'), "
+                            "please provide reference_names and reference_lengths")
 
             # set filename with reference sequences
             if self.is_cram and reference_filename:
@@ -669,7 +696,7 @@ cdef class AlignmentFile(HTSFile):
         if not self.is_open:
             raise ValueError("I/O operation on closed file")
         if not 0 <= tid < self.header.n_targets:
-            raise ValueError("reference_id %i out of range 0<=tid<%i" % 
+            raise ValueError("reference_id %i out of range 0<=tid<%i" %
                              (tid, self.header.n_targets))
         return charptr_to_str(self.header.target_name[tid])
 
@@ -686,7 +713,7 @@ cdef class AlignmentFile(HTSFile):
 
         Alternatively, a samtools :term:`region` string can be
         supplied.
-        
+
         If any of the coordinates are missing they will be replaced by the
         minimum (`start`) or maximum (`end`) coordinate.
 
@@ -695,14 +722,14 @@ cdef class AlignmentFile(HTSFile):
 
         Returns
         -------
-        
+
         tuple :  a tuple of `flag`, :term:`tid`, `start` and `end`. The
         flag indicates whether no coordinates were supplied and the
         genomic region is the complete genomic space.
 
         Raises
         ------
-        
+
         ValueError
            for invalid or out of bounds regions.
 
@@ -711,6 +738,9 @@ cdef class AlignmentFile(HTSFile):
         cdef long long rstart
         cdef long long rend
 
+        if reference is None and tid is None and region is None:
+            return 0, 0, 0, 0
+
         rtid = -1
         rstart = 0
         rend = MAX_POS
@@ -735,11 +765,11 @@ cdef class AlignmentFile(HTSFile):
             if len(parts) >= 3:
                 rend = int(parts[2])
 
-        if not reference:
-            return 0, 0, 0, 0
-
         if tid is not None:
             rtid = tid
+            if rtid < 0 or rtid >= self.header.n_targets:
+                raise IndexError("invalid reference, {} out of range 0-{}".format(
+                        rtid, self.header.n_targets))
         else:
             rtid = self.gettid(reference)
 
@@ -764,7 +794,7 @@ cdef class AlignmentFile(HTSFile):
               tid=None,
               until_eof=False,
               multiple_iterators=False):
-        """fetch reads aligned in a :term:`region`. 
+        """fetch reads aligned in a :term:`region`.
 
         See :meth:`AlignmentFile.parse_region` for more information
         on genomic regions.
@@ -789,7 +819,7 @@ cdef class AlignmentFile(HTSFile):
 
         Parameters
         ----------
-        
+
         until_eof : bool
 
            If `until_eof` is True, all reads from the current file
@@ -797,7 +827,7 @@ cdef class AlignmentFile(HTSFile):
            file. Using this option will also fetch unmapped reads.
 
         multiple_iterators : bool
-           
+
            If `multiple_iterators` is True, multiple
            iterators on the same file can be used at the same time. The
            iterator returned will receive its own copy of a filehandle to
@@ -841,7 +871,7 @@ cdef class AlignmentFile(HTSFile):
 
             if has_coord:
                 return IteratorRowRegion(
-                    self, rtid, rstart, rend, 
+                    self, rtid, rstart, rend,
                     multiple_iterators=multiple_iterators)
             else:
                 if until_eof:
@@ -857,22 +887,17 @@ cdef class AlignmentFile(HTSFile):
         else:
             if has_coord:
                 raise ValueError(
-                    "fetching by region is not available for sam files")
+                    "fetching by region is not available for SAM files")
 
-            if self.header == NULL:
+            if multiple_iterators == True:
                 raise ValueError(
-                    "fetch called for htsfile without header")
+                    "multiple iterators not implemented for SAM files")
 
-            # check if targets are defined
-            # give warning, sam_read1 segfaults
-            if self.header.n_targets == 0:
-                warnings.warn("fetch called for htsfile without header")
-                
             return IteratorRowAll(self,
                                   multiple_iterators=multiple_iterators)
 
     def head(self, n, multiple_iterators=True):
-        '''return an iterator over the first n alignments. 
+        '''return an iterator over the first n alignments.
 
         This iterator is is useful for inspecting the bam-file.
 
@@ -880,15 +905,15 @@ cdef class AlignmentFile(HTSFile):
         ----------
 
         multiple_iterators : bool
-        
+
             is set to True by default in order to
             avoid changing the current file position.
-        
+
         Returns
         -------
-        
+
         an iterator over a collection of reads
-        
+
         '''
         return IteratorRowHead(self, n,
                                multiple_iterators=multiple_iterators)
@@ -903,14 +928,14 @@ cdef class AlignmentFile(HTSFile):
             not re-opened the file.
 
         .. note::
-  
+
            This method is too slow for high-throughput processing.
            If a read needs to be processed with its mate, work
            from a read name sorted file or, better, cache reads.
 
         Returns
         -------
-        
+
         :class:`~pysam.AlignedSegment` : the mate
 
         Raises
@@ -1061,7 +1086,7 @@ cdef class AlignmentFile(HTSFile):
 
         Parameters
         ----------
-        
+
         reference : string
             reference_name of the genomic region (chromosome)
 
@@ -1070,12 +1095,12 @@ cdef class AlignmentFile(HTSFile):
 
         end : int
             end of the genomic region
-        
+
         region : string
             a region string in samtools format.
 
         until_eof : bool
-            count until the end of the file, possibly including 
+            count until the end of the file, possibly including
             unmapped reads as well.
 
         read_callback: string or function
@@ -1135,7 +1160,7 @@ cdef class AlignmentFile(HTSFile):
         return counter
 
     @cython.boundscheck(False)  # we do manual bounds checking
-    def count_coverage(self, 
+    def count_coverage(self,
                        reference=None,
                        start=None,
                        end=None,
@@ -1150,7 +1175,7 @@ cdef class AlignmentFile(HTSFile):
 
         Parameters
         ----------
-        
+
         reference : string
             reference_name of the genomic region (chromosome)
 
@@ -1165,7 +1190,7 @@ cdef class AlignmentFile(HTSFile):
 
         quality_threshold : int
             quality_threshold is the minimum quality score (in phred) a
-            base has to reach to be counted. 
+            base has to reach to be counted.
 
         read_callback: string or function
 
@@ -1196,7 +1221,7 @@ cdef class AlignmentFile(HTSFile):
         four array.arrays of the same length in order A C G T : tuple
 
         """
-        
+
         cdef int _start = start
         cdef int _stop = end
         cdef int length = _stop - _start
@@ -1221,7 +1246,7 @@ cdef class AlignmentFile(HTSFile):
             filter_method = 1
         elif read_callback == "nofilter":
             filter_method = 2
-    
+
         cdef int _threshold = quality_threshold
         for read in self.fetch(reference=reference,
                                start=start,
@@ -1283,16 +1308,22 @@ cdef class AlignmentFile(HTSFile):
         return res
 
     def close(self):
-        '''
-        closes the :class:`pysam.AlignmentFile`.'''
+        '''closes the :class:`pysam.AlignmentFile`.'''
 
         if self.htsfile == NULL:
             return
 
         cdef int ret = hts_close(self.htsfile)
-        hts_idx_destroy(self.index)
         self.htsfile = NULL
 
+        if self.index != NULL:
+            hts_idx_destroy(self.index)
+            self.index = NULL
+
+        if self.header != NULL:
+            bam_hdr_destroy(self.header)
+            self.header = NULL
+
         if ret < 0:
             global errno
             if errno == EPIPE:
@@ -1301,28 +1332,23 @@ cdef class AlignmentFile(HTSFile):
                 raise OSError(errno, force_str(strerror(errno)))
 
     def __dealloc__(self):
-        # remember: dealloc cannot call other methods
-        # note: no doc string
-        # note: __del__ is not called.
-
-        # FIXME[kbj]: isn't self.close a method?  I've been duplicating
-        # close within __dealloc__ (see BCFFile.__dealloc__).  Not a pretty
-        # solution and perhaps unnecessary given that calling self.close has
-        # been working for years.
-        # AH: I have removed the call to close. Even though it is working,
-        # it seems to be dangerous according to the documentation as the
-        # object be partially deconstructed already.
         cdef int ret = 0
 
         if self.htsfile != NULL:
             ret = hts_close(self.htsfile)
-            hts_idx_destroy(self.index);
             self.htsfile = NULL
 
-        bam_destroy1(self.b)
+        if self.index != NULL:
+            hts_idx_destroy(self.index)
+            self.index = NULL
+
         if self.header != NULL:
             bam_hdr_destroy(self.header)
+            self.header = NULL
 
+        if self.b:
+            bam_destroy1(self.b)
+            self.b = NULL
 
         if ret < 0:
             global errno
@@ -1330,7 +1356,7 @@ cdef class AlignmentFile(HTSFile):
                 errno = 0
             else:
                 raise OSError(errno, force_str(strerror(errno)))
-            
+
     cpdef int write(self, AlignedSegment read) except -1:
         '''
         write a single :class:`pysam.AlignedSegment` to disk.
@@ -1342,7 +1368,7 @@ cdef class AlignmentFile(HTSFile):
 
         Returns
         -------
-            
+
         int : the number of bytes written. If the file is closed,
               this will be 0.
         '''
@@ -1387,7 +1413,7 @@ cdef class AlignmentFile(HTSFile):
             return self.header.n_targets
 
     property references:
-        """tuple with the names of :term:`reference` sequences. This is a 
+        """tuple with the names of :term:`reference` sequences. This is a
         read-only attribute"""
         def __get__(self):
             if not self.is_open: raise ValueError( "I/O operation on closed file" )
@@ -1455,10 +1481,10 @@ cdef class AlignmentFile(HTSFile):
 
     property text:
         '''string with the full contents of the :term:`sam file` header as a
-        string. 
+        string.
 
         This is a read-only attribute.
-        
+
         See :attr:`pysam.AlignmentFile.header` to get a parsed
         representation of the header.
         '''
@@ -1468,13 +1494,13 @@ cdef class AlignmentFile(HTSFile):
             return from_string_and_size(self.header.text, self.header.l_text)
 
     property header:
-        """two-level dictionay with header information from the file. 
-        
+        """two-level dictionay with header information from the file.
+
         This is a read-only attribute.
 
         The first level contains the record (``HD``, ``SQ``, etc) and
         the second level contains the fields (``VN``, ``LN``, etc).
-        
+
         The parser is validating and will raise an AssertionError if
         if encounters any record or field tags that are not part of
         the SAM specification. Use the
@@ -1494,7 +1520,7 @@ cdef class AlignmentFile(HTSFile):
                 raise ValueError( "I/O operation on closed file" )
 
             result = {}
-            
+
             if self.header.text != NULL:
                 # convert to python string (note: call self.text to
                 # create 0-terminated string)
@@ -1518,7 +1544,7 @@ cdef class AlignmentFile(HTSFile):
                     x = {}
 
                     for idx, field in enumerate(fields[1:]):
-                        if ":" not in field: 
+                        if ":" not in field:
                             raise ValueError("malformatted header: no ':' in field" )
                         key, value = field.split(":", 1)
                         if key in ("CL",):
@@ -1576,7 +1602,7 @@ cdef class AlignmentFile(HTSFile):
                 "can not iterate over samfile without header")
         return self
 
-    cdef bam1_t * getCurrent( self ):
+    cdef bam1_t * getCurrent(self):
         return self.b
 
     cdef int cnext(self):
@@ -1598,12 +1624,12 @@ cdef class AlignmentFile(HTSFile):
             raise IOError('truncated file')
         else:
             raise StopIteration
-            
+
     # Compatibility functions for pysam < 0.8.3
     def gettid(self, reference):
         """deprecated, use get_tid() instead"""
         return self.get_tid(reference)
-        
+
     def getrname(self, tid):
         """deprecated, use get_reference_name() instead"""
         return self.get_reference_name(tid)
@@ -1637,7 +1663,7 @@ cdef class IteratorRow:
     def __init__(self, AlignmentFile samfile, int multiple_iterators=False):
         cdef char *cfilename
         cdef char *creference_filename
-        
+
         if not samfile.is_open:
             raise ValueError("I/O operation on closed file")
 
@@ -1711,7 +1737,7 @@ cdef class IteratorRowRegion(IteratorRow):
                 tid,
                 beg,
                 end)
-    
+
     def __iter__(self):
         return self
 
@@ -1766,7 +1792,7 @@ cdef class IteratorRowHead(IteratorRow):
     def __iter__(self):
         return self
 
-    cdef bam1_t * getCurrent( self ):
+    cdef bam1_t * getCurrent(self):
         return self.b
 
     cdef int cnext(self):
@@ -1814,7 +1840,7 @@ cdef class IteratorRowAll(IteratorRow):
     def __iter__(self):
         return self
 
-    cdef bam1_t * getCurrent( self ):
+    cdef bam1_t * getCurrent(self):
         return self.b
 
     cdef int cnext(self):
@@ -1988,7 +2014,7 @@ cdef int __advance_snpcalls(void * data, bam1_t * b):
     the samtools pileup.
     '''
 
-    # Note that this method requries acces to some 
+    # Note that this method requries acces to some
     # functions in the samtools code base and is thus
     # not htslib only.
     # The functions accessed in samtools are:
@@ -2029,11 +2055,13 @@ cdef int __advance_snpcalls(void * data, bam1_t * b):
         skip = 0
 
         # realign read - changes base qualities
-        if d.seq != NULL and is_cns and not is_nobaq: 
-            bam_prob_realn(b, d.seq)
+        if d.seq != NULL and is_cns and not is_nobaq:
+            # flag:
+            # apply_baq = flag&1, extend_baq = flag>>1&1, redo_baq = flag&4;
+            sam_prob_realn(b, d.seq, d.seq_len, 0)
 
         if d.seq != NULL and capQ_thres > 10:
-            q = bam_cap_mapQ(b, d.seq, capQ_thres)
+            q = sam_cap_mapq(b, d.seq, d.seq_len, capQ_thres)
             if q < 0:
                 skip = 1
             elif b.core.qual > q:
@@ -2089,7 +2117,7 @@ cdef class IteratorColumn:
        Valid values are None, "all" (default), "nofilter" or "samtools".
 
        See AlignmentFile.pileup for description.
-    
+
     fastafile
        A :class:`~pysam.FastaFile` object
 
@@ -2271,7 +2299,7 @@ cdef class IteratorColumnRegion(IteratorColumn):
 
             if self.plp == NULL:
                 raise StopIteration
-            
+
             if self.truncate:
                 if self.start > self.pos: continue
                 if self.pos >= self.end: raise StopIteration
@@ -2313,7 +2341,7 @@ cdef class IteratorColumnAllRefs(IteratorColumn):
                                         self.pos,
                                         self.n_plp,
                                         self.samfile)
-                
+
             # otherwise, proceed to next reference or stop
             self.tid += 1
             if self.tid < self.samfile.nreferences:
@@ -2465,7 +2493,7 @@ cdef class IndexedReads:
 
         Raises
         ------
-        
+
         KeyError
             if the `query_name` is not in the index.
 
diff --git a/pysam/libcbcf.pxd b/pysam/libcbcf.pxd
index fc7f56c..1d4129b 100644
--- a/pysam/libcbcf.pxd
+++ b/pysam/libcbcf.pxd
@@ -38,45 +38,44 @@ from pysam.libchtslib cimport *
 cdef class VariantHeader(object):
     cdef bcf_hdr_t *ptr
 
-    cpdef VariantRecord new_record(self)
     cdef _subset_samples(self, include_samples)
 
 
 cdef class VariantHeaderRecord(object):
-    cdef VariantHeader header
+    cdef readonly VariantHeader header
     cdef bcf_hrec_t *ptr
 
 
 cdef class VariantHeaderRecords(object):
-    cdef VariantHeader header
+    cdef readonly VariantHeader header
 
 
 cdef class VariantHeaderContigs(object):
-    cdef VariantHeader header
+    cdef readonly VariantHeader header
 
 
 cdef class VariantHeaderSamples(object):
-    cdef VariantHeader header
+    cdef readonly VariantHeader header
 
 
 cdef class VariantContig(object):
-    cdef VariantHeader header
+    cdef readonly VariantHeader header
     cdef int id
 
 
 cdef class VariantMetadata(object):
-    cdef VariantHeader header
+    cdef readonly VariantHeader header
     cdef int type
     cdef int id
 
 
 cdef class VariantHeaderMetadata(object):
-    cdef VariantHeader header
+    cdef readonly VariantHeader header
     cdef int32_t type
 
 
 cdef class VariantRecord(object):
-    cdef VariantHeader header
+    cdef readonly VariantHeader header
     cdef bcf1_t *ptr
 
 
@@ -107,7 +106,7 @@ cdef class BaseIndex(object):
 
 
 cdef class BCFIndex(BaseIndex):
-    cdef VariantHeader header
+    cdef readonly VariantHeader header
     cdef hts_idx_t *ptr
 
 
@@ -139,6 +138,4 @@ cdef class VariantFile(HTSFile):
     cdef readonly bint       is_reading     # true if file has begun reading records
     cdef readonly bint       header_written # true if header has already been written
 
-    cpdef VariantRecord new_record(self)
-
     cpdef int write(self, VariantRecord record) except -1
diff --git a/pysam/libcbcf.pyx b/pysam/libcbcf.pyx
index 8f40451..9413e70 100644
--- a/pysam/libcbcf.pyx
+++ b/pysam/libcbcf.pyx
@@ -101,9 +101,6 @@ from cpython.version cimport PY_MAJOR_VERSION
 from pysam.libchtslib cimport HTSFile, hisremote
 
 
-from warnings         import warn
-
-
 __all__ = ['VariantFile',
            'VariantHeader',
            'VariantHeaderRecord',
@@ -131,6 +128,13 @@ from pysam.libcutils cimport encode_filename, from_string_and_size
 
 ########################################################################
 ########################################################################
+## Sentinel object
+########################################################################
+
+cdef object _nothing = object()
+
+########################################################################
+########################################################################
 ## VCF/BCF string intern system
 ########################################################################
 
@@ -156,6 +160,55 @@ cdef inline bcf_str_cache_get_charptr(const char* s):
 
 ########################################################################
 ########################################################################
+## Genotype math
+########################################################################
+
+cdef int comb(int n, int k) except -1:
+    """Return binomial coeffient: n choose k
+
+    >>> comb(5, 1)
+    5
+    >>> comb(5, 2)
+    10
+    >>> comb(2, 2)
+    1
+    >>> comb(100, 2)
+    4950
+    """
+    if k > n:
+        return 0
+    elif k == n:
+        return 1
+    elif k > n // 2:
+        k = n - k
+
+    cdef d, result
+
+    d = result = n - k + 1
+    for i in range(2, k + 1):
+        d += 1
+        result  *= d
+        result //= i
+    return result
+
+
+cdef inline int bcf_geno_combinations(int ploidy, int alleles) except -1:
+    """Return the count of genotypes expected for the given ploidy and number of alleles.
+
+    >>> bcf_geno_combinations(1, 2)
+    2
+    >>> bcf_geno_combinations(2, 2)
+    3
+    >>> bcf_geno_combinations(2, 3)
+    6
+    >>> bcf_geno_combinations(3, 2)
+    4
+    """
+    return comb(alleles + ploidy - 1, ploidy)
+
+
+########################################################################
+########################################################################
 ## Low level type conversion helpers
 ########################################################################
 
@@ -165,7 +218,32 @@ cdef inline bint check_header_id(bcf_hdr_t *hdr, int hl_type, int id):
 
 
 cdef inline int is_gt_fmt(bcf_hdr_t *hdr, int fmt_id):
-    return strcmp(bcf_hdr_int2id(hdr, BCF_DT_ID, fmt_id), "GT") == 0
+    return strcmp(bcf_hdr_int2id(hdr, BCF_DT_ID, fmt_id), 'GT') == 0
+
+
+cdef inline int bcf_genotype_count(bcf_hdr_t *hdr, bcf1_t *rec, int sample) except -1:
+    if sample < 0:
+        raise ValueError('genotype is only valid as a format field')
+
+    cdef int32_t *gt_arr = NULL
+    cdef int ngt = 0
+    ngt = bcf_get_genotypes(hdr, rec, &gt_arr, &ngt)
+
+    if ngt <= 0 or not gt_arr:
+        return 0
+
+    assert ngt % rec.n_sample == 0
+    cdef int max_ploidy = ngt // rec.n_sample
+    cdef int32_t *gt = gt_arr + sample * max_ploidy
+    cdef int ploidy = 0
+
+    while ploidy < max_ploidy and gt[0] != bcf_int32_vector_end:
+        gt += 1
+        ploidy += 1
+
+    free(<void*>gt_arr)
+
+    return bcf_geno_combinations(ploidy, rec.n_allele)
 
 
 cdef tuple char_array_to_tuple(const char **a, ssize_t n, int free_after=0):
@@ -185,19 +263,25 @@ cdef bcf_array_to_object(void *data, int type, ssize_t n, ssize_t count, int sca
     cdef int32_t *data32
     cdef float   *dataf
     cdef int      i
+    cdef bytes    b
 
     if not data or n <= 0:
         return None
 
     if type == BCF_BT_CHAR:
         datac = <char *>data
-        while n and datac[n-1] == bcf_str_vector_end:
-            n -= 1
-        value = charptr_to_str_w_len(datac, n) if datac[0] != bcf_str_missing else None
-        # FIXME: Need to know length?  Report errors?  Pad with missing values?  Not clear what to do.
 
-        value = tuple(v or None for v in value.split(',')) if value else ()
-        # FIXME: Need to know length?  Report errors?  Pad with missing values?  Not clear what to do.
+        if not n:
+            value = ()
+        else:
+            # Check if at least one null terminator is present
+            if datac[n-1] == bcf_str_vector_end:
+                # If so, create a string up to the first null terminator
+                b = datac
+            else:
+                # Otherwise, copy the entire block
+                b = datac[:n]
+            value = tuple(v.decode('ascii') if v and v != bcf_str_missing else None for v in b.split(b','))
     else:
         value = []
         if type == BCF_BT_INT8:
@@ -251,13 +335,13 @@ cdef bcf_object_to_array(values, void *data, int bt_type, ssize_t n, int vlen):
     cdef float   *dataf
     cdef ssize_t  i, value_count = len(values)
 
-    assert(value_count <= n)
+    assert value_count <= n
 
     if bt_type == BCF_BT_CHAR:
         if not isinstance(values, (str, bytes)):
-            values = b','.join(force_bytes(v) if v is not None else b'' for v in values)
+            values = b','.join(force_bytes(v) if v else bcf_str_missing for v in values)
             value_count = len(values)
-        assert(value_count <= n)
+        assert value_count <= n
         datac = <char *>data
         memcpy(datac, <char *>values, value_count)
         for i in range(value_count, n):
@@ -392,7 +476,7 @@ cdef bcf_copy_expand_array(void *src_data, int src_type, ssize_t src_values,
         raise TypeError('unsupported types')
 
 
-cdef bcf_get_value_count(VariantRecord record, int hl_type, int id, ssize_t *count, int *scalar):
+cdef bcf_get_value_count(VariantRecord record, int hl_type, int id, ssize_t *count, int *scalar, int sample):
     if record is None:
         raise ValueError('record must not be None')
 
@@ -418,7 +502,7 @@ cdef bcf_get_value_count(VariantRecord record, int hl_type, int id, ssize_t *cou
     elif length == BCF_VL_A:
         count[0] = r.n_allele - 1
     elif length == BCF_VL_G:
-        count[0] = r.n_allele * (r.n_allele + 1) // 2
+        count[0] = bcf_genotype_count(hdr, r, sample)
     elif length == BCF_VL_VAR:
         count[0] = -1
     else:
@@ -435,7 +519,7 @@ cdef object bcf_info_get_value(VariantRecord record, const bcf_info_t *z):
     cdef ssize_t count
     cdef int scalar
 
-    bcf_get_value_count(record, BCF_HL_INFO, z.key, &count, &scalar)
+    bcf_get_value_count(record, BCF_HL_INFO, z.key, &count, &scalar, -1)
 
     if z.len == 0:
         if  bcf_hdr_id2type(hdr, BCF_HL_INFO, z.key) == BCF_HT_FLAG:
@@ -466,14 +550,15 @@ cdef object bcf_info_get_value(VariantRecord record, const bcf_info_t *z):
     return value
 
 
-cdef object bcf_check_values(VariantRecord record, value, int hl_type, int ht_type,
+cdef object bcf_check_values(VariantRecord record, value, int sample,
+                             int hl_type, int ht_type,
                              int id, int bt_type, ssize_t bt_len,
                              ssize_t *value_count, int *scalar, int *realloc):
 
     if record is None:
         raise ValueError('record must not be None')
 
-    bcf_get_value_count(record, hl_type, id, value_count, scalar)
+    bcf_get_value_count(record, hl_type, id, value_count, scalar, sample)
 
     # Validate values now that we know the type and size
     values = (value,) if not isinstance(value, (list, tuple)) else value
@@ -485,11 +570,12 @@ cdef object bcf_check_values(VariantRecord record, value, int hl_type, int ht_ty
         # KBJ: htslib lies about the cardinality of GT fields-- they're really VLEN (-1)
         value_count[0] = -1
 
-    if value_count[0] != -1 and value_count[0] != len(values):
+    cdef int given = len(values)
+    if value_count[0] != -1 and value_count[0] != given:
         if scalar[0]:
-            raise TypeError('value expected to be scalar'.format(value_count[0]))
+            raise TypeError('value expected to be scalar, given len={}'.format(value_count[0], given))
         else:
-            raise TypeError('values expected to be {:d}-tuple'.format(value_count[0]))
+            raise TypeError('values expected to be {}-tuple, given len={}'.format(value_count[0], given))
 
     if ht_type == BCF_HT_REAL:
         for v in values:
@@ -572,33 +658,29 @@ cdef bcf_info_set_value(VariantRecord record, key, value):
 
     cdef bcf_hdr_t *hdr = record.header.ptr
     cdef bcf1_t *r = record.ptr
-    cdef vdict_t *d
-    cdef khiter_t k
     cdef int info_id, info_type, scalar, dst_type, realloc, vlen = 0
     cdef ssize_t i, value_count, alloc_len, alloc_size, dst_size
 
     if bcf_unpack(r, BCF_UN_INFO) < 0:
         raise ValueError('Error unpacking VariantRecord')
 
-    bkey = force_bytes(key)
+    cdef bytes bkey = force_bytes(key)
     cdef bcf_info_t *info = bcf_get_info(hdr, r, bkey)
 
     if info:
         info_id = info.key
     else:
-        d = <vdict_t *>hdr.dict[BCF_DT_ID]
-        k = kh_get_vdict(d, bkey)
+        info_id = bcf_header_get_info_id(hdr, bkey)
 
-        if k == kh_end(d) or kh_val_vdict(d, k).info[BCF_HL_INFO] & 0xF == 0xF:
-            raise KeyError('unknown INFO')
-
-        info_id = kh_val_vdict(d, k).id
+    if info_id < 0:
+        raise KeyError('unknown INFO: {}'.format(key))
 
     if not check_header_id(hdr, BCF_HL_INFO, info_id):
         raise ValueError('Invalid header')
 
     info_type = bcf_hdr_id2type(hdr, BCF_HL_INFO, info_id)
-    values = bcf_check_values(record, value, BCF_HL_INFO, info_type, info_id,
+    values = bcf_check_values(record, value, -1,
+                              BCF_HL_INFO, info_type, info_id,
                               info.type if info else -1,
                               info.len  if info else -1,
                               &value_count, &scalar, &realloc)
@@ -611,13 +693,16 @@ cdef bcf_info_set_value(VariantRecord record, key, value):
     vlen = value_count < 0
     value_count = len(values)
 
+    # DISABLED DUE TO ISSUES WITH THE CRAZY POINTERS
     # If we can, write updated values to existing allocated storage
-    if info and not realloc:
+    if 0 and info and not realloc:
         r.d.shared_dirty |= BCF1_DIRTY_INF
 
         if value_count == 0:
             info.len = 0
-            # FIXME: Check if need to free vptr if info.len > 0?
+            if not info.vptr:
+                info.vptr = <uint8_t *>&info.v1.i
+
         elif value_count == 1:
             # FIXME: Check if need to free vptr if info.len > 0?
             if info.type == BCF_BT_INT8 or info.type == BCF_BT_INT16 or info.type == BCF_BT_INT32:
@@ -626,9 +711,13 @@ cdef bcf_info_set_value(VariantRecord record, key, value):
                 bcf_object_to_array(values, &info.v1.f, BCF_BT_FLOAT, 1, vlen)
             else:
                 raise TypeError('unsupported info type code')
+
             info.len = 1
+            if not info.vptr:
+                info.vptr = <uint8_t *>&info.v1.i
         else:
             bcf_object_to_array(values, info.vptr, info.type, info.len, vlen)
+
         return
 
     alloc_len = max(1, value_count)
@@ -665,13 +754,13 @@ cdef bcf_info_del_value(VariantRecord record, key):
     if bcf_unpack(r, BCF_UN_INFO) < 0:
         raise ValueError('Error unpacking VariantRecord')
 
-    bkey = force_bytes(key)
+    cdef bytes bkey = force_bytes(key)
     cdef bcf_info_t *info = bcf_get_info(hdr, r, bkey)
 
     if not info:
         raise KeyError(key)
 
-    bcf_get_value_count(record, BCF_HL_INFO, info.key, &value_count, &scalar)
+    bcf_get_value_count(record, BCF_HL_INFO, info.key, &value_count, &scalar, -1)
 
     if value_count <= 0:
         null_value = ()
@@ -695,16 +784,16 @@ cdef bcf_format_get_value(VariantRecordSample sample, key):
     if bcf_unpack(r, BCF_UN_ALL) < 0:
         raise ValueError('Error unpacking VariantRecord')
 
-    bkey = force_bytes(key)
+    cdef bytes bkey = force_bytes(key)
     cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, bkey)
 
     if not fmt or not fmt.p:
-        raise KeyError('invalid FORMAT')
+        raise KeyError('invalid FORMAT: {}'.format(key))
 
     if is_gt_fmt(hdr, fmt.id):
         return bcf_format_get_allele_indices(sample)
 
-    bcf_get_value_count(sample.record, BCF_HL_FMT, fmt.id, &count, &scalar)
+    bcf_get_value_count(sample.record, BCF_HL_FMT, fmt.id, &count, &scalar, sample.index)
 
     if fmt.p and fmt.n and fmt.size:
         return bcf_array_to_object(fmt.p + sample.index * fmt.size, fmt.type, fmt.n, count, scalar)
@@ -720,6 +809,10 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
     if sample is None:
         raise ValueError('sample must not be None')
 
+    if key == 'phased':
+        sample.phased = bool(value)
+        return
+
     cdef bcf_hdr_t *hdr = sample.record.header.ptr
     cdef bcf1_t *r = sample.record.ptr
     cdef int fmt_id
@@ -731,7 +824,7 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
     if bcf_unpack(r, BCF_UN_ALL) < 0:
         raise ValueError('Error unpacking VariantRecord')
 
-    bkey = force_bytes(key)
+    cdef bytes bkey = force_bytes(key)
     cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, bkey)
 
     if fmt:
@@ -741,7 +834,7 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
         k = kh_get_vdict(d, bkey)
 
         if k == kh_end(d) or kh_val_vdict(d, k).info[BCF_HL_FMT] & 0xF == 0xF:
-            raise KeyError('unknown format')
+            raise KeyError('unknown format: {}'.format(key))
 
         fmt_id = kh_val_vdict(d, k).id
 
@@ -758,7 +851,8 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
         # KBJ: GT field is considered to be a string by the VCF header but BCF represents it as INT.
         fmt_type = BCF_HT_INT
 
-    values = bcf_check_values(sample.record, value, BCF_HL_FMT, fmt_type, fmt_id,
+    values = bcf_check_values(sample.record, value, sample.index,
+                              BCF_HL_FMT, fmt_type, fmt_id,
                               fmt.type if fmt else -1,
                               fmt.n    if fmt else -1,
                               &value_count, &scalar, &realloc)
@@ -776,7 +870,7 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
     if fmt and fmt.n > alloc_len:
         alloc_len = fmt.n
 
-    n = bcf_hdr_nsamples(hdr)
+    n = r.n_sample
     new_values = bcf_empty_array(fmt_type, n*alloc_len, vlen)
     cdef char *valp = <char *>new_values
 
@@ -816,13 +910,13 @@ cdef bcf_format_del_value(VariantRecordSample sample, key):
     if bcf_unpack(r, BCF_UN_ALL) < 0:
         raise ValueError('Error unpacking VariantRecord')
 
-    bkey = force_bytes(key)
+    cdef bytes bkey = force_bytes(key)
     cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, bkey)
 
     if not fmt or not fmt.p:
         raise KeyError(key)
 
-    bcf_get_value_count(sample.record, BCF_HL_FMT, fmt.id, &value_count, &scalar)
+    bcf_get_value_count(sample.record, BCF_HL_FMT, fmt.id, &value_count, &scalar, sample.index)
 
     if value_count <= 0:
         null_value = ()
@@ -840,7 +934,7 @@ cdef bcf_format_get_allele_indices(VariantRecordSample sample):
 
     cdef bcf_hdr_t *hdr = sample.record.header.ptr
     cdef bcf1_t *r = sample.record.ptr
-    cdef int32_t n = bcf_hdr_nsamples(hdr)
+    cdef int32_t n = r.n_sample
 
     if bcf_unpack(r, BCF_UN_ALL) < 0:
         raise ValueError('Error unpacking VariantRecord')
@@ -900,7 +994,7 @@ cdef bcf_format_get_alleles(VariantRecordSample sample):
 
     cdef bcf_hdr_t *hdr = sample.record.header.ptr
     cdef bcf1_t *r = sample.record.ptr
-    cdef int32_t nsamples = bcf_hdr_nsamples(hdr)
+    cdef int32_t nsamples = r.n_sample
 
     if bcf_unpack(r, BCF_UN_ALL) < 0:
         raise ValueError('Error unpacking VariantRecord')
@@ -951,7 +1045,7 @@ cdef bint bcf_sample_get_phased(VariantRecordSample sample):
 
     cdef bcf_hdr_t *hdr = sample.record.header.ptr
     cdef bcf1_t *r = sample.record.ptr
-    cdef int32_t n = bcf_hdr_nsamples(hdr)
+    cdef int32_t n = r.n_sample
 
     if bcf_unpack(r, BCF_UN_ALL) < 0:
         raise ValueError('Error unpacking VariantRecord')
@@ -1014,7 +1108,7 @@ cdef bcf_sample_set_phased(VariantRecordSample sample, bint phased):
 
     cdef bcf_hdr_t *hdr = sample.record.header.ptr
     cdef bcf1_t *r = sample.record.ptr
-    cdef int32_t n = bcf_hdr_nsamples(hdr)
+    cdef int32_t n = r.n_sample
 
     if bcf_unpack(r, BCF_UN_ALL) < 0:
         raise ValueError('Error unpacking VariantRecord')
@@ -1061,6 +1155,29 @@ cdef bcf_sample_set_phased(VariantRecordSample sample, bint phased):
                 data32[i] = (data32[i] & 0xFFFFFFFE) | phased
 
 
+cdef inline bcf_sync_end(VariantRecord record):
+    cdef bcf_hdr_t *hdr = record.header.ptr
+    cdef bcf_info_t *info
+    cdef int end_id = bcf_header_get_info_id(record.header.ptr, b'END')
+    cdef int ref_len = len(record.ref)
+
+    # Delete INFO/END if no alleles are present or if rlen is equal to len(ref)
+    if not record.ptr.n_allele or record.ptr.rlen == ref_len:
+        # If INFO/END is not defined in the header, it doesn't exist in the record
+        if end_id >= 0:
+            info = bcf_get_info(hdr, record.ptr, b'END')
+            if info and info.vptr:
+                if bcf_update_info(hdr, record.ptr, b'END', NULL, 0, info.type) < 0:
+                    raise ValueError('Unable to delete END')
+    else:
+        # Create END header, if not present
+        if end_id < 0:
+            record.header.info.add('END', number=1, type='Integer', description='Stop position of the interval')
+
+        # Update to reflect stop position
+        bcf_info_set_value(record, b'END', record.ptr.pos + record.ptr.rlen)
+
+
 ########################################################################
 ########################################################################
 ## Variant Header objects
@@ -1205,6 +1322,28 @@ cdef class VariantHeaderRecord(object):
         """D.values() -> list of D's values"""
         return list(self.itervalues())
 
+    def update(self, items=None, **kwargs):
+        """D.update([E, ]**F) -> None.
+
+        Update D from dict/iterable E and F.
+        """
+        for k, v in items.items():
+            self[k] = v
+
+        if kwargs:
+            for k, v in kwargs.items():
+                self[k] = v
+
+    def pop(self, key, default=_nothing):
+        try:
+            value = self[key]
+            del self[key]
+            return value
+        except KeyError:
+            if default is not _nothing:
+                return default
+            raise
+
     # Mappings are not hashable by default, but subclasses can change this
     __hash__ = None
 
@@ -1235,9 +1374,8 @@ cdef class VariantHeaderRecord(object):
         cdef bcf_hrec_t *r = self.ptr
         if not r:
             return
-        assert(r.key)
+        assert r.key
         cdef char *key = r.key if r.type == BCF_HL_GEN else r.value
-        print('Removing header type={} key={} value={} hdr={}'.format(METADATA_TYPES[r.type], r.key, r.value, key))
         bcf_hdr_remove(hdr, r.type, key)
         self.ptr = NULL
 
@@ -1358,8 +1496,8 @@ cdef class VariantMetadata(object):
 
     def remove_header(self):
         cdef bcf_hdr_t *hdr = self.header.ptr
-        cdef const char *bkey = hdr.id[BCF_DT_ID][self.id].key
-        bcf_hdr_remove(hdr, self.type, bkey)
+        cdef const char *key = hdr.id[BCF_DT_ID][self.id].key
+        bcf_hdr_remove(hdr, self.type, key)
 
 
 cdef VariantMetadata makeVariantMetadata(VariantHeader header, int type, int id):
@@ -1437,11 +1575,11 @@ cdef class VariantHeaderMetadata(object):
         cdef bcf_hdr_t *hdr = self.header.ptr
         cdef vdict_t *d = <vdict_t *>hdr.dict[BCF_DT_ID]
 
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         cdef khiter_t k = kh_get_vdict(d, bkey)
 
         if k == kh_end(d) or kh_val_vdict(d, k).info[self.type] & 0xF == 0xF:
-            raise KeyError('invalid key')
+            raise KeyError('invalid key: {}'.format(key))
 
         return makeVariantMetadata(self.header, self.type, kh_val_vdict(d, k).id)
 
@@ -1449,11 +1587,11 @@ cdef class VariantHeaderMetadata(object):
         cdef bcf_hdr_t *hdr = self.header.ptr
         cdef vdict_t *d = <vdict_t *>hdr.dict[BCF_DT_ID]
 
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         cdef khiter_t k = kh_get_vdict(d, bkey)
 
         if k == kh_end(d) or kh_val_vdict(d, k).info[self.type] & 0xF == 0xF:
-            raise KeyError('invalid key')
+            raise KeyError('invalid key: {}'.format(key))
 
         bcf_hdr_remove(hdr, self.type, bkey)
         #bcf_hdr_sync(hdr)
@@ -1555,7 +1693,7 @@ cdef class VariantContig(object):
         return length if length else None
 
     @property
-    def header(self):
+    def header_record(self):
         """:class:`VariantHeaderRecord` associated with this :class:`VariantContig` object"""
         cdef bcf_hdr_t *hdr = self.header.ptr
         cdef bcf_hrec_t *hrec = hdr.id[BCF_DT_CTG][self.id].val.hrec[0]
@@ -1563,8 +1701,8 @@ cdef class VariantContig(object):
 
     def remove_header(self):
         cdef bcf_hdr_t *hdr = self.header.ptr
-        cdef const char *bkey = hdr.id[BCF_DT_CTG][self.id].key
-        bcf_hdr_remove(hdr, BCF_HL_CTG, bkey)
+        cdef const char *key = hdr.id[BCF_DT_CTG][self.id].key
+        bcf_hdr_remove(hdr, BCF_HL_CTG, key)
 
 
 cdef VariantContig makeVariantContig(VariantHeader header, int id):
@@ -1607,11 +1745,11 @@ cdef class VariantHeaderContigs(object):
             return makeVariantContig(self.header, index)
 
         cdef vdict_t *d = <vdict_t *>hdr.dict[BCF_DT_CTG]
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         cdef khiter_t k = kh_get_vdict(d, bkey)
 
         if k == kh_end(d):
-            raise KeyError('invalid contig')
+            raise KeyError('invalid contig: {}'.format(key))
 
         cdef int id = kh_val_vdict(d, k).id
 
@@ -1620,7 +1758,7 @@ cdef class VariantHeaderContigs(object):
     def remove_header(self, key):
         cdef bcf_hdr_t *hdr = self.header.ptr
         cdef int index
-        cdef const char *bkey
+        cdef const char *ckey
         cdef vdict_t *d
         cdef khiter_t k
 
@@ -1628,15 +1766,15 @@ cdef class VariantHeaderContigs(object):
             index = key
             if index < 0 or index >= hdr.n[BCF_DT_CTG]:
                 raise IndexError('invalid contig index')
-            bkey = hdr.id[BCF_DT_CTG][self.id].key
+            ckey = hdr.id[BCF_DT_CTG][self.id].key
         else:
             d = <vdict_t *>hdr.dict[BCF_DT_CTG]
             key = force_bytes(key)
             if kh_get_vdict(d, key) == kh_end(d):
-                raise KeyError('invalid contig')
-            bkey = key
+                raise KeyError('invalid contig: {}'.format(key))
+            ckey = key
 
-        bcf_hdr_remove(hdr, BCF_HL_CTG, bkey)
+        bcf_hdr_remove(hdr, BCF_HL_CTG, ckey)
 
     def clear_header(self):
         cdef bcf_hdr_t *hdr = self.header.ptr
@@ -1704,7 +1842,8 @@ cdef class VariantHeaderContigs(object):
         if id in self:
             raise ValueError('Header already exists for contig {}'.format(id))
 
-        items = [('ID', id)] + kwargs.items()
+        items = [('ID', id)]
+        items += kwargs.items()
         self.header.add_meta('contig', items=items)
 
 
@@ -1749,7 +1888,7 @@ cdef class VariantHeaderSamples(object):
     def __contains__(self, key):
         cdef bcf_hdr_t *hdr = self.header.ptr
         cdef vdict_t *d = <vdict_t *>hdr.dict[BCF_DT_SAMPLE]
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         cdef khiter_t k = kh_get_vdict(d, bkey)
 
         return k != kh_end(d)
@@ -1796,7 +1935,6 @@ cdef class VariantHeader(object):
             self.ptr = NULL
 
     def __bool__(self):
-        # self.ptr == NULL should be impossible
         return self.ptr != NULL
 
     def copy(self):
@@ -1886,11 +2024,50 @@ cdef class VariantHeader(object):
         finally:
             free(hstr)
 
-    cpdef VariantRecord new_record(self):
-        """Create a new empty VariantRecord"""
-        r = makeVariantRecord(self, bcf_init())
-        r.ptr.n_sample = bcf_hdr_nsamples(self.ptr)
-        return r
+    def new_record(self, contig=None, start=0, stop=0, alleles=None,
+                         id=None, qual=None, filter=None, info=None, samples=None,
+                         **kwargs):
+        """Create a new empty VariantRecord.
+
+        Arguments are currently experimental.  Use with caution and expect
+        changes in upcoming releases.
+
+        """
+        rec = makeVariantRecord(self, bcf_init())
+        rec.ptr.n_sample = bcf_hdr_nsamples(self.ptr)
+
+        if contig is not None:
+            rec.contig  = contig
+        if alleles is not None:
+            rec.alleles = alleles
+
+        rec.start = start
+        rec.stop  = stop
+        rec.id    = id
+        rec.qual  = qual
+
+        if filter is not None:
+            if isinstance(filter, (list, tuple, VariantRecordFilter)):
+                for f in filter:
+                    rec.filter.add(f)
+            else:
+                rec.filter.add(filter)
+
+        if info:
+            rec.info.update(info)
+
+        if kwargs:
+            if 'GT' in kwargs:
+                rec.samples[0]['GT'] = kwargs.pop('GT')
+            rec.samples[0].update(kwargs)
+
+        if samples:
+            for i, sample in enumerate(samples):
+                if 'GT' in sample:
+                    rec.samples[i]['GT'] = sample.pop('GT')
+                rec.samples[i].update(sample)
+
+        return rec
 
     def add_record(self, VariantHeaderRecord record):
         """Add an existing :class:`VariantHeaderRecord` to this header"""
@@ -1963,6 +2140,23 @@ cdef VariantHeader makeVariantHeader(bcf_hdr_t *hdr):
     return header
 
 
+cdef inline int bcf_header_get_info_id(bcf_hdr_t *hdr, key) except? -2:
+    cdef vdict_t *d
+    cdef khiter_t k
+    cdef int info_id
+
+    if isinstance(key, str):
+        key = force_bytes(key)
+
+    d = <vdict_t *>hdr.dict[BCF_DT_ID]
+    k = kh_get_vdict(d, key)
+
+    if k == kh_end(d) or kh_val_vdict(d, k).info[BCF_HL_INFO] & 0xF == 0xF:
+        return -1
+
+    return kh_val_vdict(d, k).id
+
+
 ########################################################################
 ########################################################################
 ## Variant Record objects
@@ -2001,7 +2195,7 @@ cdef class VariantRecordFilter(object):
             id = bcf_hdr_id2int(hdr, BCF_DT_ID, bkey)
 
             if not check_header_id(hdr, BCF_HL_FLT, id) or not bcf_has_filter(hdr, r, bkey):
-                raise KeyError('Invalid filter')
+                raise KeyError('Invalid filter: {}'.format(key))
 
         return makeVariantMetadata(self.record.header, BCF_HL_FLT, id)
 
@@ -2014,11 +2208,11 @@ cdef class VariantRecordFilter(object):
         if key == '.':
             key = 'PASS'
 
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         id = bcf_hdr_id2int(hdr, BCF_DT_ID, bkey)
 
         if not check_header_id(hdr, BCF_HL_FLT, id):
-            raise KeyError('Invalid filter')
+            raise KeyError('Invalid filter: {}'.format(key))
 
         bcf_add_filter(hdr, r, id)
 
@@ -2043,7 +2237,7 @@ cdef class VariantRecordFilter(object):
             id = bcf_hdr_id2int(hdr, BCF_DT_ID, bkey)
 
             if not check_header_id(hdr, BCF_HL_FLT, id) or not bcf_has_filter(hdr, r, bkey):
-                raise KeyError('Invalid filter')
+                raise KeyError('Invalid filter: {}'.format(key))
 
         bcf_remove_filter(hdr, r, id, 0)
 
@@ -2071,7 +2265,7 @@ cdef class VariantRecordFilter(object):
     def __contains__(self, key):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         return bcf_has_filter(hdr, r, bkey) == 1
 
     def iterkeys(self):
@@ -2100,6 +2294,20 @@ cdef class VariantRecordFilter(object):
         """D.values() -> list of D's values"""
         return list(self.itervalues())
 
+    def __richcmp__(VariantRecordFilter self not None, VariantRecordFilter other not None, int op):
+        if op != 2 and op != 3:
+            return NotImplemented
+
+        cdef bcf1_t *s = self.record.ptr
+        cdef bcf1_t *o = other.record.ptr
+
+        cdef bint cmp = (s.d.n_flt == o.d.n_flt and list(self) == list(other))
+
+        if op == 3:
+            cmp = not cmp
+
+        return cmp
+
     # Mappings are not hashable by default, but subclasses can change this
     __hash__ = None
 
@@ -2146,11 +2354,11 @@ cdef class VariantRecordFormat(object):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
 
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, bkey)
 
         if not fmt or not fmt.p:
-            raise KeyError('unknown format')
+            raise KeyError('unknown format: {}'.format(key))
 
         return makeVariantMetadata(self.record.header, BCF_HL_FMT, fmt.id)
 
@@ -2158,11 +2366,11 @@ cdef class VariantRecordFormat(object):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
 
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, bkey)
 
         if not fmt or not fmt.p:
-            raise KeyError('unknown format')
+            raise KeyError('unknown format: {}'.format(key))
 
         if bcf_update_format(hdr, r, bkey, fmt.p, 0, fmt.type) < 0:
             raise ValueError('Unable to delete FORMAT')
@@ -2204,7 +2412,7 @@ cdef class VariantRecordFormat(object):
     def __contains__(self, key):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, bkey)
         return fmt != NULL and fmt.p != NULL
 
@@ -2259,38 +2467,65 @@ cdef class VariantRecordInfo(object):
         raise TypeError('this class cannot be instantiated from Python')
 
     def __len__(self):
-        return self.record.ptr.n_info
+        cdef bcf_hdr_t *hdr = self.record.header.ptr
+        cdef bcf1_t *r = self.record.ptr
+        cdef bcf_info_t *info
+        cdef const char *key
+        cdef int i, count = 0
+
+        if bcf_unpack(r, BCF_UN_INFO) < 0:
+            raise ValueError('Error unpacking VariantRecord')
+
+        for i in range(r.n_info):
+            info = &r.d.info[i]
+            key = bcf_hdr_int2id(hdr, BCF_DT_ID, info.key)
+            if info != NULL and info.vptr != NULL and strcmp(key, b'END') != 0:
+                count += 1
+
+        return count
 
     def __bool__(self):
-        return self.record.ptr.n_info != 0
+        cdef bcf_hdr_t *hdr = self.record.header.ptr
+        cdef bcf1_t *r = self.record.ptr
+        cdef bcf_info_t *info
+        cdef const char *key
+        cdef int i
+
+        if bcf_unpack(r, BCF_UN_INFO) < 0:
+            raise ValueError('Error unpacking VariantRecord')
+
+        for i in range(r.n_info):
+            info = &r.d.info[i]
+            key = bcf_hdr_int2id(hdr, BCF_DT_ID, info.key)
+            if info != NULL and info.vptr != NULL and strcmp(key, b'END') != 0:
+                return True
+
+        return False
 
     def __getitem__(self, key):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        cdef vdict_t *d
-        cdef khiter_t k
-        cdef info_id
 
         if bcf_unpack(r, BCF_UN_INFO) < 0:
             raise ValueError('Error unpacking VariantRecord')
 
-        bkey = force_bytes(key)
-        cdef bcf_info_t *info = bcf_get_info(hdr, r, bkey)
+        cdef bytes bkey = force_bytes(key)
 
-        if not info:
-            d = <vdict_t *>hdr.dict[BCF_DT_ID]
-            k = kh_get_vdict(d, bkey)
+        if strcmp(bkey, b'END') == 0:
+            raise KeyError('END is a reserved attribute; access is via record.stop')
 
-            if k == kh_end(d) or kh_val_vdict(d, k).info[BCF_HL_INFO] & 0xF == 0xF:
-                raise KeyError('Unknown INFO field: {}'.format(key))
+        cdef bcf_info_t *info = bcf_get_info(hdr, r, bkey)
 
-            info_id = kh_val_vdict(d, k).id
-        else:
-            info_id = info.key
+        # Cannot stop here if info == NULL, since flags must return False
+        cdef int info_id = bcf_header_get_info_id(hdr, bkey) if not info else info.key
+
+        if info_id < 0:
+            raise KeyError('Unknown INFO field: {}'.format(key))
 
         if not check_header_id(hdr, BCF_HL_INFO, info_id):
             raise ValueError('Invalid header')
 
+        # Handle type=Flag values
         if bcf_hdr_id2type(hdr, BCF_HL_INFO, info_id) == BCF_HT_FLAG:
             return info != NULL and info.vptr != NULL
 
@@ -2300,18 +2535,42 @@ cdef class VariantRecordInfo(object):
         return bcf_info_get_value(self.record, info)
 
     def __setitem__(self, key, value):
+        cdef bytes bkey = force_bytes(key)
+
+        if strcmp(bkey, b'END') == 0:
+            raise KeyError('END is a reserved attribute; access is via record.stop')
+
+        if bcf_unpack(self.record.ptr, BCF_UN_INFO) < 0:
+            raise ValueError('Error unpacking VariantRecord')
+
         bcf_info_set_value(self.record, key, value)
 
     def __delitem__(self, key):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
 
+        cdef bytes bkey = force_bytes(key)
+        if strcmp(bkey, b'END') == 0:
+            raise KeyError('END is a reserved attribute; access is via record.stop')
+
         if bcf_unpack(r, BCF_UN_INFO) < 0:
             raise ValueError('Error unpacking VariantRecord')
 
-        bkey = force_bytes(key)
         cdef bcf_info_t *info = bcf_get_info(hdr, r, bkey)
 
+        # Cannot stop here if info == NULL, since flags must return False
+        cdef int info_id = bcf_header_get_info_id(hdr, bkey) if not info else info.key
+
+        if info_id < 0:
+            raise KeyError('Unknown INFO field: {}'.format(key))
+
+        if not check_header_id(hdr, BCF_HL_INFO, info_id):
+            raise ValueError('Invalid header')
+
+        # Handle flags
+        if bcf_hdr_id2type(hdr, BCF_HL_INFO, info_id) == BCF_HT_FLAG and (not info or not info.vptr):
+            return
+
         if not info or not info.vptr:
             raise KeyError('Unknown INFO field: {}'.format(key))
 
@@ -2333,6 +2592,8 @@ cdef class VariantRecordInfo(object):
             info = &r.d.info[i]
             if info and info.vptr:
                 key = bcf_hdr_int2id(hdr, BCF_DT_ID, info.key)
+                if strcmp(key, b'END') == 0:
+                    continue
                 if bcf_update_info(hdr, r, key, NULL, 0, info.type) < 0:
                     raise ValueError('Unable to delete INFO')
 
@@ -2340,20 +2601,49 @@ cdef class VariantRecordInfo(object):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
         cdef bcf_info_t *info
+        cdef const char *key
         cdef int i
 
+        if bcf_unpack(r, BCF_UN_INFO) < 0:
+            raise ValueError('Error unpacking VariantRecord')
+
         for i in range(r.n_info):
             info = &r.d.info[i]
             if info and info.vptr:
-                yield bcf_str_cache_get_charptr(bcf_hdr_int2id(hdr, BCF_DT_ID, info.key))
+                key = bcf_hdr_int2id(hdr, BCF_DT_ID, info.key)
+                if strcmp(key, b'END') != 0:
+                    yield bcf_str_cache_get_charptr(key)
 
     def get(self, key, default=None):
         """D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None."""
-        try:
-            return self[key]
-        except KeyError:
+        cdef bcf_hdr_t *hdr = self.record.header.ptr
+        cdef bcf1_t *r = self.record.ptr
+
+        if bcf_unpack(r, BCF_UN_INFO) < 0:
+            raise ValueError('Error unpacking VariantRecord')
+
+        cdef bytes bkey = force_bytes(key)
+
+        if strcmp(bkey, b'END') == 0:
             return default
 
+        cdef bcf_info_t *info = bcf_get_info(hdr, r, bkey)
+
+        # Cannot stop here if info == NULL, since flags must return False
+        cdef int info_id = bcf_header_get_info_id(hdr, bkey) if not info else info.key
+
+        if not check_header_id(hdr, BCF_HL_INFO, info_id):
+            raise ValueError('Invalid header')
+
+        # Handle flags
+        if bcf_hdr_id2type(hdr, BCF_HL_INFO, info_id) == BCF_HT_FLAG:
+            return info != NULL and info.vptr != NULL
+
+        if not info or not info.vptr:
+            return default
+
+        return bcf_info_get_value(self.record, info)
+
     def __contains__(self, key):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
@@ -2361,10 +2651,14 @@ cdef class VariantRecordInfo(object):
         if bcf_unpack(r, BCF_UN_INFO) < 0:
             raise ValueError('Error unpacking VariantRecord')
 
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
+
+        if strcmp(bkey, b'END') == 0:
+            return False
+
         cdef bcf_info_t *info = bcf_get_info(hdr, r, bkey)
 
-        return info != NULL
+        return info != NULL and info.vptr != NULL
 
     def iterkeys(self):
         """D.iterkeys() -> an iterator over the keys of D"""
@@ -2372,28 +2666,40 @@ cdef class VariantRecordInfo(object):
 
     def itervalues(self):
         """D.itervalues() -> an iterator over the values of D"""
+        cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
         cdef bcf_info_t *info
+        cdef const char *key
         cdef int i
 
+        if bcf_unpack(r, BCF_UN_INFO) < 0:
+            raise ValueError('Error unpacking VariantRecord')
+
         for i in range(r.n_info):
             info = &r.d.info[i]
             if info and info.vptr:
-                yield bcf_info_get_value(self.record, info)
+                key = bcf_hdr_int2id(hdr, BCF_DT_ID, info.key)
+                if strcmp(key, b'END') != 0:
+                    yield bcf_info_get_value(self.record, info)
 
     def iteritems(self):
         """D.iteritems() -> an iterator over the (key, value) items of D"""
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
         cdef bcf_info_t *info
+        cdef const char *key
         cdef int i
 
+        if bcf_unpack(r, BCF_UN_INFO) < 0:
+            raise ValueError('Error unpacking VariantRecord')
+
         for i in range(r.n_info):
             info = &r.d.info[i]
             if info and info.vptr:
                 key = bcf_hdr_int2id(hdr, BCF_DT_ID, info.key)
-                value = bcf_info_get_value(self.record, info)
-                yield bcf_str_cache_get_charptr(key), value
+                if strcmp(key, b'END') != 0:
+                    value = bcf_info_get_value(self.record, info)
+                    yield bcf_str_cache_get_charptr(key), value
 
     def keys(self):
         """D.keys() -> list of D's keys"""
@@ -2407,11 +2713,75 @@ cdef class VariantRecordInfo(object):
         """D.values() -> list of D's values"""
         return list(self.itervalues())
 
+    def update(self, items=None, **kwargs):
+        """D.update([E, ]**F) -> None.
+
+        Update D from dict/iterable E and F.
+        """
+        for k, v in items.items():
+            if k != 'END':
+                self[k] = v
+
+        if kwargs:
+            kwargs.pop('END', None)
+            for k, v in kwargs.items():
+                self[k] = v
+
+    def pop(self, key, default=_nothing):
+        cdef bcf_hdr_t *hdr = self.record.header.ptr
+        cdef bcf1_t *r = self.record.ptr
+
+        if bcf_unpack(r, BCF_UN_INFO) < 0:
+            raise ValueError('Error unpacking VariantRecord')
+
+        cdef bytes bkey = force_bytes(key)
+        cdef bcf_info_t *info = bcf_get_info(hdr, r, bkey)
+
+        # Cannot stop here if info == NULL, since flags must return False
+        cdef int info_id = bcf_header_get_info_id(hdr, bkey) if not info else info.key
+
+        if info_id < 0:
+            if default is _nothing:
+                raise KeyError('Unknown INFO field: {}'.format(key))
+            return default
+
+        if not check_header_id(hdr, BCF_HL_INFO, info_id):
+            raise ValueError('Invalid header')
+
+        # Handle flags
+        if bcf_hdr_id2type(hdr, BCF_HL_INFO, info_id) == BCF_HT_FLAG and (not info or not info.vptr):
+            return
+
+        if not info or not info.vptr:
+            if default is _nothing:
+                raise KeyError('Unknown INFO field: {}'.format(key))
+            return default
+
+        value = bcf_info_get_value(self.record, info)
+
+        if bcf_update_info(hdr, r, bkey, NULL, 0, info.type) < 0:
+            raise ValueError('Unable to delete INFO')
+
+        return value
+
+    def __richcmp__(VariantRecordInfo self not None, VariantRecordInfo other not None, int op):
+        if op != 2 and op != 3:
+            return NotImplemented
+
+        cdef bcf1_t *s = self.record.ptr
+        cdef bcf1_t *o = other.record.ptr
+
+        # Cannot use n_info as shortcut logic, since null values may remain
+        cdef bint cmp = dict(self) == dict(other)
+
+        if op == 3:
+            cmp = not cmp
+
+        return cmp
+
     # Mappings are not hashable by default, but subclasses can change this
     __hash__ = None
 
-    #TODO: implement __richcmp__
-
 
 cdef VariantRecordInfo makeVariantRecordInfo(VariantRecord record):
     if not record:
@@ -2429,15 +2799,15 @@ cdef class VariantRecordSamples(object):
         raise TypeError('this class cannot be instantiated from Python')
 
     def __len__(self):
-        return bcf_hdr_nsamples(self.record.header.ptr)
+        return self.record.ptr.n_sample  # bcf_hdr_nsamples(self.record.header.ptr)
 
     def __bool__(self):
-        return bcf_hdr_nsamples(self.record.header.ptr) != 0
+        return self.record.ptr.n_sample != 0  # bcf_hdr_nsamples(self.record.header.ptr) != 0
 
     def __getitem__(self, key):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        cdef int n = bcf_hdr_nsamples(hdr)
+        cdef int n = self.record.ptr.n_sample
         cdef int sample_index
         cdef vdict_t *d
         cdef khiter_t k
@@ -2448,7 +2818,7 @@ cdef class VariantRecordSamples(object):
             bkey = force_bytes(key)
             sample_index = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, bkey)
             if sample_index < 0:
-                raise KeyError('invalid sample name')
+                raise KeyError('invalid sample name: {}'.format(key))
 
         if sample_index < 0 or sample_index >= n:
             raise IndexError('invalid sample index')
@@ -2458,7 +2828,7 @@ cdef class VariantRecordSamples(object):
     def __iter__(self):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        cdef int32_t i, n = bcf_hdr_nsamples(hdr)
+        cdef int32_t i, n = self.record.ptr.n_sample
 
         for i in range(n):
             yield charptr_to_str(hdr.samples[i])
@@ -2473,7 +2843,7 @@ cdef class VariantRecordSamples(object):
     def __contains__(self, key):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        cdef int n = bcf_hdr_nsamples(hdr)
+        cdef int n = self.record.ptr.n_sample
         cdef int sample_index
         cdef vdict_t *d
         cdef khiter_t k
@@ -2484,7 +2854,7 @@ cdef class VariantRecordSamples(object):
             bkey = force_bytes(key)
             sample_index = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, bkey)
             if sample_index < 0:
-                raise KeyError('invalid sample name')
+                raise KeyError('invalid sample name: {}'.format(key))
 
         return 0 <= sample_index < n
 
@@ -2496,7 +2866,7 @@ cdef class VariantRecordSamples(object):
         """D.itervalues() -> an iterator over the values of D"""
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        cdef int32_t i, n = bcf_hdr_nsamples(hdr)
+        cdef int32_t i, n = self.record.ptr.n_sample
 
         for i in range(n):
             yield makeVariantRecordSample(self.record, i)
@@ -2505,7 +2875,7 @@ cdef class VariantRecordSamples(object):
         """D.iteritems() -> an iterator over the (key, value) items of D"""
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        cdef int32_t i, n = bcf_hdr_nsamples(hdr)
+        cdef int32_t i, n = self.record.ptr.n_sample
 
         for i in range(n):
             yield (charptr_to_str(hdr.samples[i]), makeVariantRecordSample(self.record, i))
@@ -2522,11 +2892,45 @@ cdef class VariantRecordSamples(object):
         """D.values() -> list of D's values"""
         return list(self.itervalues())
 
+    def update(self, items=None, **kwargs):
+        """D.update([E, ]**F) -> None.
+
+        Update D from dict/iterable E and F.
+        """
+        for k, v in items.items():
+            self[k] = v
+
+        if kwargs:
+            for k, v in kwargs.items():
+                self[k] = v
+
+    def pop(self, key, default=_nothing):
+        try:
+            value = self[key]
+            del self[key]
+            return value
+        except KeyError:
+            if default is not _nothing:
+                return default
+            raise
+
+    def __richcmp__(VariantRecordSamples self not None, VariantRecordSamples other not None, int op):
+        if op != 2 and op != 3:
+            return NotImplemented
+
+        cdef bcf1_t *s = self.record.ptr
+        cdef bcf1_t *o = other.record.ptr
+
+        cdef bint cmp = (s.n_sample == o.n_sample and self.values() == other.values())
+
+        if op == 3:
+            cmp = not cmp
+
+        return cmp
+
     # Mappings are not hashable by default, but subclasses can change this
     __hash__ = None
 
-    #TODO: implement __richcmp__
-
 
 cdef VariantRecordSamples makeVariantRecordSamples(VariantRecord record):
     if not record:
@@ -2566,6 +2970,7 @@ cdef class VariantRecord(object):
                 raise ValueError(msg.format(self.ptr.n_sample, bcf_hdr_nsamples(dst_hdr)))
 
             bcf_translate(dst_hdr, src_hdr, self.ptr)
+            self.header = dst_header
 
     @property
     def rid(self):
@@ -2627,6 +3032,7 @@ cdef class VariantRecord(object):
         if p < 1:
             raise ValueError('Position must be positive')
         self.ptr.pos = p - 1
+        bcf_sync_end(self)
 
     @property
     def start(self):
@@ -2639,6 +3045,7 @@ cdef class VariantRecord(object):
         if s < 0:
             raise ValueError('Start coordinate must be non-negative')
         self.ptr.pos = s
+        bcf_sync_end(self)
 
     @property
     def stop(self):
@@ -2648,25 +3055,21 @@ cdef class VariantRecord(object):
     @stop.setter
     def stop(self, value):
         cdef int s = value
-        if s < self.ptr.pos:
-            raise ValueError('Stop coordinate must be greater than or equal to start')
+        if s < 0:
+            raise ValueError('Stop coordinate must be non-negative')
         self.ptr.rlen = s - self.ptr.pos
-        if self.ptr.rlen != len(self.ref) or 'END' in self.info:
-            self.info['END'] = s
+        bcf_sync_end(self)
 
     @property
     def rlen(self):
-        """record length on chrom/contig (typically rec.stop - rec.start unless END info is supplied)"""
+        """record length on chrom/contig (aka rec.stop - rec.start)"""
         return self.ptr.rlen
 
     @rlen.setter
     def rlen(self, value):
         cdef int r = value
-        if r < 0:
-            raise ValueError('Reference length must be non-negative')
         self.ptr.rlen = r
-        if r != len(self.ref) or 'END' in self.info:
-            self.info['END'] = self.ptr.pos + r
+        bcf_sync_end(self)
 
     @property
     def qual(self):
@@ -2732,6 +3135,8 @@ cdef class VariantRecord(object):
         else:
             alleles = [value]
         self.alleles = alleles
+        self.ptr.rlen = len(value)
+        bcf_sync_end(self)
 
     @property
     def alleles(self):
@@ -2749,17 +3154,28 @@ cdef class VariantRecord(object):
         return res
 
     @alleles.setter
-    def alleles(self, value):
+    def alleles(self, values):
         cdef bcf1_t *r = self.ptr
+
         if bcf_unpack(r, BCF_UN_STR) < 0:
             raise ValueError('Error unpacking VariantRecord')
-        value = [force_bytes(v) for v in value]
-        if b'' in value:
+
+        values = [force_bytes(v) for v in values]
+
+        if len(values) < 2:
+            raise ValueError('must set at least 2 alleles')
+
+        if b'' in values:
             raise ValueError('cannot set null allele')
-        value = b','.join(value)
+
+        value = b','.join(values)
+
         if bcf_update_alleles_str(self.header.ptr, r, value) < 0:
             raise ValueError('Error updating alleles')
 
+        self.ptr.rlen = len(values[0])
+        bcf_sync_end(self)
+
     @property
     def alts(self):
         """tuple of alt alleles"""
@@ -2815,6 +3231,32 @@ cdef class VariantRecord(object):
             raise ValueError('Error unpacking VariantRecord')
         return makeVariantRecordSamples(self)
 
+    def __richcmp__(VariantRecord self not None, VariantRecord other not None, int op):
+        if op != 2 and op != 3:
+            return NotImplemented
+
+        cdef bcf1_t *s = self.ptr
+        cdef bcf1_t *o = other.ptr
+
+        cdef bint cmp = self is other or (
+                             s.pos        == o.pos
+                        and  s.rlen       == o.rlen
+                        and ((bcf_float_is_missing(s.qual) and bcf_float_is_missing(o.qual))
+                          or s.qual       == o.qual)
+                        and  s.n_sample   == o.n_sample
+                        and  s.n_allele   == o.n_allele
+                        and  self.contig  == other.contig
+                        and  self.alleles == other.alleles
+                        and  self.id      == other.id
+                        and  self.info    == other.info
+                        and  self.filter  == other.filter
+                        and  self.samples == other.samples)
+
+        if op == 3:
+            cmp = not cmp
+
+        return cmp
+
     def __str__(self):
         cdef kstring_t line
         cdef char c
@@ -2896,7 +3338,7 @@ cdef class VariantRecordSample(object):
         """sample name"""
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        cdef int32_t n = bcf_hdr_nsamples(hdr)
+        cdef int32_t n = r.n_sample
 
         if self.index < 0 or self.index >= n:
             raise ValueError('invalid sample index')
@@ -3006,7 +3448,7 @@ cdef class VariantRecordSample(object):
     def __contains__(self, key):
         cdef bcf_hdr_t *hdr = self.record.header.ptr
         cdef bcf1_t *r = self.record.ptr
-        bkey = force_bytes(key)
+        cdef bytes bkey = force_bytes(key)
         cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, bkey)
         return fmt != NULL and fmt.p != NULL
 
@@ -3036,11 +3478,42 @@ cdef class VariantRecordSample(object):
         """D.values() -> list of D's values"""
         return list(self.itervalues())
 
+    def update(self, items=None, **kwargs):
+        """D.update([E, ]**F) -> None.
+
+        Update D from dict/iterable E and F.
+        """
+        for k, v in items.items():
+            self[k] = v
+
+        if kwargs:
+            for k, v in kwargs.items():
+                self[k] = v
+
+    def pop(self, key, default=_nothing):
+        try:
+            value = self[key]
+            del self[key]
+            return value
+        except KeyError:
+            if default is not _nothing:
+                return default
+            raise
+
+    def __richcmp__(VariantRecordSample self not None, VariantRecordSample other not None, int op):
+        if op != 2 and op != 3:
+            return NotImplemented
+
+        cdef bint cmp = dict(self) == dict(other)
+
+        if op == 3:
+            cmp = not cmp
+
+        return cmp
+
     # Mappings are not hashable by default, but subclasses can change this
     __hash__ = None
 
-    #TODO: implement __richcmp__
-
 
 cdef VariantRecordSample makeVariantRecordSample(VariantRecord record, int32_t sample_index):
     if not record or sample_index < 0:
@@ -3120,6 +3593,28 @@ cdef class BaseIndex(object):
         """D.values() -> list of D's values"""
         return list(self.itervalues())
 
+    def update(self, items=None, **kwargs):
+        """D.update([E, ]**F) -> None.
+
+        Update D from dict/iterable E and F.
+        """
+        for k, v in items.items():
+            self[k] = v
+
+        if kwargs:
+            for k, v in kwargs.items():
+                self[k] = v
+
+    def pop(self, key, default=_nothing):
+        try:
+            value = self[key]
+            del self[key]
+            return value
+        except KeyError:
+            if default is not _nothing:
+                return default
+            raise
+
     # Mappings are not hashable by default, but subclasses can change this
     __hash__ = None
 
@@ -3253,7 +3748,7 @@ cdef class BCFIterator(BaseIterator):
             try:
                 rid = index.refmap[contig]
             except KeyError:
-                raise ValueError('Unknown contig specified')
+                raise ValueError('Unknown contig specified: {}'.format(contig))
 
             if start is None:
                 start = 0
@@ -3409,7 +3904,7 @@ cdef class TabixIterator(BaseIterator):
 
 cdef class VariantFile(HTSFile):
     """*(filename, mode=None, index_filename=None, header=None, drop_samples=False,
-    duplicate_filehandle=True)*
+    duplicate_filehandle=True, ignore_truncation=False)*
 
     A :term:`VCF`/:term:`BCF` formatted file. The file is automatically
     opened.
@@ -3451,7 +3946,7 @@ cdef class VariantFile(HTSFile):
     drop_samples: bool
         Ignore sample information when reading.
 
-    duplicate_filehandle: bool 
+    duplicate_filehandle: bool
         By default, file handles passed either directly or through
         File-like objects will be duplicated before passing them to
         htslib. The duplication prevents issues where the same stream
@@ -3459,6 +3954,11 @@ cdef class VariantFile(HTSFile):
         high-level python object. Set to False to turn off
         duplication.
 
+    ignore_truncation: bool
+        Issue a warning, instead of raising an error if the current file
+        appears to be truncated due to a missing EOF marker.  Only applies
+        to bgzipped formats. (Default=False)
+
     """
     def __cinit__(self, *args, **kwargs):
         self.htsfile = NULL
@@ -3478,19 +3978,39 @@ cdef class VariantFile(HTSFile):
 
         self.open(*args, **kwargs)
 
+    def __dealloc__(self):
+        if not self.htsfile or not self.header:
+            return
+
+        # Write header if no records were written
+        if self.htsfile.is_write and not self.header_written:
+            with nogil:
+                bcf_hdr_write(self.htsfile, self.header.ptr)
+
+        cdef int ret = hts_close(self.htsfile)
+        self.htsfile = NULL
+        self.header = self.index = None
+
+        if ret < 0:
+            global errno
+            if errno == EPIPE:
+                errno = 0
+            else:
+                raise OSError(errno, force_str(strerror(errno)))
+
     def close(self):
         """closes the :class:`pysam.VariantFile`."""
-        cdef int ret = 0
-        self.header = self.index = None
-        if self.htsfile:
-            # Write header if no records were written
-            if self.htsfile.is_write and not self.header_written:
-                self.header_written = True
-                with nogil:
-                    bcf_hdr_write(self.htsfile, self.header.ptr)
+        if not self.htsfile:
+            return
 
-            ret = hts_close(self.htsfile)
-            self.htsfile = NULL
+        # Write header if no records were written
+        if self.htsfile.is_write and not self.header_written:
+            with nogil:
+                bcf_hdr_write(self.htsfile, self.header.ptr)
+
+        cdef int ret = hts_close(self.htsfile)
+        self.htsfile = NULL
+        self.header = self.index = None
 
         if ret < 0:
             global errno
@@ -3525,7 +4045,7 @@ cdef class VariantFile(HTSFile):
             if ret == -1:
                 raise StopIteration
             elif ret == -2:
-                raise IOError('truncated file')
+                raise OSError('truncated file')
             else:
                 raise ValueError('Variant read failed')
 
@@ -3572,7 +4092,8 @@ cdef class VariantFile(HTSFile):
              index_filename=None,
              VariantHeader header=None,
              drop_samples=False,
-             duplicate_filehandle=True):
+             duplicate_filehandle=True,
+             ignore_truncation=False):
         """open a vcf/bcf file.
 
         If open is called on an existing VariantFile, the current file will be
@@ -3656,7 +4177,6 @@ cdef class VariantFile(HTSFile):
 
         elif mode.startswith(b'r'):
             # open file for reading
-            
             if not self._exists():
                 raise IOError('file `{}` not found'.format(filename))
 
@@ -3668,10 +4188,7 @@ cdef class VariantFile(HTSFile):
             if self.htsfile.format.format not in (bcf, vcf):
                 raise ValueError("invalid file `{}` (mode='{}') - is it VCF/BCF format?".format(filename, mode))
 
-            if self.htsfile.format.compression == bgzf:
-                bgzfp = hts_get_bgzfp(self.htsfile)
-                if bgzfp and bgzf_check_EOF(bgzfp) == 0:
-                    warn('[%s] Warning: no BGZF EOF marker; file may be truncated'.format(filename))
+            self.check_truncation(ignore_truncation)
 
             with nogil:
                 hdr = bcf_hdr_read(self.htsfile)
@@ -3710,7 +4227,6 @@ cdef class VariantFile(HTSFile):
         """reset file position to beginning of file just after the header."""
         return self.seek(self.start_offset)
 
-
     def fetch(self, contig=None, start=None, stop=None, region=None, reopen=False):
         """fetch records in a :term:`region` using 0-based indexing. The
         region is specified by :term:`contig`, *start* and *end*.
@@ -3750,9 +4266,12 @@ cdef class VariantFile(HTSFile):
         self.is_reading = 1
         return self.index.fetch(self, contig, start, stop, region, reopen)
 
-    cpdef VariantRecord new_record(self):
-        """Create a new empty VariantRecord"""
-        return self.header.new_record()
+    def new_record(self, *args, **kwargs):
+        """Create a new empty :class:`VariantRecord`.
+
+        See :meth:`VariantHeader.new_record`
+        """
+        return self.header.new_record(*args, **kwargs)
 
     cpdef int write(self, VariantRecord record) except -1:
         """
@@ -3782,6 +4301,9 @@ cdef class VariantFile(HTSFile):
             msg = 'Invalid VariantRecord.  Number of samples does not match header ({} vs {})'
             raise ValueError(msg.format(record.ptr.n_sample, bcf_hdr_nsamples(self.header.ptr)))
 
+        # Sync END annotation before writing
+        bcf_sync_end(record)
+
         cdef int ret
 
         with nogil:
diff --git a/pysam/libcbcftools.pxd b/pysam/libcbcftools.pxd
new file mode 100644
index 0000000..7c8e632
--- /dev/null
+++ b/pysam/libcbcftools.pxd
@@ -0,0 +1,3 @@
+cdef extern from "cbcftools_util.h":
+
+    int bcftools_main(int argc, char *argv[])
diff --git a/pysam/libcbcftools.pyx b/pysam/libcbcftools.pyx
new file mode 100644
index 0000000..8e90388
--- /dev/null
+++ b/pysam/libcbcftools.pyx
@@ -0,0 +1,2 @@
+def py_bcftools():
+    pass
diff --git a/pysam/libcbgzf.pyx b/pysam/libcbgzf.pyx
index 558ceff..f1d2fa9 100644
--- a/pysam/libcbgzf.pyx
+++ b/pysam/libcbgzf.pyx
@@ -14,9 +14,10 @@ from libc.stdlib cimport malloc, calloc, realloc, free
 from cpython.object cimport PyObject
 from cpython.bytes  cimport PyBytes_FromStringAndSize, _PyBytes_Resize
 
-from pysam.libcutils   cimport force_bytes, force_str, charptr_to_str, charptr_to_str_w_len
-from pysam.libchtslib  cimport *
-
+from pysam.libcutils   cimport force_bytes, encode_filename
+from pysam.libchtslib  cimport bgzf_open, bgzf_index_build_init, bgzf_write, bgzf_read, \
+                               bgzf_flush, bgzf_index_dump, bgzf_close, bgzf_seek, \
+                               bgzf_tell, bgzf_getline, kstring_t, SEEK_SET, BGZF
 
 __all__ = ["BGZFile"]
 
@@ -32,7 +33,7 @@ cdef class BGZFile(object):
     compressed file in text mode, use the gzip.open() function.
     """
     cdef BGZF* bgzf
-    cdef bytes name, index
+    cdef readonly object name, index
 
     def __init__(self, filename, mode=None, index=None):
         """Constructor for the BGZFile class.
@@ -47,10 +48,14 @@ cdef class BGZFile(object):
             raise ValueError("Invalid mode: {!r}".format(mode))
         if not mode:
             mode = 'rb'
-        if mode and 'b' not in mode:
+        elif mode and 'b' not in mode:
             mode += 'b'
-        self.name = force_bytes(filename)
-        self.index = force_bytes(index) if index is not None else None
+
+        mode = force_bytes(mode)
+
+        self.name = encode_filename(filename)
+        self.index = encode_filename(index) if index is not None else None
+
         self.bgzf = bgzf_open(self.name, mode)
 
         if self.bgzf.is_write and index is not None and bgzf_index_build_init(self.bgzf) < 0:
@@ -59,7 +64,7 @@ cdef class BGZFile(object):
     def __dealloc__(self):
         self.close()
 
-    def write(self,data):
+    def write(self, data):
         if not self.bgzf:
             raise ValueError("write() on closed BGZFile object")
 
@@ -177,6 +182,15 @@ cdef class BGZFile(object):
     def seekable(self):
         return True
 
+    def tell(self):
+        if not self.bgzf:
+            raise ValueError("seek() on closed BGZFile object")
+        cdef int64_t off = bgzf_tell(self.bgzf)
+        if off < 0:
+            raise IOError('Error in tell on BGZFFile object')
+
+        return off
+
     def seek(self, offset, whence=io.SEEK_SET):
         if not self.bgzf:
             raise ValueError("seek() on closed BGZFile object")
@@ -198,12 +212,27 @@ cdef class BGZFile(object):
 
         line.l = line.m = 0
         line.s = NULL
-        if bgzf_getline(self.bgzf, '\n', &line) < 0:
-            raise IOError('Error reading line in BGZFFile object')
 
-        ret = charptr_to_str_w_len(line.s, line.l)
+        cdef int ret = bgzf_getline(self.bgzf, '\n', &line)
+        if ret == -1:
+            s = b''
+        elif ret == -2:
+            if line.m:
+                free(line.s)
+            raise IOError('Error reading line in BGZFFile object')
+        else:
+            s = line.s[:line.l]
 
         if line.m:
             free(line.s)
 
-        return ret
+        return s
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        line = self.readline()
+        if not line:
+            raise StopIteration()
+        return line
diff --git a/pysam/libcfaidx.pyx b/pysam/libcfaidx.pyx
index 774152d..3af76f6 100644
--- a/pysam/libcfaidx.pyx
+++ b/pysam/libcfaidx.pyx
@@ -59,7 +59,7 @@ from cpython.version cimport PY_MAJOR_VERSION
 
 from pysam.libchtslib cimport \
     faidx_nseq, fai_load, fai_destroy, fai_fetch, \
-    faidx_seq_len, \
+    faidx_seq_len, faidx_iseq, faidx_seq_len, \
     faidx_fetch_seq, hisremote, \
     bgzf_open, bgzf_close
 
@@ -154,21 +154,17 @@ cdef class FastaFile:
         if self.fastafile == NULL:
             raise IOError("could not open file `%s`" % filename)
 
-        if self.is_remote:
-            filepath_index = os.path.basename(
-                re.sub("[^:]+:[/]*", "", filename)) + ".fai"
-        elif filepath_index is None:
-            filepath_index = filename + ".fai"
-
-        if not os.path.exists(filepath_index):
-            raise ValueError("could not locate index file {}".format(
-                filepath_index))
-
-        with open(filepath_index) as inf:
-            data = [x.split("\t") for x in inf]
-            self._references = tuple(x[0] for x in data)
-            self._lengths = tuple(int(x[1]) for x in data)
-            self.reference2length = dict(zip(self._references, self._lengths))
+        cdef int nreferences = faidx_nseq(self.fastafile)
+        cdef int x
+        cdef const char * s
+        self._references = []
+        self._lengths = []
+        for x from 0 <= x < nreferences:
+            s = faidx_iseq(self.fastafile, x)
+            ss = force_str(s)
+            self._references.append(ss)
+            self._lengths.append(faidx_seq_len(self.fastafile, s))
+        self.reference2length = dict(zip(self._references, self._lengths))
 
     def close(self):
         """close the file."""
@@ -447,6 +443,9 @@ cdef class FastxFile:
     ...        print(entry.sequence)
     ...        print(entry.comment)
     ...        print(entry.quality)
+    >>> with pysam.FastxFile(filename) as fin, open(out_filename, mode='w') as fout:
+    ...    for entry in fin:
+    ...        fout.write(str(entry))
 
     """
     def __cinit__(self, *args, **kwargs):
diff --git a/pysam/libchtslib.pxd b/pysam/libchtslib.pxd
index 657a754..78a55f8 100644
--- a/pysam/libchtslib.pxd
+++ b/pysam/libchtslib.pxd
@@ -9,6 +9,12 @@ cdef extern from "Python.h":
    FILE* PyFile_AsFile(object)
 
 
+# cython does not wrap stdarg
+cdef extern from "stdarg.h":
+    ctypedef struct va_list:
+        pass
+
+   
 cdef extern from "htslib/kstring.h" nogil:
     ctypedef struct kstring_t:
         size_t l, m
@@ -54,7 +60,7 @@ cdef extern from "htslib/hfile.h" nogil:
 
     # @abstract  Open the named file or URL as a stream
     # @return    An hFILE pointer, or NULL (with errno set) if an error occurred.
-    hFILE *hopen(const char *filename, const char *mode)
+    hFILE *hopen(const char *filename, const char *mode, ...)
 
     # @abstract  Associate a stream with an existing open file descriptor
     # @return    An hFILE pointer, or NULL (with errno set) if an error occurred.
@@ -97,6 +103,40 @@ cdef extern from "htslib/hfile.h" nogil:
     # @return    The character read, or EOF on end-of-file or error
     int hgetc(hFILE *fp)
 
+    # Read from the stream until the delimiter, up to a maximum length
+    #    @param buffer  The buffer into which bytes will be written
+    #    @param size    The size of the buffer
+    #    @param delim   The delimiter (interpreted as an `unsigned char`)
+    #    @param fp      The file stream
+    #    @return  The number of bytes read, or negative on error.
+    #    @since   1.4
+    #
+    # Bytes will be read into the buffer up to and including a delimiter, until
+    # EOF is reached, or _size-1_ bytes have been written, whichever comes first.
+    # The string will then be terminated with a NUL byte (`\0`).
+    ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp)
+
+    # Read a line from the stream, up to a maximum length
+    #    @param buffer  The buffer into which bytes will be written
+    #    @param size    The size of the buffer
+    #    @param fp      The file stream
+    #    @return  The number of bytes read, or negative on error.
+    #    @since   1.4
+    #
+    # Specialization of hgetdelim() for a `\n` delimiter.
+    ssize_t hgetln(char *buffer, size_t size, hFILE *fp)
+
+    # Read a line from the stream, up to a maximum length
+    #    @param buffer  The buffer into which bytes will be written
+    #    @param size    The size of the buffer (must be > 1 to be useful)
+    #    @param fp      The file stream
+    #    @return  _buffer_ on success, or `NULL` if an error occurred.
+    #    @since   1.4
+    #
+    # This function can be used as a replacement for `fgets(3)`, or together with
+    # kstring's `kgetline()` to read arbitrarily-long lines into a _kstring_t_.
+    char *hgets(char *buffer, int size, hFILE *fp)
+
     # @abstract  Peek at characters to be read without removing them from buffers
     # @param fp      The file stream
     # @param buffer  The buffer to which the peeked bytes will be written
@@ -623,7 +663,7 @@ cdef extern from "htslib/hts.h" nogil:
     #    @return  The index, or NULL if an error occurred.
     hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx)
 
-    uint8_t *hts_idx_get_meta(hts_idx_t *idx, int *l_meta)
+    uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta)
     void hts_idx_set_meta(hts_idx_t *idx, int l_meta, uint8_t *meta, int is_copy)
 
     int hts_idx_get_stat(const hts_idx_t* idx, int tid,
@@ -694,6 +734,79 @@ cdef extern from "htslib/hts.h" nogil:
 
     int hts_file_type(const char *fname)
 
+    # /***************************
+    #  * Revised MAQ error model *
+    #  ***************************/
+
+    ctypedef struct errmod_t
+
+    errmod_t *errmod_init(double depcorr)
+    void errmod_destroy(errmod_t *em)
+
+    # /*
+    #     n: number of bases
+    #     m: maximum base
+    #     bases[i]: qual:6, strand:1, base:4
+    #     q[i*m+j]: phred-scaled likelihood of (i,j)
+    #  */
+    int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *Probabilistic)
+
+    # /*****************************************
+    #  * q banded glocal alignment *
+    #  *****************************************/
+
+    ctypedef struct probaln_par_t:
+        float d, e
+        int bw;
+
+    int probaln_glocal(const uint8_t *ref,
+                       int l_ref,
+                       const uint8_t *query,
+                       int l_query, const uint8_t *iqual,
+                       const probaln_par_t *c,
+                       int *state, uint8_t *q)
+
+    # /**********************
+    #  * MD5 implementation *
+    #  **********************/
+
+    ctypedef struct hts_md5_context
+
+    # /*! @abstract   Intialises an MD5 context.
+    #  *  @discussion
+    #  *    The expected use is to allocate an hts_md5_context using
+    #  *    hts_md5_init().  This pointer is then passed into one or more calls
+    #  *    of hts_md5_update() to compute successive internal portions of the
+    #  *    MD5 sum, which can then be externalised as a full 16-byte MD5sum
+    #  *    calculation by calling hts_md5_final().  This can then be turned
+    #  *    into ASCII via hts_md5_hex().
+    #  *
+    #  *    To dealloate any resources created by hts_md5_init() call the
+    #  *    hts_md5_destroy() function.
+    #  *
+    #  *  @return     hts_md5_context pointer on success, NULL otherwise.
+    #  */
+    hts_md5_context *hts_md5_init()
+
+    # /*! @abstract Updates the context with the MD5 of the data. */
+    void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size)
+
+    # /*! @abstract Computes the final 128-bit MD5 hash from the given context */
+    void hts_md5_final(unsigned char *digest, hts_md5_context *ctx)
+
+    # /*! @abstract Resets an md5_context to the initial state, as returned
+    #  *            by hts_md5_init().
+    #  */
+    void hts_md5_reset(hts_md5_context *ctx)
+
+    # /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-termninated
+    #  *            hex string.
+    #  */
+    void hts_md5_hex(char *hex, const unsigned char *digest)
+
+    # /*! @abstract Deallocates any memory allocated by hts_md5_init. */
+    void hts_md5_destroy(hts_md5_context *ctx)
+
     inline int hts_reg2bin(int64_t beg, int64_t end, int min_shift, int n_lvls)
     inline int hts_bin_bot(int bin, int n_lvls)
 
@@ -803,7 +916,9 @@ cdef extern from "htslib/sam.h" nogil:
         uint8_t qual
         uint8_t l_qname
         uint16_t flag
-        uint16_t n_cigar
+        uint8_t unused1
+        uint8_t l_extranul
+        uint32_t n_cigar
         int32_t l_qseq
         int32_t mtid
         int32_t mpos
@@ -999,7 +1114,7 @@ cdef extern from "htslib/sam.h" nogil:
     #*************************************
 
     uint8_t *bam_aux_get(const bam1_t *b, const char *tag)
-    int32_t  bam_aux2i(const uint8_t *s)
+    int64_t  bam_aux2i(const uint8_t *s)
     double   bam_aux2f(const uint8_t *s)
     char     bam_aux2A(const uint8_t *s)
     char    *bam_aux2Z(const uint8_t *s)
@@ -1011,6 +1126,18 @@ cdef extern from "htslib/sam.h" nogil:
     #*** Pileup and Mpileup ***
     #**************************
 
+    #  @abstract Generic pileup 'client data'.
+    #  @discussion The pileup iterator allows setting a constructor and
+    #  destructor function, which will be called every time a sequence is
+    #  fetched and discarded.  This permits caching of per-sequence data in
+    #  a tidy manner during the pileup process.  This union is the cached
+    #  data to be manipulated by the "client" (the caller of pileup).
+    # 
+    union bam_pileup_cd:
+        void *p
+        int64_t i
+        double f
+
     # @abstract Structure for one alignment covering the pileup position.
     # @field  b          pointer to the alignment
     # @field  qpos       position of the read base at the pileup site, 0-based
@@ -1041,6 +1168,7 @@ cdef extern from "htslib/sam.h" nogil:
         uint32_t is_tail
         uint32_t is_refskip
         uint32_t aux
+        bam_pileup_cd cd
 
     ctypedef int (*bam_plp_auto_f)(void *data, bam1_t *b)
     ctypedef int (*bam_test_f)()
@@ -1079,34 +1207,116 @@ cdef extern from "htslib/sam.h" nogil:
     # Added by AH
     # ctypedef bam_pileup1_t * const_bam_pileup1_t_ptr "const bam_pileup1_t *"
 
+    # ***********************************
+    # * BAQ calculation and realignment *
+    # ***********************************/
+    int sam_cap_mapq(bam1_t *b, const char *ref, int ref_len, int thres)
+    int sam_prob_realn(bam1_t *b, const char *ref, int ref_len, int flag)
+
 
 cdef extern from "htslib/faidx.h" nogil:
 
     ctypedef struct faidx_t:
        pass
 
+    # /// Build index for a FASTA or bgzip-compressed FASTA file.
+    # /**  @param  fn  FASTA file name
+    # @param  fnfai Name of .fai file to build.
+    # @param  fngzi Name of .gzi file to build (if fn is bgzip-compressed).
+    # @return     0 on success; or -1 on failure
+
+    # If fnfai is NULL, ".fai" will be appended to fn to make the FAI file name.
+    # If fngzi is NULL, ".gzi" will be appended to fn for the GZI file.  The GZI
+    # file will only be built if fn is bgzip-compressed.
+    # */
+    int fai_build3(const char *fn,
+                   const char *fnfai,
+                   const char *fngzi)
+
+    # /// Build index for a FASTA or bgzip-compressed FASTA file.
+    # /** @param  fn  FASTA file name
+    # @return     0 on success; or -1 on failure
+    #
+    # File "fn.fai" will be generated.  This function is equivalent to
+    # fai_build3(fn, NULL, NULL);
+    # */
     int fai_build(char *fn)
 
+    # /// Destroy a faidx_t struct
     void fai_destroy(faidx_t *fai)
 
+    # /// Load FASTA indexes.
+    # /** @param  fn  File name of the FASTA file (can be compressed with bgzip).
+    #     @param  fnfai File name of the FASTA index.
+    #     @param  fngzi File name of the bgzip index.
+    #     @param  flags Option flags to control index file caching and creation.
+    #     @return Pointer to a faidx_t struct on success, NULL on failure.
+    
+    # If fnfai is NULL, ".fai" will be appended to fn to make the FAI file name.
+    # If fngzi is NULL, ".gzi" will be appended to fn for the bgzip index name.
+    # The bgzip index is only needed if fn is compressed.
+    
+    # If (flags & FAI_CREATE) is true, the index files will be built using
+    # fai_build3() if they are not already present.
+    # */
+    faidx_t *fai_load3(const char *fn,
+                       const char *fnfai,
+                       const char *fngzi,
+                       int flags)
+
+    # /// Load index from "fn.fai".
+    # /** @param  fn  File name of the FASTA file
+    #     @return Pointer to a faidx_t struct on success, NULL on failure.
+    # This function is equivalent to fai_load3(fn, NULL, NULL, FAI_CREATE|FAI_CACHE);
+    # */
     faidx_t *fai_load(char *fn)
 
+    # /// Fetch the sequence in a region
+    # /** @param  fai  Pointer to the faidx_t struct
+    #     @param  reg  Region in the format "chr2:20,000-30,000"
+    #     @param  len  Length of the region; -2 if seq not present, -1 general error
+    #     @return      Pointer to the sequence; `NULL` on failure
+    # The returned sequence is allocated by `malloc()` family and should be destroyed
+    # by end users by calling `free()` on it.
+    # */
     char *fai_fetch(faidx_t *fai,
                     char *reg,
                     int *len)
 
-    int faidx_nseq(faidx_t *fai)
-
-    int faidx_has_seq(faidx_t *fai, const char *seq)
-
+    # /// Fetch the sequence in a region
+    # /** @param  fai  Pointer to the faidx_t struct
+    #     @param  c_name Region name
+    #     @param  p_beg_i  Beginning position number (zero-based)
+    #     @param  p_end_i  End position number (zero-based)
+    #     @param  len  Length of the region; -2 if c_name not present, -1 general error
+    #     @return      Pointer to the sequence; null on failure
+    # The returned sequence is allocated by `malloc()` family and should be destroyed
+    # by end users by calling `free()` on it.
+    # */
     char *faidx_fetch_seq(faidx_t *fai,
                          char *c_name,
                          int p_beg_i,
                          int p_end_i,
                          int *len)
 
-    int faidx_seq_len(faidx_t *fai, const char *seq)
+    # /// Query if sequence is present
+    # /**   @param  fai  Pointer to the faidx_t struct
+    #   @param  seq  Sequence name
+    #   @return      1 if present or 0 if absent
+    #   */
+    int faidx_has_seq(faidx_t *fai, const char *seq)
+
+    # /// Fetch the number of sequences
+    # /** @param  fai  Pointer to the faidx_t struct
+    # @return      The number of sequences
+    # */
+    int faidx_nseq(const faidx_t *fai)
 
+    # /// Return name of i-th sequence
+    const char *faidx_iseq(const faidx_t *fai, int i)
+
+    # /// Return sequence length, -1 if not present
+    int faidx_seq_len(faidx_t *fai, const char *seq)
 
 # tabix support
 cdef extern from "htslib/tbx.h" nogil:
@@ -1695,7 +1905,7 @@ cdef extern from "htslib/vcf.h" nogil:
     int bcf_get_format_int32(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, int32_t **dst, int *ndst)
     int bcf_get_format_float(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, float **dst, int *ndst)
     int bcf_get_format_char(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, char **dst, int *ndst)
-    int bcf_get_genotypes(const bcf_hdr_t *hdr, bcf1_t *line, int **dst, int *ndst)
+    int bcf_get_genotypes(const bcf_hdr_t *hdr, bcf1_t *line, int32_t **dst, int *ndst)
     int bcf_get_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, char ***dst, int *ndst)
     int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type)
 
@@ -1901,6 +2111,455 @@ cdef extern from "htslib/vcfutils.h" nogil:
     uint32_t bcf_ij2G(uint32_t i, uint32_t j)
 
 
+cdef extern from "htslib/cram.h" nogil:
+
+    enum cram_block_method:
+        ERROR
+        RAW
+        GZIP
+        BZIP2
+        LZMA
+        RANS
+        RANS0
+        RANS1
+        GZIP_RLE
+
+    enum cram_content_type:
+        CT_ERROR
+        FILE_HEADER
+        COMPRESSION_HEADER
+        MAPPED_SLICE
+        UNMAPPED_SLICE
+        EXTERNAL
+        CORE
+
+    # Opaque data types, see cram_structs for the fully fledged versions.
+    ctypedef struct SAM_hdr
+    ctypedef struct cram_file_def
+    ctypedef struct cram_fd
+    ctypedef struct cram_container
+    ctypedef struct cram_block
+    ctypedef struct cram_slice
+    ctypedef struct cram_metrics
+    ctypedef struct cram_block_slice_hdr
+    ctypedef struct cram_block_compression_hdr
+    ctypedef struct refs_t
+
+    # Accessor functions
+
+    #
+    #-----------------------------------------------------------------------------
+    # cram_fd
+    #
+    SAM_hdr *cram_fd_get_header(cram_fd *fd)
+    void cram_fd_set_header(cram_fd *fd, SAM_hdr *hdr)
+
+    int cram_fd_get_version(cram_fd *fd)
+    void cram_fd_set_version(cram_fd *fd, int vers)
+
+    int cram_major_vers(cram_fd *fd)
+    int cram_minor_vers(cram_fd *fd)
+
+    hFILE *cram_fd_get_fp(cram_fd *fd)
+    void cram_fd_set_fp(cram_fd *fd, hFILE *fp)
+
+    #
+    #-----------------------------------------------------------------------------
+    # cram_container
+    #
+    int32_t cram_container_get_length(cram_container *c)
+    void cram_container_set_length(cram_container *c, int32_t length)
+    int32_t cram_container_get_num_blocks(cram_container *c)
+    void cram_container_set_num_blocks(cram_container *c, int32_t num_blocks)
+    int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks)
+    void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks,
+				      int32_t *landmarks)
+
+    # Returns true if the container is empty (EOF marker) */
+    int cram_container_is_empty(cram_fd *fd)
+
+
+    #
+    #-----------------------------------------------------------------------------
+    # cram_block
+    #
+    int32_t cram_block_get_content_id(cram_block *b)
+    int32_t cram_block_get_comp_size(cram_block *b)
+    int32_t cram_block_get_uncomp_size(cram_block *b)
+    int32_t cram_block_get_crc32(cram_block *b)
+    void *  cram_block_get_data(cram_block *b)
+
+    cram_content_type cram_block_get_content_type(cram_block *b)
+
+    void cram_block_set_content_id(cram_block *b, int32_t id)
+    void cram_block_set_comp_size(cram_block *b, int32_t size)
+    void cram_block_set_uncomp_size(cram_block *b, int32_t size)
+    void cram_block_set_crc32(cram_block *b, int32_t crc)
+    void cram_block_set_data(cram_block *b, void *data)
+
+    int cram_block_append(cram_block *b, void *data, int size)
+    void cram_block_update_size(cram_block *b)
+
+    # Offset is known as "size" internally, but it can be confusing.
+    size_t cram_block_get_offset(cram_block *b)
+    void cram_block_set_offset(cram_block *b, size_t offset)
+
+    #
+    # Computes the size of a cram block, including the block
+    # header itself.
+    #
+    uint32_t cram_block_size(cram_block *b)
+
+    #
+    # Renumbers RG numbers in a cram compression header.
+    #
+    # CRAM stores RG as the Nth number in the header, rather than a
+    # string holding the ID: tag.  This is smaller in space, but means
+    # "samtools cat" to join files together that contain single but
+    # different RG lines needs a way of renumbering them.
+    #
+    # The file descriptor is expected to be immediately after the
+    # cram_container structure (ie before the cram compression header).
+    # Due to the nature of the CRAM format, this needs to read and write
+    # the blocks itself.  Note that there may be multiple slices within
+    # the container, meaning multiple compression headers to manipulate.
+    # Changing RG may change the size of the compression header and
+    # therefore the length field in the container.  Hence we rewrite all
+    # blocks just incase and also emit the adjusted container.
+    #
+    # The current implementation can only cope with renumbering a single
+    # RG (and only then if it is using HUFFMAN or BETA codecs).  In
+    # theory it *may* be possible to renumber multiple RGs if they use
+    # HUFFMAN to the CORE block or use an external block unshared by any
+    # other data series.  So we have an API that can be upgraded to
+    # support this, but do not implement it for now.  An example
+    # implementation of RG as an EXTERNAL block would be to find that
+    # block and rewrite it, returning the number of blocks consumed.
+    #
+    # Returns 0 on success;
+    #        -1 if unable to edit;
+    #        -2 on other errors (eg I/O).
+    #
+    int cram_transcode_rg(cram_fd *input, cram_fd *output,
+    			  cram_container *c,
+			  int nrg, int *in_rg, int *out_rg)
+
+    #
+    # Copies the blocks representing the next num_slice slices from a
+    # container from 'in' to 'out'.  It is expected that the file pointer
+    # is just after the read of the cram_container and cram compression
+    # header.
+    #
+    # Returns 0 on success
+    #        -1 on failure
+    #
+    int cram_copy_slice(cram_fd *input, cram_fd *output, int32_t num_slice)
+
+    #
+    #-----------------------------------------------------------------------------
+    # SAM_hdr
+    #
+
+    # Tokenises a SAM header into a hash table.
+    #
+    # Also extracts a few bits on specific data types, such as @RG lines.
+    #
+    # @return
+    # Returns a SAM_hdr struct on success (free with sam_hdr_free())
+    #         NULL on failure
+    #
+    SAM_hdr *sam_hdr_parse_(const char *hdr, int len)
+
+
+    #
+    #-----------------------------------------------------------------------------
+    # cram_io basics
+    #
+
+    # CRAM blocks - the dynamically growable data block. We have code to
+    # create, update, (un)compress and read/write.
+    #
+    # These are derived from the deflate_interlaced.c blocks, but with the
+    # CRAM extension of content types and IDs.
+    #
+
+    # Allocates a new cram_block structure with a specified content_type and
+    # id.
+    #
+    # @return
+    # Returns block pointer on success;
+    #         NULL on failure
+    #
+    cram_block *cram_new_block(cram_content_type content_type,
+			       int content_id)
+
+    # Reads a block from a cram file.
+    #
+    # @return
+    # Returns cram_block pointer on success;
+    #         NULL on failure
+    #
+    cram_block *cram_read_block(cram_fd *fd)
+
+    # Writes a CRAM block.
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int cram_write_block(cram_fd *fd, cram_block *b)
+
+    # Frees a CRAM block, deallocating internal data too.
+    #
+    void cram_free_block(cram_block *b)
+
+    # Uncompresses a CRAM block, if compressed.
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int cram_uncompress_block(cram_block *b)
+
+    # Compresses a block.
+    #
+    # Compresses a block using one of two different zlib strategies. If we only
+    # want one choice set strat2 to be -1.
+    #
+    # The logic here is that sometimes Z_RLE does a better job than Z_FILTERED
+    # or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is
+    # significantly faster.
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
+			    int method, int level)
+
+    # Containers
+    #
+
+    # Creates a new container, specifying the maximum number of slices
+    # and records permitted.
+    #
+    # @return
+    # Returns cram_container ptr on success;
+    #         NULL on failure
+    #
+    cram_container *cram_new_container(int nrec, int nslice)
+    void cram_free_container(cram_container *c)
+
+    # Reads a container header.
+    #
+    # @return
+    # Returns cram_container on success;
+    #         NULL on failure or no container left (fd->err == 0).
+    #
+    cram_container *cram_read_container(cram_fd *fd)
+
+    # Writes a container structure.
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int cram_write_container(cram_fd *fd, cram_container *h)
+
+    #
+    # Stores the container structure in dat and returns *size as the
+    # number of bytes written to dat[].  The input size of dat is also
+    # held in *size and should be initialised to cram_container_size(c).
+    #
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size)
+
+    int cram_container_size(cram_container *c)
+
+    # The top-level cram opening, closing and option handling
+    #
+
+    # Opens a CRAM file for read (mode "rb") or write ("wb").
+    #
+    # The filename may be "-" to indicate stdin or stdout.
+    #
+    # @return
+    # Returns file handle on success;
+    #         NULL on failure.
+    #
+    cram_fd *cram_open(const char *filename, const char *mode)
+
+    # Opens an existing stream for reading or writing.
+    #
+    # @return
+    # Returns file handle on success;
+    #         NULL on failure.
+    #
+    cram_fd *cram_dopen(hFILE *fp, const char *filename, const char *mode)
+
+    # Closes a CRAM file.
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int cram_close(cram_fd *fd)
+
+    #
+    # Seek within a CRAM file.
+    #
+    # Returns 0 on success
+    #        -1 on failure
+    #
+    int cram_seek(cram_fd *fd, off_t offset, int whence)
+
+    #
+    # Flushes a CRAM file.
+    # Useful for when writing to stdout without wishing to close the stream.
+    #
+    # Returns 0 on success
+    #        -1 on failure
+    #
+    int cram_flush(cram_fd *fd)
+
+    # Checks for end of file on a cram_fd stream.
+    #
+    # @return
+    # Returns 0 if not at end of file
+    #         1 if we hit an expected EOF (end of range or EOF block)
+    #         2 for other EOF (end of stream without EOF block)
+    #
+    int cram_eof(cram_fd *fd)
+
+    # Sets options on the cram_fd.
+    #
+    # See CRAM_OPT_* definitions in hts.h.
+    # Use this immediately after opening.
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int cram_set_option(cram_fd *fd, hts_fmt_option opt, ...)
+
+    # Sets options on the cram_fd.
+    #
+    # See CRAM_OPT_* definitions in hts.h.
+    # Use this immediately after opening.
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int cram_set_voption(cram_fd *fd, hts_fmt_option opt, va_list args)
+
+    #
+    # Attaches a header to a cram_fd.
+    #
+    # This should be used when creating a new cram_fd for writing where
+    # we have an SAM_hdr already constructed (eg from a file we've read
+    # in).
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int cram_set_header(cram_fd *fd, SAM_hdr *hdr)
+
+    # Check if this file has a proper EOF block
+    #
+    # @return
+    # Returns 3 if the file is a version of CRAM that does not contain EOF blocks
+    #         2 if the file is a stream and thus unseekable
+    #         1 if the file contains an EOF block
+    #         0 if the file does not contain an EOF block
+    #        -1 if an error occured whilst reading the file or we could not seek back to where we were
+    #
+    #
+    int cram_check_EOF(cram_fd *fd)
+
+    # As int32_decoded/encode, but from/to blocks instead of cram_fd */
+    int int32_put_blk(cram_block *b, int32_t val)
+
+    # Deallocates all storage used by a SAM_hdr struct.
+    #
+    # This also decrements the header reference count. If after decrementing
+    # it is still non-zero then the header is assumed to be in use by another
+    # caller and the free is not done.
+    #
+    # This is a synonym for sam_hdr_dec_ref().
+    #
+    void sam_hdr_free(SAM_hdr *hdr)
+
+    # Returns the current length of the SAM_hdr in text form.
+    #
+    # Call sam_hdr_rebuild() first if editing has taken place.
+    #
+    int sam_hdr_length(SAM_hdr *hdr)
+
+    # Returns the string form of the SAM_hdr.
+    #
+    # Call sam_hdr_rebuild() first if editing has taken place.
+    #
+    char *sam_hdr_str(SAM_hdr *hdr)
+
+    # Appends a formatted line to an existing SAM header.
+    #
+    # Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
+    # optional new-line. If it contains more than 1 line then multiple lines
+    # will be added in order.
+    #
+    # Len is the length of the text data, or 0 if unknown (in which case
+    # it should be null terminated).
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+
+    # Add an @PG line.
+    #
+    # If we wish complete control over this use sam_hdr_add() directly. This
+    # function uses that, but attempts to do a lot of tedious house work for
+    # you too.
+    #
+    # - It will generate a suitable ID if the supplied one clashes.
+    # - It will generate multiple @PG records if we have multiple PG chains.
+    #
+    # Call it as per sam_hdr_add() with a series of key,value pairs ending
+    # in NULL.
+    #
+    # @return
+    # Returns 0 on success;
+    #        -1 on failure
+    #
+    int sam_hdr_add_PG(SAM_hdr *sh, const char *name, ...)
+
+    #
+    # A function to help with construction of CL tags in @PG records.
+    # Takes an argc, argv pair and returns a single space-separated string.
+    # This string should be deallocated by the calling function.
+    #
+    # @return
+    # Returns malloced char * on success;
+    #         NULL on failure
+    #
+    char *stringify_argv(int argc, char *argv[])
+
+    #
+    # Returns the refs_t structure used by a cram file handle.
+    #
+    # This may be used in conjunction with option CRAM_OPT_SHARED_REF to
+    # share reference memory between multiple file handles.
+    #
+    # @return
+    # Returns NULL if none exists or the file handle is not a CRAM file.
+    #
+    refs_t *cram_get_refs(htsFile *fd)
+
+
 cdef class HTSFile(object):
     cdef          htsFile *htsfile       # pointer to htsFile structure
     cdef          int64_t start_offset   # BGZF offset of first record
diff --git a/pysam/libchtslib.pyx b/pysam/libchtslib.pyx
index 7eea059..4b8d9c0 100644
--- a/pysam/libchtslib.pyx
+++ b/pysam/libchtslib.pyx
@@ -2,8 +2,11 @@
 # cython: profile=True
 # adds doc-strings for sphinx
 import os
+import io
 
 from posix.unistd cimport dup
+from libc.errno  cimport errno
+from cpython cimport PyBytes_FromStringAndSize
 
 from pysam.libchtslib cimport *
 
@@ -11,15 +14,24 @@ from pysam.libcutils cimport force_bytes, force_str, charptr_to_str, charptr_to_
 from pysam.libcutils cimport encode_filename, from_string_and_size
 
 
-__all__ = ["get_verbosity", "set_verbosity"]
+from warnings         import warn
 
 
+__all__ = ['get_verbosity', 'set_verbosity', 'HFile', 'HTSFile']
+
+# defines imported from samtools
+DEF SEEK_SET = 0
+DEF SEEK_CUR = 1
+DEF SEEK_END = 2
+
 ########################################################################
 ########################################################################
 ## Constants
 ########################################################################
 
+# maximum genomic coordinace
 cdef int   MAX_POS = 2 << 29
+
 cdef tuple FORMAT_CATEGORIES = ('UNKNOWN', 'ALIGNMENTS', 'VARIANTS', 'INDEX', 'REGIONS')
 cdef tuple FORMATS = ('UNKNOWN', 'BINARY_FORMAT', 'TEXT_FORMAT', 'SAM', 'BAM', 'BAI', 'CRAM', 'CRAI',
                       'VCF', 'BCF', 'CSI', 'GZI', 'TBI', 'BED')
@@ -35,6 +47,230 @@ cpdef get_verbosity():
     return hts_get_verbosity()
 
 
+cdef class HFile(object):
+    cdef hFILE *fp
+    cdef readonly object name, mode
+
+    def __init__(self, name, mode='r', closedf=True):
+        self._open(name, mode, closefd=True)
+
+    def __dealloc__(self):
+        self.close()
+
+    @property
+    def closed(self):
+        return self.fp == NULL
+
+    cdef _open(self, name, mode, closefd=True):
+        self.name = name
+        self.mode = mode
+
+        mode = force_bytes(mode)
+
+        if isinstance(name, int):
+            if self.fp != NULL:
+                name = dup(name)
+            self.fp = hdopen(name, mode)
+        else:
+            name = encode_filename(name)
+            self.fp = hopen(name, mode)
+
+        if not self.fp:
+            raise OSError(errno, 'failed to open HFile', self.name)
+
+    def close(self):
+        if self.fp == NULL:
+            return
+
+        cdef hFILE *fp = self.fp
+        self.fp = NULL
+
+        if hclose(fp) != 0:
+            raise OSError(herrno(self.fp), 'failed to close HFile', self.name)
+
+    def fileno(self):
+        if self.fp == NULL:
+            raise OSError('operation on closed HFile')
+        if isinstance(self.name, int):
+            return self.name
+        else:
+            raise AttributeError('fileno not available')
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, tb):
+        self.close()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        line = self.readline()
+        if not line:
+            raise StopIteration()
+        return line
+
+    def flush(self):
+        if self.fp == NULL:
+            raise OSError('operation on closed HFile')
+        if hflush(self.fp) != 0:
+            raise OSError(herrno(self.fp), 'failed to flush HFile', self.name)
+
+    def isatty(self):
+        if self.fp == NULL:
+            raise OSError('operation on closed HFile')
+        return False
+
+    def readable(self):
+        return self.fp != NULL and 'r' in self.mode
+
+    def read(self, Py_ssize_t size=-1):
+        if self.fp == NULL:
+            raise OSError('operation on closed HFile')
+
+        if size == 0:
+            return b''
+
+        cdef list parts = []
+        cdef bytes part
+        cdef Py_ssize_t chunk_size, ret, bytes_read = 0
+        cdef char *cpart
+
+        while size == -1 or bytes_read < size:
+            chunk_size = 4096
+            if size != -1:
+                chunk_size = min(chunk_size, size - bytes_read)
+
+            part = PyBytes_FromStringAndSize(NULL, chunk_size)
+            cpart = <char *>part
+            ret = hread(self.fp, <void *>cpart, chunk_size)
+
+            if ret < 0:
+                OSError(herrno(self.fp), 'failed to read HFile', self.name)
+            elif not ret:
+                break
+
+            bytes_read += ret
+
+            if ret < chunk_size:
+                part = cpart[:ret]
+
+            parts.append(part)
+
+        return b''.join(parts)
+
+    def readall(self):
+        return self.read()
+
+    def readinto(self, buf):
+        if self.fp == NULL:
+            raise OSError('operation on closed HFile')
+
+        size = len(buf)
+
+        if size == 0:
+            return size
+
+        mv = memoryview(buf)
+        ret = hread(self.fp, <void *>mv, size)
+
+        if ret < 0:
+            OSError(herrno(self.fp), 'failed to read HFile', self.name)
+
+        return ret
+
+    def readline(self, Py_ssize_t size=-1):
+        if self.fp == NULL:
+            raise OSError('operation on closed HFile')
+
+        if size == 0:
+            return b''
+
+        cdef list parts = []
+        cdef bytes part
+        cdef Py_ssize_t chunk_size, ret, bytes_read = 0
+        cdef char *cpart
+
+        while size == -1 or bytes_read < size:
+            chunk_size = 4096
+            if size != -1:
+                chunk_size = min(chunk_size, size - bytes_read)
+
+            part = PyBytes_FromStringAndSize(NULL, chunk_size)
+            cpart = <char *>part
+
+            # Python bytes objects allocate an extra byte for a null terminator
+            ret = hgetln(cpart, chunk_size+1, self.fp)
+
+            if ret < 0:
+                OSError(herrno(self.fp), 'failed to read HFile', self.name)
+            elif not ret:
+                break
+
+            bytes_read += ret
+
+            if ret < chunk_size:
+                part = cpart[:ret]
+                cpart = <char *>part
+
+            parts.append(part)
+
+            if cpart[ret-1] == b'\n':
+               break
+
+        return b''.join(parts)
+
+    def readlines(self):
+        return list(self)
+
+    def seek(self, Py_ssize_t offset, int whence=SEEK_SET):
+        if self.fp == NULL:
+            raise OSError('operation on closed HFile')
+
+        cdef Py_ssize_t off = hseek(self.fp, offset, whence)
+
+        if off < 0:
+            raise OSError(herrno(self.fp), 'seek failed on HFile', self.name)
+
+        return off
+
+    def tell(self):
+        if self.fp == NULL:
+            raise OSError('operation on closed HFile')
+
+        ret = htell(self.fp)
+
+        if ret < 0:
+            raise OSError(herrno(self.fp), 'tell failed on HFile', self.name)
+
+        return ret
+
+    def seekable(self):
+        return self.fp != NULL
+
+    def truncate(self, size=None):
+        raise NotImplementedError()
+
+    def writable(self):
+        return self.fp != NULL and 'w' in self.mode
+
+    def write(self, bytes b):
+        if self.fp == NULL:
+            raise OSError('operation on closed HFile')
+
+        got = hwrite(self.fp, <void *>b, len(b))
+
+        if got < 0:
+            raise OSError(herrno(self.fp), 'write failed on HFile', self.name)
+
+        return got
+
+    def writelines(self, lines):
+        for line in lines:
+            self.write(line)
+
+
 class CallableValue(object):
     def __init__(self, value):
         self.value = value
@@ -62,11 +298,38 @@ cdef class HTSFile(object):
         self.htsfile = NULL
         self.duplicate_filehandle = True
 
+    def close(self):
+        if self.htsfile:
+            hts_close(self.htsfile)
+            self.htsfile = NULL
+
     def __dealloc__(self):
         if self.htsfile:
             hts_close(self.htsfile)
             self.htsfile = NULL
 
+    def check_truncation(self, ignore_truncation=False):
+        """Check if file is truncated."""
+        if not self.htsfile:
+            return
+
+        if self.htsfile.format.compression != bgzf:
+            return
+
+        cdef BGZF *bgzfp = hts_get_bgzfp(self.htsfile)
+        if not bgzfp:
+            return
+
+        cdef int ret = bgzf_check_EOF(bgzfp)
+        if ret < 0:
+            raise OSError(errno, 'error checking for EOF marker')
+        elif ret == 0:
+            msg = 'no BGZF EOF marker; file may be truncated'.format(self.filename)
+            if ignore_truncation:
+                warn(msg)
+            else:
+                raise OSError(msg)
+
     def __enter__(self):
         return self
 
@@ -189,12 +452,15 @@ cdef class HTSFile(object):
             raise OSError('seek not available in streams')
 
         cdef int64_t ret
-        if self.htsfile.format.compression != no_compression:
+        if self.htsfile.format.compression == bgzf:
             with nogil:
                 ret = bgzf_seek(hts_get_bgzfp(self.htsfile), offset, SEEK_SET)
-        else:
+        elif self.htsfile.format.compression == no_compression:
             with nogil:
                 ret = hts_useek(self.htsfile, <int>offset, SEEK_SET)
+        else:
+            raise NotImplementedError("seek not implemented in files compressed by method {}".format(
+                self.htsfile.format.compression))
         return ret
 
     def tell(self):
@@ -205,12 +471,19 @@ cdef class HTSFile(object):
             raise OSError('tell not available in streams')
 
         cdef int64_t ret
-        if self.htsfile.format.compression != no_compression:
+        if self.htsfile.format.compression == bgzf:
             with nogil:
                 ret = bgzf_tell(hts_get_bgzfp(self.htsfile))
-        else:
+        elif self.htsfile.format.compression == no_compression:
             with nogil:
                 ret = hts_utell(self.htsfile)
+        elif self.htsfile.format.format == cram:
+            with nogil:
+                ret = htell(cram_fd_get_fp(self.htsfile.fp.cram))
+        else:
+            raise NotImplementedError("seek not implemented in files compressed by method {}".format(
+                self.htsfile.format.compression))
+
         return ret
 
     cdef htsFile *_open_htsfile(self) except? NULL:
@@ -227,7 +500,7 @@ cdef class HTSFile(object):
                 fd = self.filename
             else:
                 fd = self.filename.fileno()
-               
+
             if self.duplicate_filehandle:
                 dup_fd = dup(fd)
             else:
diff --git a/pysam/libcsamtools.pxd b/pysam/libcsamtools.pxd
new file mode 100644
index 0000000..5fdc57f
--- /dev/null
+++ b/pysam/libcsamtools.pxd
@@ -0,0 +1,3 @@
+cdef extern from "csamtools_util.h":
+
+    int samtools_main(int argc, char *argv[])
diff --git a/pysam/libcsamtools.pyx b/pysam/libcsamtools.pyx
new file mode 100644
index 0000000..cc60ace
--- /dev/null
+++ b/pysam/libcsamtools.pyx
@@ -0,0 +1,2 @@
+def py_samtools():
+    pass
diff --git a/pysam/libctabix.pxd b/pysam/libctabix.pxd
index 12cd9dd..c986f03 100644
--- a/pysam/libctabix.pxd
+++ b/pysam/libctabix.pxd
@@ -81,6 +81,10 @@ cdef class asGTF(Parser):
     pass
 
 
+cdef class asGFF3(Parser):
+    pass
+
+
 cdef class asBed(Parser):
     pass
 
diff --git a/pysam/libctabix.pyx b/pysam/libctabix.pyx
index 10dc23b..b10c0d0 100644
--- a/pysam/libctabix.pyx
+++ b/pysam/libctabix.pyx
@@ -9,7 +9,8 @@
 # class TabixFile  class wrapping tabix indexed files in bgzf format
 #
 # class asTuple  Parser class for tuples
-# class asGT     Parser class for GTF formatted rows
+# class asGTF    Parser class for GTF formatted rows
+# class asGFF3   Parser class for GFF3 formatted rows
 # class asBed    Parser class for Bed formatted rows
 # class asVCF    Parser class for VCF formatted rows
 #
@@ -110,6 +111,42 @@ cdef class asTuple(Parser):
         return r
 
 
+cdef class asGFF3(Parser):
+    '''converts a :term:`tabix row` into a GFF record with the following
+    fields:
+   
+    +----------+----------+-------------------------------+
+    |*Column*  |*Name*    |*Content*                      |
+    +----------+----------+-------------------------------+
+    |1         |contig    |the chromosome name            |
+    +----------+----------+-------------------------------+
+    |2         |feature   |The feature type               |
+    +----------+----------+-------------------------------+
+    |3         |source    |The feature source             |
+    +----------+----------+-------------------------------+
+    |4         |start     |genomic start coordinate       |
+    |          |          |(0-based)                      |
+    +----------+----------+-------------------------------+
+    |5         |end       |genomic end coordinate         |
+    |          |          |(0-based)                      |
+    +----------+----------+-------------------------------+
+    |6         |score     |feature score                  |
+    +----------+----------+-------------------------------+
+    |7         |strand    |strand                         |
+    +----------+----------+-------------------------------+
+    |8         |frame     |frame                          |
+    +----------+----------+-------------------------------+
+    |9         |attributes|the attribute field            |
+    +----------+----------+-------------------------------+
+
+    ''' 
+    cdef parse(self, char * buffer, int len):
+        cdef ctabixproxies.GFF3Proxy r
+        r = ctabixproxies.GFF3Proxy(self.encoding)
+        r.copy(buffer, len)
+        return r
+
+
 cdef class asGTF(Parser):
     '''converts a :term:`tabix row` into a GTF record with the following
     fields:
@@ -155,7 +192,7 @@ cdef class asGTF(Parser):
         r = ctabixproxies.GTFProxy(self.encoding)
         r.copy(buffer, len)
         return r
-
+    
 
 cdef class asBed(Parser):
     '''converts a :term:`tabix row` into a bed record
@@ -1178,6 +1215,7 @@ __all__ = [
     "Tabixfile",
     "asTuple",
     "asGTF",
+    "asGFF3",
     "asVCF",
     "asBed",
     "GZIterator",
diff --git a/pysam/libctabixproxies.pxd b/pysam/libctabixproxies.pxd
index 5317b81..edea701 100644
--- a/pysam/libctabixproxies.pxd
+++ b/pysam/libctabixproxies.pxd
@@ -25,19 +25,21 @@ cdef class TupleProxy:
     cdef copy(self, char * buffer, size_t nbytes, bint reset=*)
     cdef update(self, char * buffer, size_t nbytes)
 
-cdef class GTFProxy(TupleProxy) :
 
-    cdef:
-        char * _attributes
-        cdef bint hasOwnAttributes
+cdef class NamedTupleProxy(TupleProxy):
+    pass
+
 
+cdef class GTFProxy(NamedTupleProxy):
+    cdef object attribute_dict
     cpdef int getMaxFields(self)
     cpdef int getMinFields(self)
-    cdef char * getAttributes(self)
 
-cdef class NamedTupleProxy(TupleProxy):
+
+cdef class GFF3Proxy(GTFProxy):
     pass
 
+
 cdef class BedProxy(NamedTupleProxy):
 
     cdef:
diff --git a/pysam/libctabixproxies.pyx b/pysam/libctabixproxies.pyx
index 9a8a678..dc434e0 100644
--- a/pysam/libctabixproxies.pyx
+++ b/pysam/libctabixproxies.pyx
@@ -10,18 +10,21 @@ from pysam.libcutils cimport encode_filename, from_string_and_size
 
 import collections
 
+
 cdef char *StrOrEmpty(char * buffer):
      if buffer == NULL:
          return ""
      else: return buffer
 
+
 cdef int isNew(char * p, char * buffer, size_t nbytes):
     """return True if `p` is located within `buffer` of size
     `nbytes`
     """
     if p == NULL:
         return 0
-    return not (buffer <= p < buffer + nbytes)
+    
+    return not (buffer <= p <= buffer + nbytes)
 
 
 cdef class TupleProxy:
@@ -230,7 +233,7 @@ cdef class TupleProxy:
         self.nfields = field
         if self.nfields < self.getMinFields():
             raise ValueError(
-                "parsing error: fewer that %i fields in line: %s" %
+                "parsing error: fewer than %i fields in line: %s" %
                 (self.getMinFields(), buffer))
 
     def _getindex(self, int index):
@@ -268,7 +271,7 @@ cdef class TupleProxy:
             raise IndexError("list index out of range")
 
         if isNew(self.fields[idx], self.data, self.nbytes):
-            free(self.fields[idx] )
+            free(self.fields[idx])
 
         self.is_modified = 1
 
@@ -350,7 +353,62 @@ def quote(v):
         return str(v)
 
 
-cdef class GTFProxy(TupleProxy):
+cdef class NamedTupleProxy(TupleProxy):
+
+    map_key2field = {}
+
+    def __setattr__(self, key, value):
+        '''set attribute.'''
+        cdef int idx
+        idx, f = self.map_key2field[key]
+        if self.nfields < idx:
+            raise KeyError("field %s not set" % key)
+        TupleProxy.__setitem__(self, idx, str(value))
+
+    def __getattr__(self, key):
+        cdef int idx
+        idx, f = self.map_key2field[key]
+        if self.nfields < idx:
+            raise KeyError("field %s not set" % key)
+        if f == str:
+            return force_str(self.fields[idx],
+                              self.encoding)
+        return f(self.fields[idx])
+
+
+cdef dot_or_float(v):
+    if v == "" or v == b".":
+        return None
+    else:
+        try:
+            return int(v)
+        except ValueError:
+            return float(v)
+
+
+cdef dot_or_int(v):
+    if v == "" or v == b".":
+        return None
+    else:
+        return int(v)
+
+
+cdef dot_or_str(v):
+    if v == "" or v == b".":
+        return None
+    else:
+        return force_str(v)
+
+
+cdef int from1based(v):
+    return atoi(v) - 1
+
+
+cdef str to1based(int v):
+    return str(v + 1)
+
+
+cdef class GTFProxy(NamedTupleProxy):
     '''Proxy class for access to GTF fields.
 
     This class represents a GTF entry for fast read-access.
@@ -361,18 +419,29 @@ cdef class GTFProxy(TupleProxy):
 
     The only exception is the attributes field when set from
     a dictionary - this field will manage its own memory.
+
     '''
+    separator = "; "
 
+    # first value is field index, the tuple contains conversion
+    # functions for getting (converting internal string representation
+    # to pythonic value) and setting (converting pythonic value to
+    # interval string representation)
+    map_key2field = {
+        'contig' : (0, (str, str)),
+        'source' : (1, (dot_or_str, str)),
+        'feature': (2, (dot_or_str, str)),
+        'start' : (3, (from1based, to1based)),
+        'end' : (4, (int, int)),
+        'score' : (5, (dot_or_float, toDot)),
+        'strand' : (6, (dot_or_str, str)),
+        'frame' : (7, (dot_or_int, toDot)),
+        'attributes': (8, (str, str))}
+    
     def __cinit__(self): 
         # automatically calls TupleProxy.__cinit__
-        self.hasOwnAttributes = False
-        self._attributes = NULL
-
-    def __dealloc__(self):
-        # automatically calls TupleProxy.__dealloc__
-        if self.hasOwnAttributes:
-            free(self._attributes)
-
+        self.attribute_dict = None
+        
     cpdef int getMinFields(self):
         '''return minimum number of fields.'''
         return 9
@@ -381,182 +450,18 @@ cdef class GTFProxy(TupleProxy):
         '''return max number of fields.'''
         return 9
 
-    property contig:
-        '''contig of feature.'''
-        def __get__(self):
-            return self._getindex(0)
-        def __set__(self, value):
-            self._setindex(0, value)
-
-    property source:
-        '''feature source.'''
-        def __get__(self):
-            return self._getindex(1)
-        def __set__(self, value):
-            if value is None:
-                value = "."
-            self._setindex(1, value)
-
-    property feature:
-        '''feature name.'''
-        def __get__(self):
-            return self._getindex(2)
-        def __set__(self, value):
-            if value is None:
-                value = "."
-            self._setindex(2, value)
-
-    property start:
-        '''feature start (in 0-based open/closed coordinates).'''
-        def __get__(self ):
-            return int( self._getindex(3)) - 1
-        def __set__(self, value ):
-            self._setindex(3, str(value+1))
-
-    property end:
-        '''feature end (in 0-based open/closed coordinates).'''
-        def __get__(self):
-            return int(self._getindex(4))
-        def __set__(self, value):
-            self._setindex(4, str(value))
-
-    property score:
-        '''feature score.'''
-        def __get__(self): 
-            v = self._getindex(5)
-            if v == "" or v[0] == '.':
-                return None
-            else:
-                return float(v)
-
-        def __set__(self, value):
-            if value is None:
-                value = "."
-            self._setindex(5, str(value))
-
-    property strand:
-        '''feature strand.'''
-        def __get__(self):
-           return self._getindex(6)
-        def __set__(self, value ):
-            if value is None:
-                value = "."
-            self._setindex(6, value)
-
-    property frame:
-       '''feature frame.'''
-       def __get__(self):
-            v = self._getindex(7)
-            if v == "" or v[0] == '.':
-                return v
-            else:
-                return int(v)
-
-       def __set__(self, value):
-            if value is None:
-                value = "."
-            self._setindex(7, str(value))
-
-    property attributes:
-        '''feature attributes (as a string).'''
-        def __get__(self): 
-            if self.hasOwnAttributes:
-                return force_str(self._attributes)
-            else:
-                return force_str(self._getindex(8))
-        def __set__( self, value): 
-            if self.hasOwnAttributes:
-                free(self._attributes)
-                self._attributes = NULL
-                self.hasOwnAttributes = False
-            self._setindex(8, value)
-
-    cdef char * getAttributes(self):
-        '''return pointer to attributes.'''
-        cdef char * attributes
-        if self.hasOwnAttributes:
-            attributes = self._attributes
-        else:
-            attributes = self.fields[8]
-        if attributes == NULL:
-            raise KeyError("no attributes defined GTF entry")
-        return attributes
-
     def asDict(self):
         """parse attributes - return as dict
         """
-
-        # remove comments
-        attributes = self.attributes
-
-        # separate into fields
-        # Fields might contain a ";", for example in ENSEMBL GTF file
-        # for mouse, v78:
-        # ...; transcript_name "TXNRD2;-001"; ....
-        # The current heuristic is to split on a semicolon followed by a
-        # space, see also http://mblab.wustl.edu/GTF22.html
-
-        # Remove white space to prevent a last empty field.
-        fields = [x.strip() for x in attributes.strip().split("; ")]
-        
-        result = collections.OrderedDict()
-
-        for f in fields:
-
-            # strip semicolon (GTF files without a space after the last semicolon)
-            if f.endswith(";"):
-                f = f[:-1]
-
-            # split at most once in order to avoid separating
-            # multi-word values
-            d = [x.strip() for x in f.split(" ", 1)]
-
-            n,v = d[0], d[1]
-            if len(d) > 2:
-                v = d[1:]
-
-            if v[0] == '"' and v[-1] == '"':
-                v = v[1:-1]
-            else:
-                ## try to convert to a value
-                try:
-                    v = float(v)
-                    v = int(v)
-                except ValueError:
-                    pass
-                except TypeError:
-                    pass
-
-            result[n] = v
-        
-        return result
+        return collections.OrderedDict(self.attribute_iterator())
     
     def fromDict(self, d):
         '''set attributes from a dictionary.'''
-        cdef char * p
-        cdef int l
-
-        # clean up if this field is set twice
-        if self.hasOwnAttributes: 
-            free(self._attributes)
-
-        aa = []
-        for k,v in d.items():
-            if isinstance(v, str):
-                aa.append( '%s "%s"' % (k,v) )
-            else:
-                aa.append( '%s %s' % (k,str(v)) )
-
-        a = force_bytes("; ".join(aa) + ";")
-        p = a
-        l = len(a)
-        self._attributes = <char *>calloc(l + 1, sizeof(char))
-        if self._attributes == NULL:
-            raise ValueError("out of memory")
-        memcpy(self._attributes, p, l)
-
-        self.hasOwnAttributes = True
-        self.is_modified = True
+        self.attribute_dict = None
+        attribute_string = force_bytes(
+            self.attribute_dict2string(d),
+            self.encoding)
+        self._setindex(8, attribute_string)
 
     def __str__(self):
         cdef char * cpy
@@ -565,9 +470,9 @@ cdef class GTFProxy(TupleProxy):
         if self.is_modified:
             return "\t".join( 
                 (self.contig, 
-                 self.source, 
-                 self.feature, 
-                 str(self.start+1),
+                 toDot(self.source), 
+                 toDot(self.feature), 
+                 str(self.start + 1),
                  str(self.end),
                  toDot(self.score),
                  toDot(self.strand),
@@ -589,73 +494,26 @@ cdef class GTFProxy(TupleProxy):
 
     def keys(self):
         '''return a list of attributes defined in this entry.'''
-        r = self.attributes
-        return [x.strip().split(" ")[0]
-                # separator is ';' followed by space
-                for x in r.split("; ") if x.strip() != '']
+        if not self.attribute_dict:
+            self.attribute_dict = self.attribute_string2dict(
+                self.attributes)
+        return self.attribute_dict.keys()
 
     def __getitem__(self, key):
         return self.__getattr__(key)
 
-    def __getattr__(self, item):
-        """Generic lookup of attribute from GFF/GTF attributes 
-        Only called if there *isn't* an attribute with this name
-        """
-        cdef char * start
-        cdef char * query
-        cdef char * cpy
-        cdef char * end
-        cdef int l
-
-        #
-        # important to use the getAttributes function.
-        # Using the self.attributes property to access
-        # the attributes caused a hard-to-trace bug
-        # in which fields in the attribute string were
-        # set to 0.
-        # Running through valgrind complained that
-        # memory was accessed in the memory field
-        # that has been released. It is not clear
-        # why this happened and might be a cython bug
-        # (Version 0.16). The valgrind warnings
-        # disappeard after accessing the C data structures
-        # directly and so did the bug.
-        cdef char * attributes = self.getAttributes()
-        if attributes == NULL:
-            raise KeyError("key %s not found, no attributes" % item)
-
-        # add space in order to make sure
-        # to not pick up a field that is a prefix of another field
-        r = force_bytes(item + " ")
-        query = r
-        start = strstr(attributes, query)
-
-        if start == NULL:
-            raise AttributeError("'GTFProxy' has no attribute '%s'" % item)
-
-        start += strlen(query)
-        # skip gaps before
-        while start[0] == ' ':
-            start += 1
-
-        if start[0] == '"':
-            start += 1
-            end = start
-            while end[0] != '\0' and end[0] != '"':
-                end += 1
-            l = end - start
-            result = force_str(PyBytes_FromStringAndSize(start, l),
-                                self.encoding)
-            return result
-        else:
-            return force_str(start, self.encoding)
-
     def setAttribute(self, name, value):
-        '''convenience method to set an attribute.'''
-        r = self.asDict()
-        r[name] = value
-        self.fromDict(r)
-
+        '''convenience method to set an attribute.
+        '''
+        if not self.attribute_dict:
+            self.attribute_dict = self.attribute_string2dict(
+                self.attributes)
+        self.attribute_dict[name] = value
+
+    def attribute_string2dict(self, s):
+        return collections.OrderedDict(
+            self.attribute_string2iterator(s))
+    
     def __cmp__(self, other):
         return (self.contig, self.strand, self.start) < \
             (other.contig, other.strand, other.start)
@@ -676,29 +534,148 @@ cdef class GTFProxy(TupleProxy):
             err_msg = "op {0} isn't implemented yet".format(op)
             raise NotImplementedError(err_msg)
 
+    def dict2attribute_string(self, d):
+        """convert dictionary to attribute string in GTF format.
 
-cdef class NamedTupleProxy(TupleProxy):
+        """
+        aa = []
+        for k, v in d.items():
+            if isinstance(v, str):
+                aa.append('{} "{}"'.format(k, v))
+            else:
+                aa.append("{} {}".format(k, str(v)))
 
-    map_key2field = {}
+        return self.separator.join(aa) + ";"
+
+    def attribute_string2iterator(self, s):
+        """convert attribute string in GTF format to records
+        and iterate over key, value pairs.
+        """
+        
+        # remove comments
+        attributes = force_str(s, encoding=self.encoding)
+
+        # separate into fields
+        # Fields might contain a ";", for example in ENSEMBL GTF file
+        # for mouse, v78:
+        # ...; transcript_name "TXNRD2;-001"; ....
+        # The current heuristic is to split on a semicolon followed by a
+        # space, see also http://mblab.wustl.edu/GTF22.html
+
+        # Remove white space to prevent a last empty field.
+        fields = [x.strip() for x in attributes.strip().split("; ")]
+        for f in fields:
+
+            # strip semicolon (GTF files without a space after the last semicolon)
+            if f.endswith(";"):
+                f = f[:-1]
+
+            # split at most once in order to avoid separating
+            # multi-word values
+            d = [x.strip() for x in f.split(" ", 1)]
+
+            n, v = d[0], d[1]
+            if len(d) > 2:
+                v = d[1:]
+
+            if v[0] == '"' and v[-1] == '"':
+                v = v[1:-1]
+            else:
+                ## try to convert to a value
+                try:
+                    v = float(v)
+                    v = int(v)
+                except ValueError:
+                    pass
+                except TypeError:
+                    pass
+                
+            yield n, v
+       
+    def __getattr__(self, key):
+        """Generic lookup of attribute from GFF/GTF attributes 
+        """
+
+        # Only called if there *isn't* an attribute with this name
+        cdef int idx
+        idx, f = self.map_key2field.get(key, (-1, None))
+        if idx >= 0:
+            # deal with known attributes (fields 0-8)
+            if idx == 8:
+                # flush attributes if requested
+                if self.is_modified and self.attribute_dict is not None:
+                    s = self.dict2attribute_string(self.attribute_dict)
+                    TupleProxy._setindex(self, idx, s)
+                    self.attribute_dict = None
+                    return s
+                                         
+            if f[0] == str:
+                return force_str(self.fields[idx],
+                                 self.encoding)
+            else:
+                return f[0](self.fields[idx])
+        else:
+            # deal with generic attributes (gene_id, ...)
+            if self.attribute_dict is None:
+                self.attribute_dict = self.attribute_string2dict(
+                    self.attributes)
+            return self.attribute_dict[key]
 
     def __setattr__(self, key, value):
         '''set attribute.'''
-        cdef int idx
-        idx, f = self.map_key2field[key]
-        if self.nfields < idx:
-            raise KeyError("field %s not set" % key)
-        TupleProxy.__setitem__(self, idx, str(value))
 
-    def __getattr__(self, key):
+        # Note that __setattr__ is called before properties, so __setattr__ and
+        # properties don't mix well. This is different from __getattr__ which is
+        # called after any properties have been resolved.
         cdef int idx
-        idx, f = self.map_key2field[key]
-        if self.nfields < idx:
-            raise KeyError("field %s not set" % key)
-        if f == str:
-            return force_str(self.fields[idx],
-                              self.encoding)
-        return f(self.fields[idx])
+        idx, f = self.map_key2field.get(key, (-1, None))
+
+        if idx >= 0:
+            if value is None:
+                s = "."
+            elif f[1] == str:
+                s = force_bytes(value,
+                                self.encoding)
+            else:
+                s = str(f[1](value))
+            TupleProxy._setindex(self, idx, s)
+        else:
+            if self.attribute_dict is None:
+                self.attribute_dict = self.attribute_string2dict(
+                    self.attributes)
+            self.attribute_dict[key] = value
+            self.is_modified = True
+            
+
+cdef class GFF3Proxy(GTFProxy):
+
+    def dict2attribute_string(self, d):
+        """convert dictionary to attribute string."""
+        return ";".join(["{}={}".format(k, v) for k, v in d.items()])
+        
+    def attribute_string2iterator(self, s):
+        """convert attribute string in GFF3 format to records
+        and iterate over key, value pairs.
+        """
+        
+        for f in (x.strip() for x in s.split(";")):
+            if not f:
+                continue
 
+            key, value = f.split("=", 1)
+            value = value.strip()
+            
+            ## try to convert to a value
+            try:
+                value = float(value)
+                value = int(value)
+            except ValueError:
+                pass
+            except TypeError:
+                pass
+                
+            yield key.strip(), value
+   
 
 cdef class BedProxy(NamedTupleProxy):
     '''Proxy class for access to Bed fields.
@@ -762,7 +739,7 @@ cdef class BedProxy(NamedTupleProxy):
         self.nfields = save_fields
         return retval
 
-    def __setattr__(self, key, value ):
+    def __setattr__(self, key, value):
         '''set attribute.'''
         if key == "start":
             self.start = value
@@ -771,7 +748,8 @@ cdef class BedProxy(NamedTupleProxy):
 
         cdef int idx
         idx, f = self.map_key2field[key]
-        TupleProxy._setindex(self, idx, str(value) )
+        TupleProxy._setindex(self, idx, str(value))
+        
 
 cdef class VCFProxy(NamedTupleProxy):
     '''Proxy class for access to VCF fields.
diff --git a/pysam/libcutils.pxd b/pysam/libcutils.pxd
index 81e544a..479d337 100644
--- a/pysam/libcutils.pxd
+++ b/pysam/libcutils.pxd
@@ -28,11 +28,11 @@ cdef from_string_and_size(const char *s, size_t length)
 
 cdef extern from "pysam_util.h":
 
-    int samtools_main(int argc, char *argv[])
-    int bcftools_main(int argc, char *argv[])
     void pysam_set_stderr(int fd)
     void pysam_unset_stderr()
     void pysam_set_stdout(int fd)
     void pysam_set_stdout_fn(const char *)
     void pysam_unset_stdout()
     void set_optind(int)
+    extern int samtools_main(int argc, char *argv[])
+    extern int bcftools_main(int argc, char *argv[])
diff --git a/pysam/libcutils.pyx b/pysam/libcutils.pyx
index 80bd9e4..2b90420 100644
--- a/pysam/libcutils.pyx
+++ b/pysam/libcutils.pyx
@@ -16,6 +16,9 @@ from libc.stdio cimport fprintf, stderr, fflush
 from libc.stdio cimport stdout as c_stdout
 from posix.fcntl cimport open as c_open, O_WRONLY
 
+from libcbcftools cimport bcftools_main
+from libcsamtools cimport samtools_main
+
 #####################################################################
 # hard-coded constants
 cdef int MAX_POS = 2 << 29
@@ -234,16 +237,22 @@ def _pysam_dispatch(collection,
                     method,
                     args=None,
                     catch_stdout=True,
+                    is_usage=False,
                     save_stdout=None):
     '''call ``method`` in samtools/bcftools providing arguments in args.
     
+    By default, stdout is redirected to a temporary file using the patched
+    C sources except for a few commands that have an explicit output option
+    (typically: -o). In these commands (such as samtools view), this explicit
+    option is used. If *is_usage* is True, then these explicit output options
+    will not be used.
+
     Catching of stdout can be turned off by setting *catch_stdout* to
     False.
-
     '''
 
     if method == "index":
-        if not os.path.exists(args[0]):
+        if args and not os.path.exists(args[0]):
             raise IOError("No such file or directory: '%s'" % args[0])
             
     if args is None:
@@ -267,17 +276,16 @@ def _pysam_dispatch(collection,
         pysam_set_stdout(stdout_h)
     elif catch_stdout:
         stdout_h, stdout_f = tempfile.mkstemp()
-
         MAP_STDOUT_OPTIONS = {
-            "samtools": {
-                "view": "-o {}",
-                "mpileup": "-o {}",
-                "depad": "-o {}",
-                "calmd": "",  # uses pysam_stdout_fn
-            },
+        "samtools": {
+            "view": "-o {}",
+            "mpileup": "-o {}",
+            "depad": "-o {}",
+            "calmd": "",  # uses pysam_stdout_fn
+        },
             "bcftools": {}
         }
-
+        
         stdout_option = None
         if collection == "bcftools":
             # in bcftools, most methods accept -o, the exceptions
@@ -289,7 +297,7 @@ def _pysam_dispatch(collection,
             if not(method == "view" and "-c" in args):
                 stdout_option = MAP_STDOUT_OPTIONS[collection][method]
 
-        if stdout_option is not None:
+        if stdout_option is not None and not is_usage:
             os.close(stdout_h)
             pysam_set_stdout_fn(force_bytes(stdout_f))
             args.extend(stdout_option.format(stdout_f).split(" "))
diff --git a/pysam/pysam_util.c b/pysam/pysam_util.c
index 94717c8..5940a35 100644
--- a/pysam/pysam_util.c
+++ b/pysam/pysam_util.c
@@ -2,8 +2,10 @@
 #include <assert.h>
 #include <unistd.h>
 #include <stdio.h>
-#include "bam.h"
-#include "bam_endian.h"
+
+/* #include "bam.h" */
+/* #include "bam_endian.h" */
+
 #include "htslib/khash.h"
 #include "htslib/ksort.h"
 #include "htslib/knetfile.h"
diff --git a/pysam/pysam_util.h b/pysam/pysam_util.h
index a30808f..8627d96 100644
--- a/pysam/pysam_util.h
+++ b/pysam/pysam_util.h
@@ -34,4 +34,8 @@ int pysam_dispatch(int argc, char *argv[]);
 
 void set_optind(int);
 
+extern int samtools_main(int argc, char *argv[]);
+  
+extern int bcftools_main(int argc, char *argv[]);
+
 #endif
diff --git a/pysam/samfile_util.c b/pysam/samfile_util.c
index f5724ae..b6917ed 100644
--- a/pysam/samfile_util.c
+++ b/pysam/samfile_util.c
@@ -1,8 +1,6 @@
 #include "samfile_util.h"
 #include "htslib/sam.h"
 
-#include "kprobaln.h"
-
 // taken from bam_md.c
 // replace bam1_{qual,seq,cigar} with bam_get_{qual,seq,cigar}
 // bam1_seqi -> bam_seqi
@@ -14,175 +12,5 @@
 
 char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
 
-int bam_cap_mapQ(bam1_t *b, char *ref, int thres)
-{
-	uint8_t *seq = bam_get_seq(b), *qual = bam_get_qual(b);
-	uint32_t *cigar = bam_get_cigar(b);
-	bam1_core_t *c = &b->core;
-	int i, x, y, mm, q, len, clip_l, clip_q;
-	double t;
-	if (thres < 0) thres = 40; // set the default
-	mm = q = len = clip_l = clip_q = 0;
-	for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
-		int j, l = cigar[i]>>4, op = cigar[i]&0xf;
-		if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-			for (j = 0; j < l; ++j) {
-				int z = y + j;
-				int c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
-				if (ref[x+j] == 0) break; // out of boundary
-				if (c2 != 15 && c1 != 15 && qual[z] >= 13) { // not ambiguous
-					++len;
-					if (c1 && c1 != c2 && qual[z] >= 13) { // mismatch
-						++mm;
-						q += qual[z] > 33? 33 : qual[z];
-					}
-				}
-			}
-			if (j < l) break;
-			x += l; y += l; len += l;
-		} else if (op == BAM_CDEL) {
-			for (j = 0; j < l; ++j)
-				if (ref[x+j] == 0) break;
-			if (j < l) break;
-			x += l;
-		} else if (op == BAM_CSOFT_CLIP) {
-			for (j = 0; j < l; ++j) clip_q += qual[y+j];
-			clip_l += l;
-			y += l;
-		} else if (op == BAM_CHARD_CLIP) {
-			clip_q += 13 * l;
-			clip_l += l;
-		} else if (op == BAM_CINS) y += l;
-		else if (op == BAM_CREF_SKIP) x += l;
-	}
-	for (i = 0, t = 1; i < mm; ++i)
-		t *= (double)len / (i+1);
-	t = q - 4.343 * log(t) + clip_q / 5.;
-	if (t > thres) return -1;
-	if (t < 0) t = 0;
-	t = sqrt((thres - t) / thres) * thres;
-//	fprintf(stderr, "%s %lf %d\n", bam1_qname(b), t, q);
-	return (int)(t + .499);
-}
-
-
-int bam_prob_realn_core(bam1_t *b, const char *ref, int flag)
-{
-	int k, i, bw, x, y, yb, ye, xb, xe, apply_baq = flag&1, extend_baq = flag>>1&1, redo_baq = flag&4;
-	uint32_t *cigar = bam_get_cigar(b);
-	bam1_core_t *c = &b->core;
-	kpa_par_t conf = kpa_par_def;
-	uint8_t *bq = 0, *zq = 0, *qual = bam_get_qual(b);
-	if ((c->flag & BAM_FUNMAP) || b->core.l_qseq == 0) return -1; // do nothing
-	// test if BQ or ZQ is present
-	if ((bq = bam_aux_get(b, "BQ")) != 0) ++bq;
-	if ((zq = bam_aux_get(b, "ZQ")) != 0 && *zq == 'Z') ++zq;
-	if (bq && redo_baq)
-	{
-	    bam_aux_del(b, bq-1);
-	    bq = 0;
-	}
-	if (bq && zq) { // remove the ZQ tag
-		bam_aux_del(b, zq-1);
-		zq = 0;
-	}
-	if (bq || zq) {
-		if ((apply_baq && zq) || (!apply_baq && bq)) return -3; // in both cases, do nothing
-		if (bq && apply_baq) { // then convert BQ to ZQ
-			for (i = 0; i < c->l_qseq; ++i)
-				qual[i] = qual[i] + 64 < bq[i]? 0 : qual[i] - ((int)bq[i] - 64);
-			*(bq - 3) = 'Z';
-		} else if (zq && !apply_baq) { // then convert ZQ to BQ
-			for (i = 0; i < c->l_qseq; ++i)
-				qual[i] += (int)zq[i] - 64;
-			*(zq - 3) = 'B';
-		}
-		return 0;
-	}
-	// find the start and end of the alignment	
-	x = c->pos, y = 0, yb = ye = xb = xe = -1;
-	for (k = 0; k < c->n_cigar; ++k) {
-		int op, l;
-		op = cigar[k]&0xf; l = cigar[k]>>4;
-		if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-			if (yb < 0) yb = y;
-			if (xb < 0) xb = x;
-			ye = y + l; xe = x + l;
-			x += l; y += l;
-		} else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
-		else if (op == BAM_CDEL) x += l;
-		else if (op == BAM_CREF_SKIP) return -1; // do nothing if there is a reference skip
-	}
-	// set bandwidth and the start and the end
-	bw = 7;
-	if (abs((xe - xb) - (ye - yb)) > bw)
-		bw = abs((xe - xb) - (ye - yb)) + 3;
-	conf.bw = bw;
-	xb -= yb + bw/2; if (xb < 0) xb = 0;
-	xe += c->l_qseq - ye + bw/2;
-	if (xe - xb - c->l_qseq > bw)
-		xb += (xe - xb - c->l_qseq - bw) / 2, xe -= (xe - xb - c->l_qseq - bw) / 2;
-	{ // glocal
-		uint8_t *s, *r, *q, *seq = bam_get_seq(b), *bq;
-		int *state;
-		bq = calloc(c->l_qseq + 1, 1);
-		memcpy(bq, qual, c->l_qseq);
-		s = calloc(c->l_qseq, 1);
-		for (i = 0; i < c->l_qseq; ++i) s[i] = bam_nt16_nt4_table[bam_seqi(seq, i)];
-		r = calloc(xe - xb, 1);
-		for (i = xb; i < xe; ++i) {
-			if (ref[i] == 0) { xe = i; break; }
-			r[i-xb] = bam_nt16_nt4_table[seq_nt16_table[(int)ref[i]]];
-		}
-		state = calloc(c->l_qseq, sizeof(int));
-		q = calloc(c->l_qseq, 1);
-		kpa_glocal(r, xe-xb, s, c->l_qseq, qual, &conf, state, q);
-		if (!extend_baq) { // in this block, bq[] is capped by base quality qual[]
-			for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
-				int op = cigar[k]&0xf, l = cigar[k]>>4;
-				if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-					for (i = y; i < y + l; ++i) {
-						if ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y)) bq[i] = 0;
-						else bq[i] = bq[i] < q[i]? bq[i] : q[i];
-					}
-					x += l; y += l;
-				} else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
-				else if (op == BAM_CDEL) x += l;
-			}
-			for (i = 0; i < c->l_qseq; ++i) bq[i] = qual[i] - bq[i] + 64; // finalize BQ
-		} else { // in this block, bq[] is BAQ that can be larger than qual[] (different from the above!)
-			uint8_t *left, *rght;
-			left = calloc(c->l_qseq, 1); rght = calloc(c->l_qseq, 1);
-			for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
-				int op = cigar[k]&0xf, l = cigar[k]>>4;
-				if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-					for (i = y; i < y + l; ++i)
-						bq[i] = ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y))? 0 : q[i];
-					for (left[y] = bq[y], i = y + 1; i < y + l; ++i)
-						left[i] = bq[i] > left[i-1]? bq[i] : left[i-1];
-					for (rght[y+l-1] = bq[y+l-1], i = y + l - 2; i >= y; --i)
-						rght[i] = bq[i] > rght[i+1]? bq[i] : rght[i+1];
-					for (i = y; i < y + l; ++i)
-						bq[i] = left[i] < rght[i]? left[i] : rght[i];
-					x += l; y += l;
-				} else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
-				else if (op == BAM_CDEL) x += l;
-			}
-			for (i = 0; i < c->l_qseq; ++i) bq[i] = 64 + (qual[i] <= bq[i]? 0 : qual[i] - bq[i]); // finalize BQ
-			free(left); free(rght);
-		}
-		if (apply_baq) {
-			for (i = 0; i < c->l_qseq; ++i) qual[i] -= bq[i] - 64; // modify qual
-			bam_aux_append(b, "ZQ", 'Z', c->l_qseq + 1, bq);
-		} else bam_aux_append(b, "BQ", 'Z', c->l_qseq + 1, bq);
-		free(bq); free(s); free(r); free(q); free(state);
-	}
-	return 0;
-}
-
-int bam_prob_realn(bam1_t *b, const char *ref)
-{
-	return bam_prob_realn_core(b, ref, 1);
-}
  
 
diff --git a/pysam/samfile_util.h b/pysam/samfile_util.h
index dd3e27a..94ce096 100644
--- a/pysam/samfile_util.h
+++ b/pysam/samfile_util.h
@@ -3,8 +3,5 @@
 
 #include "htslib/sam.h"
 
-int bam_cap_mapQ(bam1_t *b, char *ref, int thres);
-int bam_prob_realn(bam1_t *b, const char *ref);
-
 #endif
 
diff --git a/pysam/tabix_util.c b/pysam/tabix_util.c
index bff140e..319808a 100644
--- a/pysam/tabix_util.c
+++ b/pysam/tabix_util.c
@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
+#include <string.h>
 
 #if !(_POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700)
 /*
diff --git a/pysam/utils.py b/pysam/utils.py
index 5c045df..239f5db 100644
--- a/pysam/utils.py
+++ b/pysam/utils.py
@@ -92,7 +92,14 @@ class PysamDispatcher(object):
 
     def usage(self):
         '''return the samtools usage information for this command'''
-        retval, stderr, stdout = csamtools._samtools_dispatch(
-            self.dispatch)
-        return stderr
+        retval, stderr, stdout = _pysam_dispatch(
+            self.collection,
+            self.dispatch,
+            is_usage=True,
+            catch_stdout=True)
+        # some tools write usage to stderr, such as mpileup
+        if stderr:
+            return stderr
+        else:
+            return stdout
 
diff --git a/pysam/version.py b/pysam/version.py
index facb3bb..ac832cf 100644
--- a/pysam/version.py
+++ b/pysam/version.py
@@ -1,9 +1,10 @@
 # pysam versioning information
+__version__ = "0.11.2.2"
 
-__version__ = "0.10.0"
+# TODO: upgrade number
+__samtools_version__ = "1.4.1"
 
-__samtools_version__ = "1.3.1"
+# TODO: upgrade code and number
+__bcftools_version__ = "1.4.1"
 
-__bcftools_version__ = "1.3.1"
-
-__htslib_version__ = "1.3.2"
+__htslib_version__ = "1.4.1"
diff --git a/samtools/bam.h b/samtools/bam.h
index e928ce4..108987c 100644
--- a/samtools/bam.h
+++ b/samtools/bam.h
@@ -38,7 +38,7 @@ DEALINGS IN THE SOFTWARE.  */
   @copyright Genome Research Ltd.
  */
 
-#define BAM_VERSION "1.3.1"
+#define BAM_VERSION "1.4.1"
 
 #include <stdint.h>
 #include <stdlib.h>
diff --git a/samtools/bam2bcf.c b/samtools/bam2bcf.c
index 85ce307..a824d5a 100644
--- a/samtools/bam2bcf.c
+++ b/samtools/bam2bcf.c
@@ -29,11 +29,11 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdint.h>
 #include <assert.h>
 #include <float.h>
+#include <htslib/hts.h>
 #include <htslib/sam.h>
 #include <htslib/kstring.h>
 #include <htslib/kfunc.h>
 #include "bam2bcf.h"
-#include "errmod.h"
 
 extern  void ks_introsort_uint32_t(size_t n, uint32_t a[]);
 
diff --git a/samtools/bam2bcf.c.pysam.c b/samtools/bam2bcf.c.pysam.c
index 6938ec0..3e3e01c 100644
--- a/samtools/bam2bcf.c.pysam.c
+++ b/samtools/bam2bcf.c.pysam.c
@@ -31,11 +31,11 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdint.h>
 #include <assert.h>
 #include <float.h>
+#include <htslib/hts.h>
 #include <htslib/sam.h>
 #include <htslib/kstring.h>
 #include <htslib/kfunc.h>
 #include "bam2bcf.h"
-#include "errmod.h"
 
 extern  void ks_introsort_uint32_t(size_t n, uint32_t a[]);
 
diff --git a/samtools/bam2bcf.h b/samtools/bam2bcf.h
index 22c67cc..54e5faa 100644
--- a/samtools/bam2bcf.h
+++ b/samtools/bam2bcf.h
@@ -27,8 +27,8 @@ DEALINGS IN THE SOFTWARE.  */
 #define BAM2BCF_H
 
 #include <stdint.h>
+#include <htslib/hts.h>
 #include <htslib/vcf.h>
-#include "errmod.h"
 
 /**
  *  A simplified version of Mann-Whitney U-test is calculated
diff --git a/samtools/bam2bcf_indel.c b/samtools/bam2bcf_indel.c
index 5b353fc..9749d5b 100644
--- a/samtools/bam2bcf_indel.c
+++ b/samtools/bam2bcf_indel.c
@@ -28,9 +28,9 @@ DEALINGS IN THE SOFTWARE.  */
 #include <assert.h>
 #include <ctype.h>
 #include <string.h>
+#include "htslib/hts.h"
 #include "htslib/sam.h"
 #include "bam2bcf.h"
-#include "kprobaln.h"
 #include "htslib/khash.h"
 KHASH_SET_INIT_STR(rg)
 
@@ -359,7 +359,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     bca->indelreg = 0;
     for (t = 0; t < n_types; ++t) {
         int l, ir;
-        kpa_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
+        probaln_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
         apf1.bw = apf2.bw = abs(types[t]) + 3;
         // compute indelreg
         if (types[t] == 0) ir = 0;
@@ -412,14 +412,14 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                         if (qq[l - qbeg] > 30) qq[l - qbeg] = 30;
                         if (qq[l - qbeg] < 7) qq[l - qbeg] = 7;
                     }
-                    sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                    (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
+                    sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
+                                        (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
                     l = (int)(100. * sc / (qend - qbeg) + .499); // used for adjusting indelQ below
                     if (l > 255) l = 255;
                     score1[K*n_types + t] = score2[K*n_types + t] = sc<<8 | l;
                     if (sc > 5) {
-                        sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                        (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
+                        sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
+                                            (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
                         l = (int)(100. * sc / (qend - qbeg) + .499);
                         if (l > 255) l = 255;
                         score2[K*n_types + t] = sc<<8 | l;
@@ -439,10 +439,13 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     }
     free(ref2); free(query);
     { // compute indelQ
-        int *sc, tmp, *sumq;
-        sc   = alloca(n_types * sizeof(int));
-        sumq = alloca(n_types * sizeof(int));
-        memset(sumq, 0, sizeof(int) * n_types);
+        int sc_a[16], sumq_a[16];
+        int tmp, *sc = sc_a, *sumq = sumq_a;
+        if (n_types > 16) {
+            sc   = (int *)malloc(n_types * sizeof(int));
+            sumq = (int *)malloc(n_types * sizeof(int));
+        }
+        memset(sumq, 0, n_types * sizeof(int));
         for (s = K = 0; s < n; ++s) {
             for (i = 0; i < n_plp[s]; ++i, ++K) {
                 bam_pileup1_t *p = plp[s] + i;
@@ -523,6 +526,9 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                 //fprintf(stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam1_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
             }
         }
+
+        if (sc   != sc_a)   free(sc);
+        if (sumq != sumq_a) free(sumq);
     }
     free(score1); free(score2);
     // free
diff --git a/samtools/bam2bcf_indel.c.pysam.c b/samtools/bam2bcf_indel.c.pysam.c
index 21cbb03..fcbc90f 100644
--- a/samtools/bam2bcf_indel.c.pysam.c
+++ b/samtools/bam2bcf_indel.c.pysam.c
@@ -30,9 +30,9 @@ DEALINGS IN THE SOFTWARE.  */
 #include <assert.h>
 #include <ctype.h>
 #include <string.h>
+#include "htslib/hts.h"
 #include "htslib/sam.h"
 #include "bam2bcf.h"
-#include "kprobaln.h"
 #include "htslib/khash.h"
 KHASH_SET_INIT_STR(rg)
 
@@ -361,7 +361,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     bca->indelreg = 0;
     for (t = 0; t < n_types; ++t) {
         int l, ir;
-        kpa_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
+        probaln_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
         apf1.bw = apf2.bw = abs(types[t]) + 3;
         // compute indelreg
         if (types[t] == 0) ir = 0;
@@ -414,14 +414,14 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                         if (qq[l - qbeg] > 30) qq[l - qbeg] = 30;
                         if (qq[l - qbeg] < 7) qq[l - qbeg] = 7;
                     }
-                    sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                    (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
+                    sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
+                                        (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
                     l = (int)(100. * sc / (qend - qbeg) + .499); // used for adjusting indelQ below
                     if (l > 255) l = 255;
                     score1[K*n_types + t] = score2[K*n_types + t] = sc<<8 | l;
                     if (sc > 5) {
-                        sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                        (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
+                        sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
+                                            (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
                         l = (int)(100. * sc / (qend - qbeg) + .499);
                         if (l > 255) l = 255;
                         score2[K*n_types + t] = sc<<8 | l;
@@ -441,10 +441,13 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
     }
     free(ref2); free(query);
     { // compute indelQ
-        int *sc, tmp, *sumq;
-        sc   = alloca(n_types * sizeof(int));
-        sumq = alloca(n_types * sizeof(int));
-        memset(sumq, 0, sizeof(int) * n_types);
+        int sc_a[16], sumq_a[16];
+        int tmp, *sc = sc_a, *sumq = sumq_a;
+        if (n_types > 16) {
+            sc   = (int *)malloc(n_types * sizeof(int));
+            sumq = (int *)malloc(n_types * sizeof(int));
+        }
+        memset(sumq, 0, n_types * sizeof(int));
         for (s = K = 0; s < n; ++s) {
             for (i = 0; i < n_plp[s]; ++i, ++K) {
                 bam_pileup1_t *p = plp[s] + i;
@@ -525,6 +528,9 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                 //fprintf(pysam_stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam1_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
             }
         }
+
+        if (sc   != sc_a)   free(sc);
+        if (sumq != sumq_a) free(sumq);
     }
     free(score1); free(score2);
     // free
diff --git a/samtools/bam2depth.c b/samtools/bam2depth.c
index 21220f1..b732e8e 100644
--- a/samtools/bam2depth.c
+++ b/samtools/bam2depth.c
@@ -80,13 +80,13 @@ static int usage() {
     fprintf(stderr, "   -a -a (or -aa)      output absolutely all positions, including unused ref. sequences\n");
     fprintf(stderr, "   -b <bed>            list of positions or regions\n");
     fprintf(stderr, "   -f <list>           list of input BAM filenames, one per line [null]\n");
-    fprintf(stderr, "   -l <int>            read length threshold (ignore reads shorter than <int>)\n");
+    fprintf(stderr, "   -l <int>            read length threshold (ignore reads shorter than <int>) [0]\n");
     fprintf(stderr, "   -d/-m <int>         maximum coverage depth [8000]\n");  // the htslib's default
-    fprintf(stderr, "   -q <int>            base quality threshold\n");
-    fprintf(stderr, "   -Q <int>            mapping quality threshold\n");
+    fprintf(stderr, "   -q <int>            base quality threshold [0]\n");
+    fprintf(stderr, "   -Q <int>            mapping quality threshold [0]\n");
     fprintf(stderr, "   -r <chr:from-to>    region\n");
 
-    sam_global_opt_help(stderr, "-.--.");
+    sam_global_opt_help(stderr, "-.--.-");
 
     fprintf(stderr, "\n");
     fprintf(stderr, "The output is a simple tab-separated table with three columns: reference name,\n");
@@ -99,7 +99,7 @@ static int usage() {
 
 int main_depth(int argc, char *argv[])
 {
-    int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0;
+    int i, n, tid, reg_tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0;
     int all = 0, status = EXIT_SUCCESS, nfiles, max_depth = -1;
     const bam_pileup1_t **plp;
     char *reg = 0; // specified region
@@ -112,7 +112,7 @@ int main_depth(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -149,7 +149,7 @@ int main_depth(int argc, char *argv[])
     else
         n = argc - optind; // the number of BAMs on the command line
     data = calloc(n, sizeof(aux_t*)); // data[i] for the i-th input
-    beg = 0; end = INT_MAX;  // set the default region
+    reg_tid = 0; beg = 0; end = INT_MAX;  // set the default region
     for (i = 0; i < n; ++i) {
         int rf;
         data[i] = calloc(1, sizeof(aux_t));
@@ -199,6 +199,7 @@ int main_depth(int argc, char *argv[])
     if (reg) {
         beg = data[0]->iter->beg; // and to the parsed region coordinates
         end = data[0]->iter->end;
+        reg_tid = data[0]->iter->tid;
     }
 
     // the core multi-pileup loop
@@ -210,12 +211,12 @@ int main_depth(int argc, char *argv[])
     while ((ret=bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // come to the next covered position
         if (pos < beg || pos >= end) continue; // out of range; skip
         if (tid >= h->n_targets) continue;     // diff number of @SQ lines per file?
-        if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip
         if (all) {
             while (tid > last_tid) {
-                if (last_tid >= 0 && all > 1 && !reg) {
-                    // Deal with remainder or entirety of last tid
+                if (last_tid >= 0 && !reg) {
+                    // Deal with remainder or entirety of last tid.
                     while (++last_pos < h->target_len[last_tid]) {
+                        // Horribly inefficient, but the bed API is an obfuscated black box.
                         if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
                             continue;
                         fputs(h->target_name[last_tid], stdout); printf("\t%d", last_pos+1);
@@ -226,6 +227,8 @@ int main_depth(int argc, char *argv[])
                 }
                 last_tid++;
                 last_pos = -1;
+                if (all < 2)
+                    break;
             }
 
             // Deal with missing portion of current tid
@@ -242,6 +245,7 @@ int main_depth(int argc, char *argv[])
             last_tid = tid;
             last_pos = pos;
         }
+        if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue;
         fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster
         for (i = 0; i < n; ++i) { // base level filters have to go here
             int j, m = 0;
@@ -260,7 +264,11 @@ int main_depth(int argc, char *argv[])
 
     if (all) {
         // Handle terminating region
-        while (last_tid < h->n_targets) {
+        if (last_tid < 0 && reg && all > 1) {
+            last_tid = reg_tid;
+            last_pos = beg-1;
+        }
+        while (last_tid >= 0 && last_tid < h->n_targets) {
             while (++last_pos < h->target_len[last_tid]) {
                 if (last_pos >= end) break;
                 if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
diff --git a/samtools/bam2depth.c.pysam.c b/samtools/bam2depth.c.pysam.c
index 9d9dc40..4d9110b 100644
--- a/samtools/bam2depth.c.pysam.c
+++ b/samtools/bam2depth.c.pysam.c
@@ -82,13 +82,13 @@ static int usage() {
     fprintf(pysam_stderr, "   -a -a (or -aa)      output absolutely all positions, including unused ref. sequences\n");
     fprintf(pysam_stderr, "   -b <bed>            list of positions or regions\n");
     fprintf(pysam_stderr, "   -f <list>           list of input BAM filenames, one per line [null]\n");
-    fprintf(pysam_stderr, "   -l <int>            read length threshold (ignore reads shorter than <int>)\n");
+    fprintf(pysam_stderr, "   -l <int>            read length threshold (ignore reads shorter than <int>) [0]\n");
     fprintf(pysam_stderr, "   -d/-m <int>         maximum coverage depth [8000]\n");  // the htslib's default
-    fprintf(pysam_stderr, "   -q <int>            base quality threshold\n");
-    fprintf(pysam_stderr, "   -Q <int>            mapping quality threshold\n");
+    fprintf(pysam_stderr, "   -q <int>            base quality threshold [0]\n");
+    fprintf(pysam_stderr, "   -Q <int>            mapping quality threshold [0]\n");
     fprintf(pysam_stderr, "   -r <chr:from-to>    region\n");
 
-    sam_global_opt_help(pysam_stderr, "-.--.");
+    sam_global_opt_help(pysam_stderr, "-.--.-");
 
     fprintf(pysam_stderr, "\n");
     fprintf(pysam_stderr, "The output is a simple tab-separated table with three columns: reference name,\n");
@@ -101,7 +101,7 @@ static int usage() {
 
 int main_depth(int argc, char *argv[])
 {
-    int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0;
+    int i, n, tid, reg_tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0;
     int all = 0, status = EXIT_SUCCESS, nfiles, max_depth = -1;
     const bam_pileup1_t **plp;
     char *reg = 0; // specified region
@@ -114,7 +114,7 @@ int main_depth(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -151,7 +151,7 @@ int main_depth(int argc, char *argv[])
     else
         n = argc - optind; // the number of BAMs on the command line
     data = calloc(n, sizeof(aux_t*)); // data[i] for the i-th input
-    beg = 0; end = INT_MAX;  // set the default region
+    reg_tid = 0; beg = 0; end = INT_MAX;  // set the default region
     for (i = 0; i < n; ++i) {
         int rf;
         data[i] = calloc(1, sizeof(aux_t));
@@ -201,6 +201,7 @@ int main_depth(int argc, char *argv[])
     if (reg) {
         beg = data[0]->iter->beg; // and to the parsed region coordinates
         end = data[0]->iter->end;
+        reg_tid = data[0]->iter->tid;
     }
 
     // the core multi-pileup loop
@@ -212,12 +213,12 @@ int main_depth(int argc, char *argv[])
     while ((ret=bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // come to the next covered position
         if (pos < beg || pos >= end) continue; // out of range; skip
         if (tid >= h->n_targets) continue;     // diff number of @SQ lines per file?
-        if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip
         if (all) {
             while (tid > last_tid) {
-                if (last_tid >= 0 && all > 1 && !reg) {
-                    // Deal with remainder or entirety of last tid
+                if (last_tid >= 0 && !reg) {
+                    // Deal with remainder or entirety of last tid.
                     while (++last_pos < h->target_len[last_tid]) {
+                        // Horribly inefficient, but the bed API is an obfuscated black box.
                         if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
                             continue;
                         fputs(h->target_name[last_tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", last_pos+1);
@@ -228,6 +229,8 @@ int main_depth(int argc, char *argv[])
                 }
                 last_tid++;
                 last_pos = -1;
+                if (all < 2)
+                    break;
             }
 
             // Deal with missing portion of current tid
@@ -244,6 +247,7 @@ int main_depth(int argc, char *argv[])
             last_tid = tid;
             last_pos = pos;
         }
+        if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue;
         fputs(h->target_name[tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", pos+1); // a customized fprintf(pysam_stdout, ) would be faster
         for (i = 0; i < n; ++i) { // base level filters have to go here
             int j, m = 0;
@@ -262,7 +266,11 @@ int main_depth(int argc, char *argv[])
 
     if (all) {
         // Handle terminating region
-        while (last_tid < h->n_targets) {
+        if (last_tid < 0 && reg && all > 1) {
+            last_tid = reg_tid;
+            last_pos = beg-1;
+        }
+        while (last_tid >= 0 && last_tid < h->n_targets) {
             while (++last_pos < h->target_len[last_tid]) {
                 if (last_pos >= end) break;
                 if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
diff --git a/samtools/bam_addrprg.c b/samtools/bam_addrprg.c
index f7bbfab..99a198d 100644
--- a/samtools/bam_addrprg.c
+++ b/samtools/bam_addrprg.c
@@ -1,6 +1,6 @@
 /* bam_addrprg.c -- samtools command to add or replace readgroups.
 
-   Copyright (c) 2013, 2015 Genome Research Limited.
+   Copyright (c) 2013, 2015, 2016 Genome Research Limited.
 
    Author: Martin O. Pollard <mp15 at sanger.ac.uk>
 
@@ -27,6 +27,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <htslib/sam.h>
 #include <htslib/kstring.h>
 #include "samtools.h"
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 #include <string.h>
 #include <stdio.h>
@@ -48,6 +49,7 @@ struct parsed_opts {
     char* rg_line;
     rg_mode mode;
     sam_global_args ga;
+    htsThreadPool p;
 };
 
 struct state;
@@ -69,6 +71,7 @@ static void cleanup_opts(parsed_opts_t* opts)
     free(opts->rg_id);
     free(opts->output_name);
     free(opts->input_name);
+    if (opts->p.pool) hts_tpool_destroy(opts->p.pool);
     sam_global_args_free(&opts->ga);
     free(opts);
 }
@@ -131,6 +134,19 @@ static char* basic_unescape(const char* in)
     return tmp;
 }
 
+// Malloc a string containing [s,slim) or to the end of s if slim is NULL.
+// If lenp is non-NULL, stores the length of the resulting string there.
+static char *dup_substring(const char *s, const char *slim, size_t *lenp)
+{
+    size_t len = slim? (slim - s) : strlen(s);
+    char *ns = malloc(len+1);
+    if (ns == NULL) return NULL;
+    memcpy(ns, s, len);
+    ns[len] = '\0';
+    if (lenp) *lenp = len;
+    return ns;
+}
+
 // These are to be replaced by samtools header parser
 // Extracts the first @RG line from a string.
 static char* get_rg_line(const char* text, size_t* last)
@@ -143,37 +159,17 @@ static char* get_rg_line(const char* text, size_t* last)
         rg++;//skip initial \n
     }
     // duplicate the line for return
-    char* line;
-    char* end = strchr(rg, '\n');
-    if (end) {
-        line = strndup(rg,(end-rg));
-        *last = end - rg;
-    } else {
-        line = strdup(rg);
-        *last = strlen(rg);
-    }
-    return line;
+    return dup_substring(rg, strchr(rg, '\n'), last);
 }
 
 // Given a @RG line return the id
-static char* get_rg_id(const char* input)
+static char* get_rg_id(const char *line)
 {
-    assert(input!=NULL);
-    char* line = strdup(input);
-    char *next = line;
-    char* token = strsep(&next, "\t");
-    token = strsep(&next,"\t"); // skip first token it should always be "@RG"
-    while (next != NULL) {
-        char* key = strsep(&token,":");
-        if (!strcmp(key,"ID")) {
-            char* retval = strdup(token);
-            free(line);
-            return retval;
-        }
-        token = strsep(&next,"\t");
-    }
-    free(line);
-    return NULL;
+    const char *id = strstr(line, "\tID:");
+    if (! id) return NULL;
+
+    id += 4;
+    return dup_substring(id, strchr(id, '\t'), NULL);
 }
 
 // Confirms the existance of an RG line with a given ID in a bam header
@@ -181,9 +177,8 @@ static bool confirm_rg( const bam_hdr_t *hdr, const char* rgid )
 {
     assert( hdr != NULL && rgid != NULL );
 
-    char *ptr, *start;
+    const char *ptr = hdr->text;
     bool found = false;
-    start = ptr = strndup(hdr->text, hdr->l_text);
     while (ptr != NULL && *ptr != '\0' && found == false ) {
         size_t end = 0;
         char* line = get_rg_line(ptr, &end);
@@ -196,16 +191,14 @@ static bool confirm_rg( const bam_hdr_t *hdr, const char* rgid )
         free(line);
         ptr += end;
     }
-    free(start);
     return found;
 }
 
 static char* get_first_rgid( const bam_hdr_t *hdr )
 {
     assert( hdr != NULL );
-    char *ptr, *start;
+    const char *ptr = hdr->text;
     char* found = NULL;
-    start = ptr = strndup(hdr->text, hdr->l_text);
     while (ptr != NULL && *ptr != '\0' && found == NULL ) {
         size_t end = 0;
         char* line = get_rg_line(ptr, &end);
@@ -215,7 +208,6 @@ static char* get_first_rgid( const bam_hdr_t *hdr )
         free(line);
         ptr += end;
     }
-    free(start);
     return found;
 }
 
@@ -230,7 +222,7 @@ static void usage(FILE *fp)
             "  -r STRING @RG line text\n"
             "  -R STRING ID of @RG line in existing header to use\n"
             );
-    sam_global_opt_help(fp, "..O..");
+    sam_global_opt_help(fp, "..O..@");
 }
 
 static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
@@ -249,12 +241,12 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
     retval->mode = overwrite_all;
     sam_global_args_init(&retval->ga);
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS(0, 0, 'O', 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS(0, 0, 'O', 0, 0, '@'),
         { NULL, 0, NULL, 0 }
     };
     kstring_t rg_line = {0,0,NULL};
 
-    while ((n = getopt_long(argc, argv, "r:R:m:o:O:l:h", lopts, NULL)) >= 0) {
+    while ((n = getopt_long(argc, argv, "r:R:m:o:O:l:h@:", lopts, NULL)) >= 0) {
         switch (n) {
             case 'r':
                 // Are we adding to existing rg line?
@@ -328,6 +320,13 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
     }
     retval->input_name = strdup(argv[optind+0]);
 
+    if (retval->ga.nthreads > 0) {
+        if (!(retval->p.pool = hts_tpool_init(retval->ga.nthreads))) {
+            fprintf(stderr, "Error creating thread pool\n");
+            return false;
+        }
+    }
+
     *opts = retval;
     return true;
 }
@@ -369,7 +368,7 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
     // Open files
     retval->input_file = sam_open_format(opts->input_name, "r", &opts->ga.in);
     if (retval->input_file == NULL) {
-        fprintf(stderr, "[init] Could not open input file: %s\n", opts->input_name);
+        print_error_errno("addreplacerg", "could not open \"%s\"", opts->input_name);
         return false;
     }
     retval->input_header = sam_hdr_read(retval->input_file);
@@ -378,10 +377,15 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
     retval->output_file = sam_open_format(opts->output_name == NULL?"-":opts->output_name, "w", &opts->ga.out);
 
     if (retval->output_file == NULL) {
-        print_error_errno("addreplacerg", "Could not open output file: %s\n", opts->output_name);
+        print_error_errno("addreplacerg", "could not create \"%s\"", opts->output_name);
         return false;
     }
 
+    if (opts->p.pool) {
+        hts_set_opt(retval->input_file,  HTS_OPT_THREAD_POOL, &opts->p);
+        hts_set_opt(retval->output_file, HTS_OPT_THREAD_POOL, &opts->p);
+    }
+
     if (opts->rg_line) {
         // Append new RG line to header.
         // Check does not already exist
@@ -466,13 +470,13 @@ int main_addreplacerg(int argc, char** argv)
 
     if (!readgroupise(state)) goto error;
 
-    cleanup_opts(opts);
     cleanup_state(state);
+    cleanup_opts(opts);
 
     return EXIT_SUCCESS;
 error:
-    cleanup_opts(opts);
     cleanup_state(state);
+    cleanup_opts(opts);
 
     return EXIT_FAILURE;
 }
diff --git a/samtools/bam_addrprg.c.pysam.c b/samtools/bam_addrprg.c.pysam.c
index 2ddd1b1..56986dd 100644
--- a/samtools/bam_addrprg.c.pysam.c
+++ b/samtools/bam_addrprg.c.pysam.c
@@ -2,7 +2,7 @@
 
 /* bam_addrprg.c -- samtools command to add or replace readgroups.
 
-   Copyright (c) 2013, 2015 Genome Research Limited.
+   Copyright (c) 2013, 2015, 2016 Genome Research Limited.
 
    Author: Martin O. Pollard <mp15 at sanger.ac.uk>
 
@@ -29,6 +29,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <htslib/sam.h>
 #include <htslib/kstring.h>
 #include "samtools.h"
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 #include <string.h>
 #include <stdio.h>
@@ -50,6 +51,7 @@ struct parsed_opts {
     char* rg_line;
     rg_mode mode;
     sam_global_args ga;
+    htsThreadPool p;
 };
 
 struct state;
@@ -71,6 +73,7 @@ static void cleanup_opts(parsed_opts_t* opts)
     free(opts->rg_id);
     free(opts->output_name);
     free(opts->input_name);
+    if (opts->p.pool) hts_tpool_destroy(opts->p.pool);
     sam_global_args_free(&opts->ga);
     free(opts);
 }
@@ -133,6 +136,19 @@ static char* basic_unescape(const char* in)
     return tmp;
 }
 
+// Malloc a string containing [s,slim) or to the end of s if slim is NULL.
+// If lenp is non-NULL, stores the length of the resulting string there.
+static char *dup_substring(const char *s, const char *slim, size_t *lenp)
+{
+    size_t len = slim? (slim - s) : strlen(s);
+    char *ns = malloc(len+1);
+    if (ns == NULL) return NULL;
+    memcpy(ns, s, len);
+    ns[len] = '\0';
+    if (lenp) *lenp = len;
+    return ns;
+}
+
 // These are to be replaced by samtools header parser
 // Extracts the first @RG line from a string.
 static char* get_rg_line(const char* text, size_t* last)
@@ -145,37 +161,17 @@ static char* get_rg_line(const char* text, size_t* last)
         rg++;//skip initial \n
     }
     // duplicate the line for return
-    char* line;
-    char* end = strchr(rg, '\n');
-    if (end) {
-        line = strndup(rg,(end-rg));
-        *last = end - rg;
-    } else {
-        line = strdup(rg);
-        *last = strlen(rg);
-    }
-    return line;
+    return dup_substring(rg, strchr(rg, '\n'), last);
 }
 
 // Given a @RG line return the id
-static char* get_rg_id(const char* input)
+static char* get_rg_id(const char *line)
 {
-    assert(input!=NULL);
-    char* line = strdup(input);
-    char *next = line;
-    char* token = strsep(&next, "\t");
-    token = strsep(&next,"\t"); // skip first token it should always be "@RG"
-    while (next != NULL) {
-        char* key = strsep(&token,":");
-        if (!strcmp(key,"ID")) {
-            char* retval = strdup(token);
-            free(line);
-            return retval;
-        }
-        token = strsep(&next,"\t");
-    }
-    free(line);
-    return NULL;
+    const char *id = strstr(line, "\tID:");
+    if (! id) return NULL;
+
+    id += 4;
+    return dup_substring(id, strchr(id, '\t'), NULL);
 }
 
 // Confirms the existance of an RG line with a given ID in a bam header
@@ -183,9 +179,8 @@ static bool confirm_rg( const bam_hdr_t *hdr, const char* rgid )
 {
     assert( hdr != NULL && rgid != NULL );
 
-    char *ptr, *start;
+    const char *ptr = hdr->text;
     bool found = false;
-    start = ptr = strndup(hdr->text, hdr->l_text);
     while (ptr != NULL && *ptr != '\0' && found == false ) {
         size_t end = 0;
         char* line = get_rg_line(ptr, &end);
@@ -198,16 +193,14 @@ static bool confirm_rg( const bam_hdr_t *hdr, const char* rgid )
         free(line);
         ptr += end;
     }
-    free(start);
     return found;
 }
 
 static char* get_first_rgid( const bam_hdr_t *hdr )
 {
     assert( hdr != NULL );
-    char *ptr, *start;
+    const char *ptr = hdr->text;
     char* found = NULL;
-    start = ptr = strndup(hdr->text, hdr->l_text);
     while (ptr != NULL && *ptr != '\0' && found == NULL ) {
         size_t end = 0;
         char* line = get_rg_line(ptr, &end);
@@ -217,7 +210,6 @@ static char* get_first_rgid( const bam_hdr_t *hdr )
         free(line);
         ptr += end;
     }
-    free(start);
     return found;
 }
 
@@ -232,7 +224,7 @@ static void usage(FILE *fp)
             "  -r STRING @RG line text\n"
             "  -R STRING ID of @RG line in existing header to use\n"
             );
-    sam_global_opt_help(fp, "..O..");
+    sam_global_opt_help(fp, "..O..@");
 }
 
 static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
@@ -251,12 +243,12 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
     retval->mode = overwrite_all;
     sam_global_args_init(&retval->ga);
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS(0, 0, 'O', 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS(0, 0, 'O', 0, 0, '@'),
         { NULL, 0, NULL, 0 }
     };
     kstring_t rg_line = {0,0,NULL};
 
-    while ((n = getopt_long(argc, argv, "r:R:m:o:O:l:h", lopts, NULL)) >= 0) {
+    while ((n = getopt_long(argc, argv, "r:R:m:o:O:l:h@:", lopts, NULL)) >= 0) {
         switch (n) {
             case 'r':
                 // Are we adding to existing rg line?
@@ -330,6 +322,13 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
     }
     retval->input_name = strdup(argv[optind+0]);
 
+    if (retval->ga.nthreads > 0) {
+        if (!(retval->p.pool = hts_tpool_init(retval->ga.nthreads))) {
+            fprintf(pysam_stderr, "Error creating thread pool\n");
+            return false;
+        }
+    }
+
     *opts = retval;
     return true;
 }
@@ -371,7 +370,7 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
     // Open files
     retval->input_file = sam_open_format(opts->input_name, "r", &opts->ga.in);
     if (retval->input_file == NULL) {
-        fprintf(pysam_stderr, "[init] Could not open input file: %s\n", opts->input_name);
+        print_error_errno("addreplacerg", "could not open \"%s\"", opts->input_name);
         return false;
     }
     retval->input_header = sam_hdr_read(retval->input_file);
@@ -380,10 +379,15 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
     retval->output_file = sam_open_format(opts->output_name == NULL?"-":opts->output_name, "w", &opts->ga.out);
 
     if (retval->output_file == NULL) {
-        print_error_errno("addreplacerg", "Could not open output file: %s\n", opts->output_name);
+        print_error_errno("addreplacerg", "could not create \"%s\"", opts->output_name);
         return false;
     }
 
+    if (opts->p.pool) {
+        hts_set_opt(retval->input_file,  HTS_OPT_THREAD_POOL, &opts->p);
+        hts_set_opt(retval->output_file, HTS_OPT_THREAD_POOL, &opts->p);
+    }
+
     if (opts->rg_line) {
         // Append new RG line to header.
         // Check does not already exist
@@ -468,13 +472,13 @@ int main_addreplacerg(int argc, char** argv)
 
     if (!readgroupise(state)) goto error;
 
-    cleanup_opts(opts);
     cleanup_state(state);
+    cleanup_opts(opts);
 
     return EXIT_SUCCESS;
 error:
-    cleanup_opts(opts);
     cleanup_state(state);
+    cleanup_opts(opts);
 
     return EXIT_FAILURE;
 }
diff --git a/samtools/bam_cat.c b/samtools/bam_cat.c
index 5c303d1..95498ec 100644
--- a/samtools/bam_cat.c
+++ b/samtools/bam_cat.c
@@ -40,6 +40,7 @@ Illumina.
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
+#include <strings.h>
 
 #include "htslib/bgzf.h"
 #include "htslib/sam.h"
@@ -468,7 +469,7 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
         }
 
         if (in->block_offset < in->block_length) {
-            if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+            if (bgzf_write(fp, (char *)in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
             if (bgzf_flush(fp) != 0) goto write_fail;
         }
 
@@ -531,10 +532,12 @@ int main_cat(int argc, char *argv[])
 {
     bam_hdr_t *h = 0;
     char *outfn = 0;
+    char **infns = NULL; // files to concatenate
+    int infns_size = 0;
     int c, ret = 0;
     samFile *in;
 
-    while ((c = getopt(argc, argv, "h:o:")) >= 0) {
+    while ((c = getopt(argc, argv, "h:o:b:")) >= 0) {
         switch (c) {
             case 'h': {
                 samFile *fph = sam_open(optarg, "r");
@@ -553,29 +556,61 @@ int main_cat(int argc, char *argv[])
                 break;
             }
             case 'o': outfn = strdup(optarg); break;
+            case 'b': {
+                // add file names in "optarg" to the list
+                // of files to concatenate
+                int nfns;
+                char **fns_read = hts_readlines(optarg, &nfns);
+                if (fns_read) {
+                    infns = realloc(infns, (infns_size + nfns) * sizeof(char*));
+                    if (infns == NULL) { ret = 1; goto end; }
+                    memcpy(infns+infns_size, fns_read, nfns * sizeof(char*));
+                    infns_size += nfns;
+                    free(fns_read);
+                } else {
+                    print_error("cat", "Invalid file list \"%s\"", optarg);
+                    ret = 1;
+                }
+                break;
+            }
         }
     }
-    if (argc - optind < 1) {
-        fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> [...]\n");
+
+    // Append files specified in argv to the list.
+    int nargv_fns = argc - optind;
+    if (nargv_fns > 0) {
+        infns = realloc(infns, (infns_size + nargv_fns) * sizeof(char*));
+        if (infns == NULL) { ret = 1; goto end; }
+        memcpy(infns + infns_size, argv + optind, nargv_fns * sizeof(char*));
+    }
+
+    // Require at least one input file
+    if (infns_size + nargv_fns == 0) {
+        fprintf(stderr, "Usage: samtools cat [options] <in1.bam>  [... <inN.bam>]\n");
+        fprintf(stderr, "       samtools cat [options] <in1.cram> [... <inN.cram>]\n\n");
+        fprintf(stderr, "Concatenate BAM or CRAM files, first those in <bamlist.fofn>, then those\non the command line.\n\n");
+        fprintf(stderr, "Options: -b FILE  list of input BAM/CRAM file names, one per line\n");
+        fprintf(stderr, "         -h FILE  copy the header from FILE [default is 1st input file]\n");
+        fprintf(stderr, "         -o FILE  output BAM/CRAM\n");
         return 1;
     }
 
-    in = sam_open(argv[optind], "r");
+    in = sam_open(infns[0], "r");
     if (!in) {
-        print_error_errno("cat", "failed to open file '%s'", argv[optind]);
+        print_error_errno("cat", "failed to open file '%s'", infns[0]);
         return 1;
     }
 
     switch (hts_get_format(in)->format) {
     case bam:
         sam_close(in);
-        if (bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+        if (bam_cat(infns_size+nargv_fns, infns, h, outfn? outfn : "-") < 0)
             ret = 1;
         break;
 
     case cram:
         sam_close(in);
-        if (cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+        if (cram_cat(infns_size+nargv_fns, infns, h, outfn? outfn : "-") < 0)
             ret = 1;
         break;
 
@@ -584,7 +619,16 @@ int main_cat(int argc, char *argv[])
         fprintf(stderr, "[%s] ERROR: input is not BAM or CRAM\n", __func__);
         return 1;
     }
+
+ end:
+    if (infns_size > 0) {
+        int i;
+        for (i=0; i<infns_size; i++)
+            free(infns[i]);
+    }
+
     free(outfn);
+    free(infns);
 
     if (h)
         bam_hdr_destroy(h);
diff --git a/samtools/bam_cat.c.pysam.c b/samtools/bam_cat.c.pysam.c
index daa0454..20adbc1 100644
--- a/samtools/bam_cat.c.pysam.c
+++ b/samtools/bam_cat.c.pysam.c
@@ -42,6 +42,7 @@ Illumina.
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
+#include <strings.h>
 
 #include "htslib/bgzf.h"
 #include "htslib/sam.h"
@@ -470,7 +471,7 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
         }
 
         if (in->block_offset < in->block_length) {
-            if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+            if (bgzf_write(fp, (char *)in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
             if (bgzf_flush(fp) != 0) goto write_fail;
         }
 
@@ -533,10 +534,12 @@ int main_cat(int argc, char *argv[])
 {
     bam_hdr_t *h = 0;
     char *outfn = 0;
+    char **infns = NULL; // files to concatenate
+    int infns_size = 0;
     int c, ret = 0;
     samFile *in;
 
-    while ((c = getopt(argc, argv, "h:o:")) >= 0) {
+    while ((c = getopt(argc, argv, "h:o:b:")) >= 0) {
         switch (c) {
             case 'h': {
                 samFile *fph = sam_open(optarg, "r");
@@ -555,29 +558,61 @@ int main_cat(int argc, char *argv[])
                 break;
             }
             case 'o': outfn = strdup(optarg); break;
+            case 'b': {
+                // add file names in "optarg" to the list
+                // of files to concatenate
+                int nfns;
+                char **fns_read = hts_readlines(optarg, &nfns);
+                if (fns_read) {
+                    infns = realloc(infns, (infns_size + nfns) * sizeof(char*));
+                    if (infns == NULL) { ret = 1; goto end; }
+                    memcpy(infns+infns_size, fns_read, nfns * sizeof(char*));
+                    infns_size += nfns;
+                    free(fns_read);
+                } else {
+                    print_error("cat", "Invalid file list \"%s\"", optarg);
+                    ret = 1;
+                }
+                break;
+            }
         }
     }
-    if (argc - optind < 1) {
-        fprintf(pysam_stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> [...]\n");
+
+    // Append files specified in argv to the list.
+    int nargv_fns = argc - optind;
+    if (nargv_fns > 0) {
+        infns = realloc(infns, (infns_size + nargv_fns) * sizeof(char*));
+        if (infns == NULL) { ret = 1; goto end; }
+        memcpy(infns + infns_size, argv + optind, nargv_fns * sizeof(char*));
+    }
+
+    // Require at least one input file
+    if (infns_size + nargv_fns == 0) {
+        fprintf(pysam_stderr, "Usage: samtools cat [options] <in1.bam>  [... <inN.bam>]\n");
+        fprintf(pysam_stderr, "       samtools cat [options] <in1.cram> [... <inN.cram>]\n\n");
+        fprintf(pysam_stderr, "Concatenate BAM or CRAM files, first those in <bamlist.fofn>, then those\non the command line.\n\n");
+        fprintf(pysam_stderr, "Options: -b FILE  list of input BAM/CRAM file names, one per line\n");
+        fprintf(pysam_stderr, "         -h FILE  copy the header from FILE [default is 1st input file]\n");
+        fprintf(pysam_stderr, "         -o FILE  output BAM/CRAM\n");
         return 1;
     }
 
-    in = sam_open(argv[optind], "r");
+    in = sam_open(infns[0], "r");
     if (!in) {
-        print_error_errno("cat", "failed to open file '%s'", argv[optind]);
+        print_error_errno("cat", "failed to open file '%s'", infns[0]);
         return 1;
     }
 
     switch (hts_get_format(in)->format) {
     case bam:
         sam_close(in);
-        if (bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+        if (bam_cat(infns_size+nargv_fns, infns, h, outfn? outfn : "-") < 0)
             ret = 1;
         break;
 
     case cram:
         sam_close(in);
-        if (cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+        if (cram_cat(infns_size+nargv_fns, infns, h, outfn? outfn : "-") < 0)
             ret = 1;
         break;
 
@@ -586,7 +621,16 @@ int main_cat(int argc, char *argv[])
         fprintf(pysam_stderr, "[%s] ERROR: input is not BAM or CRAM\n", __func__);
         return 1;
     }
+
+ end:
+    if (infns_size > 0) {
+        int i;
+        for (i=0; i<infns_size; i++)
+            free(infns[i]);
+    }
+
     free(outfn);
+    free(infns);
 
     if (h)
         bam_hdr_destroy(h);
diff --git a/samtools/bam_index.c b/samtools/bam_index.c
index 3a5acf6..40b7e0f 100644
--- a/samtools/bam_index.c
+++ b/samtools/bam_index.c
@@ -46,20 +46,23 @@ static void index_usage(FILE *fp)
 "Options:\n"
 "  -b       Generate BAI-format index for BAM files [default]\n"
 "  -c       Generate CSI-format index for BAM files\n"
-"  -m INT   Set minimum interval size for CSI indices to 2^INT [%d]\n", BAM_LIDX_SHIFT);
+"  -m INT   Set minimum interval size for CSI indices to 2^INT [%d]\n"
+"  -@ INT   Sets the number of threads [none]\n", BAM_LIDX_SHIFT);
 }
 
 int bam_index(int argc, char *argv[])
 {
     int csi = 0;
     int min_shift = BAM_LIDX_SHIFT;
+    int n_threads = 0;
     int c, ret;
 
-    while ((c = getopt(argc, argv, "bcm:")) >= 0)
+    while ((c = getopt(argc, argv, "bcm:@:")) >= 0)
         switch (c) {
         case 'b': csi = 0; break;
         case 'c': csi = 1; break;
         case 'm': csi = 1; min_shift = atoi(optarg); break;
+        case '@': n_threads = atoi(optarg); break;
         default:
             index_usage(stderr);
             return 1;
@@ -70,18 +73,32 @@ int bam_index(int argc, char *argv[])
         return 1;
     }
 
-    ret = sam_index_build2(argv[optind], argv[optind+1], csi? min_shift : 0);
-    if (ret != 0) {
-        if (ret == -2)
-            print_error_errno("index", "failed to open \"%s\"", argv[optind]);
-        else if (ret == -3)
-            print_error("index", "\"%s\" is in a format that cannot be usefully indexed", argv[optind]);
+    ret = sam_index_build3(argv[optind], argv[optind+1], csi? min_shift : 0, n_threads);
+    switch (ret) {
+    case 0:
+        return 0;
+
+    case -2:
+        print_error_errno("index", "failed to open \"%s\"", argv[optind]);
+        break;
+
+    case -3:
+        print_error("index", "\"%s\" is in a format that cannot be usefully indexed", argv[optind]);
+        break;
+
+    case -4:
+        if (argv[optind+1])
+            print_error("index", "failed to create or write index \"%s\"", argv[optind+1]);
         else
-            print_error("index", "\"%s\" is corrupted or unsorted", argv[optind]);
-        return EXIT_FAILURE;
+            print_error("index", "failed to create or write index");
+        break;
+
+    default:
+        print_error_errno("index", "failed to create index for \"%s\"", argv[optind]);
+        break;
     }
 
-    return 0;
+    return EXIT_FAILURE;
 }
 
 int bam_idxstats(int argc, char *argv[])
@@ -95,15 +112,20 @@ int bam_idxstats(int argc, char *argv[])
         return 1;
     }
     fp = sam_open(argv[1], "r");
-    if (fp == NULL) { fprintf(stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
+    if (fp == NULL) {
+        print_error_errno("idxstats", "failed to open \"%s\"", argv[1]);
+        return 1;
+    }
     header = sam_hdr_read(fp);
     if (header == NULL) {
-        fprintf(stderr, "[%s] failed to read header for '%s'.\n",
-                __func__, argv[1]);
+        print_error("idxstats", "failed to read header for \"%s\"", argv[1]);
         return 1;
     }
     idx = sam_index_load(fp, argv[1]);
-    if (idx == NULL) { fprintf(stderr, "[%s] fail to load the index.\n", __func__); return 1; }
+    if (idx == NULL) {
+        print_error("idxstats", "fail to load index for \"%s\"", argv[1]);
+        return 1;
+    }
 
     int i;
     for (i = 0; i < header->n_targets; ++i) {
diff --git a/samtools/bam_index.c.pysam.c b/samtools/bam_index.c.pysam.c
index 6c0efdc..a91ee76 100644
--- a/samtools/bam_index.c.pysam.c
+++ b/samtools/bam_index.c.pysam.c
@@ -48,20 +48,23 @@ static void index_usage(FILE *fp)
 "Options:\n"
 "  -b       Generate BAI-format index for BAM files [default]\n"
 "  -c       Generate CSI-format index for BAM files\n"
-"  -m INT   Set minimum interval size for CSI indices to 2^INT [%d]\n", BAM_LIDX_SHIFT);
+"  -m INT   Set minimum interval size for CSI indices to 2^INT [%d]\n"
+"  -@ INT   Sets the number of threads [none]\n", BAM_LIDX_SHIFT);
 }
 
 int bam_index(int argc, char *argv[])
 {
     int csi = 0;
     int min_shift = BAM_LIDX_SHIFT;
+    int n_threads = 0;
     int c, ret;
 
-    while ((c = getopt(argc, argv, "bcm:")) >= 0)
+    while ((c = getopt(argc, argv, "bcm:@:")) >= 0)
         switch (c) {
         case 'b': csi = 0; break;
         case 'c': csi = 1; break;
         case 'm': csi = 1; min_shift = atoi(optarg); break;
+        case '@': n_threads = atoi(optarg); break;
         default:
             index_usage(pysam_stderr);
             return 1;
@@ -72,18 +75,32 @@ int bam_index(int argc, char *argv[])
         return 1;
     }
 
-    ret = sam_index_build2(argv[optind], argv[optind+1], csi? min_shift : 0);
-    if (ret != 0) {
-        if (ret == -2)
-            print_error_errno("index", "failed to open \"%s\"", argv[optind]);
-        else if (ret == -3)
-            print_error("index", "\"%s\" is in a format that cannot be usefully indexed", argv[optind]);
+    ret = sam_index_build3(argv[optind], argv[optind+1], csi? min_shift : 0, n_threads);
+    switch (ret) {
+    case 0:
+        return 0;
+
+    case -2:
+        print_error_errno("index", "failed to open \"%s\"", argv[optind]);
+        break;
+
+    case -3:
+        print_error("index", "\"%s\" is in a format that cannot be usefully indexed", argv[optind]);
+        break;
+
+    case -4:
+        if (argv[optind+1])
+            print_error("index", "failed to create or write index \"%s\"", argv[optind+1]);
         else
-            print_error("index", "\"%s\" is corrupted or unsorted", argv[optind]);
-        return EXIT_FAILURE;
+            print_error("index", "failed to create or write index");
+        break;
+
+    default:
+        print_error_errno("index", "failed to create index for \"%s\"", argv[optind]);
+        break;
     }
 
-    return 0;
+    return EXIT_FAILURE;
 }
 
 int bam_idxstats(int argc, char *argv[])
@@ -97,15 +114,20 @@ int bam_idxstats(int argc, char *argv[])
         return 1;
     }
     fp = sam_open(argv[1], "r");
-    if (fp == NULL) { fprintf(pysam_stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
+    if (fp == NULL) {
+        print_error_errno("idxstats", "failed to open \"%s\"", argv[1]);
+        return 1;
+    }
     header = sam_hdr_read(fp);
     if (header == NULL) {
-        fprintf(pysam_stderr, "[%s] failed to read header for '%s'.\n",
-                __func__, argv[1]);
+        print_error("idxstats", "failed to read header for \"%s\"", argv[1]);
         return 1;
     }
     idx = sam_index_load(fp, argv[1]);
-    if (idx == NULL) { fprintf(pysam_stderr, "[%s] fail to load the index.\n", __func__); return 1; }
+    if (idx == NULL) {
+        print_error("idxstats", "fail to load index for \"%s\"", argv[1]);
+        return 1;
+    }
 
     int i;
     for (i = 0; i < header->n_targets; ++i) {
diff --git a/samtools/bam_mate.c b/samtools/bam_mate.c
index 5b13b2e..75c2f51 100644
--- a/samtools/bam_mate.c
+++ b/samtools/bam_mate.c
@@ -1,6 +1,6 @@
 /*  bam_mate.c -- fix mate pairing information and clean up flags.
 
-    Copyright (C) 2009, 2011-2016 Genome Research Ltd.
+    Copyright (C) 2009, 2011-2017 Genome Research Ltd.
     Portions copyright (C) 2011 Broad Institute.
     Portions copyright (C) 2012 Peter Cock, The James Hutton Institute.
 
@@ -31,6 +31,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 #include "htslib/kstring.h"
 #include "htslib/sam.h"
@@ -155,9 +156,30 @@ static bool plausibly_properly_paired(bam1_t* a, bam1_t* b)
         return false;
 }
 
-static void sync_mq(bam1_t* src, bam1_t* dest)
+// Returns 0 on success, -1 on failure.
+static int bam_format_cigar(const bam1_t* b, kstring_t* str)
+{
+    // An empty cigar is a special case return "*" rather than ""
+    if (b->core.n_cigar == 0) {
+        return (kputc('*', str) == EOF) ? -1 : 0;
+    }
+
+    const uint32_t *cigar = bam_get_cigar(b);
+    uint32_t i;
+
+    for (i = 0; i < b->core.n_cigar; ++i) {
+        if (kputw(bam_cigar_oplen(cigar[i]), str) == EOF) return -1;
+        if (kputc(bam_cigar_opchr(cigar[i]), str) == EOF) return -1;
+    }
+
+    return 0;
+}
+
+// Returns 0 on success, -1 on failure.
+static int sync_mq_mc(bam1_t* src, bam1_t* dest)
 {
     if ( (src->core.flag & BAM_FUNMAP) == 0 ) { // If mapped
+        // Copy Mate Mapping Quality
         uint32_t mq = src->core.qual;
         uint8_t* data;
         if ((data = bam_aux_get(dest,"MQ")) != NULL) {
@@ -166,17 +188,34 @@ static void sync_mq(bam1_t* src, bam1_t* dest)
 
         bam_aux_append(dest, "MQ", 'i', sizeof(uint32_t), (uint8_t*)&mq);
     }
+    // Copy mate cigar if either read is mapped
+    if ( (src->core.flag & BAM_FUNMAP) == 0 || (dest->core.flag & BAM_FUNMAP) == 0 ) {
+        uint8_t* data_mc;
+        if ((data_mc = bam_aux_get(dest,"MC")) != NULL) {
+            bam_aux_del(dest, data_mc);
+        }
+
+        // Convert cigar to string
+        kstring_t mc = { 0, 0, NULL };
+        if (bam_format_cigar(src, &mc) < 0) return -1;
+
+        bam_aux_append(dest, "MC", 'Z', ks_len(&mc)+1, (uint8_t*)ks_str(&mc));
+        free(mc.s);
+    }
+    return 0;
 }
 
-// copy flags
-static void sync_mate(bam1_t* a, bam1_t* b)
+// Copy flags.
+// Returns 0 on success, -1 on failure.
+static int sync_mate(bam1_t* a, bam1_t* b)
 {
     sync_unmapped_pos_inner(a,b);
     sync_unmapped_pos_inner(b,a);
     sync_mate_inner(a,b);
     sync_mate_inner(b,a);
-    sync_mq(a,b);
-    sync_mq(b,a);
+    if (sync_mq_mc(a,b) < 0) return -1;
+    if (sync_mq_mc(b,a) < 0) return -1;
+    return 0;
 }
 
 // currently, this function ONLY works if each read has one hit
@@ -239,7 +278,7 @@ static int bam_mating_core(samFile* in, samFile* out, int remove_reads, int prop
             if (strcmp(bam_get_qname(cur), bam_get_qname(pre)) == 0) { // identical pair name
                 pre->core.flag |= BAM_FPAIRED;
                 cur->core.flag |= BAM_FPAIRED;
-                sync_mate(pre, cur);
+                if (sync_mate(pre, cur)) goto fail;
 
                 if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
                     && !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // if safe set TLEN/ISIZE
@@ -324,7 +363,7 @@ void usage(FILE* where)
 "  -p           Disable FR proper pair check\n"
 "  -c           Add template cigar ct tag\n");
 
-    sam_global_opt_help(where, "-.O..");
+    sam_global_opt_help(where, "-.O..@");
 
     fprintf(where,
 "\n"
@@ -335,18 +374,19 @@ void usage(FILE* where)
 
 int bam_mating(int argc, char *argv[])
 {
+    htsThreadPool p = {NULL, 0};
     samFile *in = NULL, *out = NULL;
     int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     char wmode[3] = {'w', 'b', 0};
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         { NULL, 0, NULL, 0 }
     };
 
     // parse args
     if (argc == 1) { usage(stdout); return 0; }
-    while ((c = getopt_long(argc, argv, "rpcO:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "rpcO:@:", lopts, NULL)) >= 0) {
         switch (c) {
             case 'r': remove_reads = 1; break;
             case 'p': proper_pair_check = 0; break;
@@ -369,6 +409,15 @@ int bam_mating(int argc, char *argv[])
         goto fail;
     }
 
+    if (ga.nthreads > 0) {
+        if (!(p.pool = hts_tpool_init(ga.nthreads))) {
+            fprintf(stderr, "Error creating thread pool\n");
+            goto fail;
+        }
+        hts_set_opt(in,  HTS_OPT_THREAD_POOL, &p);
+        hts_set_opt(out, HTS_OPT_THREAD_POOL, &p);
+    }
+
     // run
     res = bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
 
@@ -379,12 +428,14 @@ int bam_mating(int argc, char *argv[])
         res = 1;
     }
 
+    if (p.pool) hts_tpool_destroy(p.pool);
     sam_global_args_free(&ga);
     return res;
 
  fail:
     if (in) sam_close(in);
     if (out) sam_close(out);
+    if (p.pool) hts_tpool_destroy(p.pool);
     sam_global_args_free(&ga);
     return 1;
 }
diff --git a/samtools/bam_mate.c.pysam.c b/samtools/bam_mate.c.pysam.c
index a416d07..a03de96 100644
--- a/samtools/bam_mate.c.pysam.c
+++ b/samtools/bam_mate.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam_mate.c -- fix mate pairing information and clean up flags.
 
-    Copyright (C) 2009, 2011-2016 Genome Research Ltd.
+    Copyright (C) 2009, 2011-2017 Genome Research Ltd.
     Portions copyright (C) 2011 Broad Institute.
     Portions copyright (C) 2012 Peter Cock, The James Hutton Institute.
 
@@ -33,6 +33,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 #include "htslib/kstring.h"
 #include "htslib/sam.h"
@@ -157,9 +158,30 @@ static bool plausibly_properly_paired(bam1_t* a, bam1_t* b)
         return false;
 }
 
-static void sync_mq(bam1_t* src, bam1_t* dest)
+// Returns 0 on success, -1 on failure.
+static int bam_format_cigar(const bam1_t* b, kstring_t* str)
+{
+    // An empty cigar is a special case return "*" rather than ""
+    if (b->core.n_cigar == 0) {
+        return (kputc('*', str) == EOF) ? -1 : 0;
+    }
+
+    const uint32_t *cigar = bam_get_cigar(b);
+    uint32_t i;
+
+    for (i = 0; i < b->core.n_cigar; ++i) {
+        if (kputw(bam_cigar_oplen(cigar[i]), str) == EOF) return -1;
+        if (kputc(bam_cigar_opchr(cigar[i]), str) == EOF) return -1;
+    }
+
+    return 0;
+}
+
+// Returns 0 on success, -1 on failure.
+static int sync_mq_mc(bam1_t* src, bam1_t* dest)
 {
     if ( (src->core.flag & BAM_FUNMAP) == 0 ) { // If mapped
+        // Copy Mate Mapping Quality
         uint32_t mq = src->core.qual;
         uint8_t* data;
         if ((data = bam_aux_get(dest,"MQ")) != NULL) {
@@ -168,17 +190,34 @@ static void sync_mq(bam1_t* src, bam1_t* dest)
 
         bam_aux_append(dest, "MQ", 'i', sizeof(uint32_t), (uint8_t*)&mq);
     }
+    // Copy mate cigar if either read is mapped
+    if ( (src->core.flag & BAM_FUNMAP) == 0 || (dest->core.flag & BAM_FUNMAP) == 0 ) {
+        uint8_t* data_mc;
+        if ((data_mc = bam_aux_get(dest,"MC")) != NULL) {
+            bam_aux_del(dest, data_mc);
+        }
+
+        // Convert cigar to string
+        kstring_t mc = { 0, 0, NULL };
+        if (bam_format_cigar(src, &mc) < 0) return -1;
+
+        bam_aux_append(dest, "MC", 'Z', ks_len(&mc)+1, (uint8_t*)ks_str(&mc));
+        free(mc.s);
+    }
+    return 0;
 }
 
-// copy flags
-static void sync_mate(bam1_t* a, bam1_t* b)
+// Copy flags.
+// Returns 0 on success, -1 on failure.
+static int sync_mate(bam1_t* a, bam1_t* b)
 {
     sync_unmapped_pos_inner(a,b);
     sync_unmapped_pos_inner(b,a);
     sync_mate_inner(a,b);
     sync_mate_inner(b,a);
-    sync_mq(a,b);
-    sync_mq(b,a);
+    if (sync_mq_mc(a,b) < 0) return -1;
+    if (sync_mq_mc(b,a) < 0) return -1;
+    return 0;
 }
 
 // currently, this function ONLY works if each read has one hit
@@ -241,7 +280,7 @@ static int bam_mating_core(samFile* in, samFile* out, int remove_reads, int prop
             if (strcmp(bam_get_qname(cur), bam_get_qname(pre)) == 0) { // identical pair name
                 pre->core.flag |= BAM_FPAIRED;
                 cur->core.flag |= BAM_FPAIRED;
-                sync_mate(pre, cur);
+                if (sync_mate(pre, cur)) goto fail;
 
                 if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
                     && !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // if safe set TLEN/ISIZE
@@ -326,7 +365,7 @@ void usage(FILE* where)
 "  -p           Disable FR proper pair check\n"
 "  -c           Add template cigar ct tag\n");
 
-    sam_global_opt_help(where, "-.O..");
+    sam_global_opt_help(where, "-.O..@");
 
     fprintf(where,
 "\n"
@@ -337,18 +376,19 @@ void usage(FILE* where)
 
 int bam_mating(int argc, char *argv[])
 {
+    htsThreadPool p = {NULL, 0};
     samFile *in = NULL, *out = NULL;
     int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     char wmode[3] = {'w', 'b', 0};
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         { NULL, 0, NULL, 0 }
     };
 
     // parse args
     if (argc == 1) { usage(pysam_stdout); return 0; }
-    while ((c = getopt_long(argc, argv, "rpcO:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "rpcO:@:", lopts, NULL)) >= 0) {
         switch (c) {
             case 'r': remove_reads = 1; break;
             case 'p': proper_pair_check = 0; break;
@@ -371,6 +411,15 @@ int bam_mating(int argc, char *argv[])
         goto fail;
     }
 
+    if (ga.nthreads > 0) {
+        if (!(p.pool = hts_tpool_init(ga.nthreads))) {
+            fprintf(pysam_stderr, "Error creating thread pool\n");
+            goto fail;
+        }
+        hts_set_opt(in,  HTS_OPT_THREAD_POOL, &p);
+        hts_set_opt(out, HTS_OPT_THREAD_POOL, &p);
+    }
+
     // run
     res = bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
 
@@ -381,12 +430,14 @@ int bam_mating(int argc, char *argv[])
         res = 1;
     }
 
+    if (p.pool) hts_tpool_destroy(p.pool);
     sam_global_args_free(&ga);
     return res;
 
  fail:
     if (in) sam_close(in);
     if (out) sam_close(out);
+    if (p.pool) hts_tpool_destroy(p.pool);
     sam_global_args_free(&ga);
     return 1;
 }
diff --git a/samtools/bam_md.c b/samtools/bam_md.c
index 71206cd..f095030 100644
--- a/samtools/bam_md.c
+++ b/samtools/bam_md.c
@@ -25,15 +25,15 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <config.h>
 
-#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #include <limits.h>
-#include <math.h>
 #include "htslib/faidx.h"
 #include "htslib/sam.h"
 #include "htslib/kstring.h"
-#include "kprobaln.h"
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 #include "samtools.h"
 
@@ -161,178 +161,6 @@ void bam_fillmd1(bam1_t *b, char *ref, int flag)
     bam_fillmd1_core(b, ref, INT_MAX, flag, 0);
 }
 
-int bam_cap_mapQ(bam1_t *b, char *ref, int ref_len, int thres)
-{
-    uint8_t *seq = bam_get_seq(b), *qual = bam_get_qual(b);
-    uint32_t *cigar = bam_get_cigar(b);
-    bam1_core_t *c = &b->core;
-    int i, x, y, mm, q, len, clip_l, clip_q;
-    double t;
-    if (thres < 0) thres = 40; // set the default
-    mm = q = len = clip_l = clip_q = 0;
-    for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
-        int j, l = cigar[i]>>4, op = cigar[i]&0xf;
-        if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-            for (j = 0; j < l; ++j) {
-                int c1, c2, z = y + j;
-                if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
-                c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
-                if (c2 != 15 && c1 != 15 && qual[z] >= 13) { // not ambiguous
-                    ++len;
-                    if (c1 && c1 != c2 && qual[z] >= 13) { // mismatch
-                        ++mm;
-                        q += qual[z] > 33? 33 : qual[z];
-                    }
-                }
-            }
-            if (j < l) break;
-            x += l; y += l; len += l;
-        } else if (op == BAM_CDEL) {
-            for (j = 0; j < l; ++j)
-                if (x+j >= ref_len || ref[x+j] == '\0') break;
-            if (j < l) break;
-            x += l;
-        } else if (op == BAM_CSOFT_CLIP) {
-            for (j = 0; j < l; ++j) clip_q += qual[y+j];
-            clip_l += l;
-            y += l;
-        } else if (op == BAM_CHARD_CLIP) {
-            clip_q += 13 * l;
-            clip_l += l;
-        } else if (op == BAM_CINS) y += l;
-        else if (op == BAM_CREF_SKIP) x += l;
-    }
-    for (i = 0, t = 1; i < mm; ++i)
-        t *= (double)len / (i+1);
-    t = q - 4.343 * log(t) + clip_q / 5.;
-    if (t > thres) return -1;
-    if (t < 0) t = 0;
-    t = sqrt((thres - t) / thres) * thres;
-//  fprintf(stderr, "%s %lf %d\n", bam_get_qname(b), t, q);
-    return (int)(t + .499);
-}
-
-int bam_prob_realn_core(bam1_t *b, const char *ref, int ref_len, int flag)
-{
-    int k, i, bw, x, y, yb, ye, xb, xe, apply_baq = flag&1, extend_baq = flag>>1&1, redo_baq = flag&4;
-    uint32_t *cigar = bam_get_cigar(b);
-    bam1_core_t *c = &b->core;
-    kpa_par_t conf = kpa_par_def;
-    uint8_t *bq = 0, *zq = 0, *qual = bam_get_qual(b);
-    if ((c->flag & BAM_FUNMAP) || b->core.l_qseq == 0 || qual[0] == (uint8_t)-1)
-        return -1; // do nothing
-
-    // test if BQ or ZQ is present
-    if ((bq = bam_aux_get(b, "BQ")) != 0) ++bq;
-    if ((zq = bam_aux_get(b, "ZQ")) != 0 && *zq == 'Z') ++zq;
-    if (bq && redo_baq)
-    {
-        bam_aux_del(b, bq-1);
-        bq = 0;
-    }
-    if (bq && zq) { // remove the ZQ tag
-        bam_aux_del(b, zq-1);
-        zq = 0;
-    }
-    if (bq || zq) {
-        if ((apply_baq && zq) || (!apply_baq && bq)) return -3; // in both cases, do nothing
-        if (bq && apply_baq) { // then convert BQ to ZQ
-            for (i = 0; i < c->l_qseq; ++i)
-                qual[i] = qual[i] + 64 < bq[i]? 0 : qual[i] - ((int)bq[i] - 64);
-            *(bq - 3) = 'Z';
-        } else if (zq && !apply_baq) { // then convert ZQ to BQ
-            for (i = 0; i < c->l_qseq; ++i)
-                qual[i] += (int)zq[i] - 64;
-            *(zq - 3) = 'B';
-        }
-        return 0;
-    }
-    // find the start and end of the alignment
-    x = c->pos, y = 0, yb = ye = xb = xe = -1;
-    for (k = 0; k < c->n_cigar; ++k) {
-        int op, l;
-        op = cigar[k]&0xf; l = cigar[k]>>4;
-        if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-            if (yb < 0) yb = y;
-            if (xb < 0) xb = x;
-            ye = y + l; xe = x + l;
-            x += l; y += l;
-        } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
-        else if (op == BAM_CDEL) x += l;
-        else if (op == BAM_CREF_SKIP) return -1; // do nothing if there is a reference skip
-    }
-    // set bandwidth and the start and the end
-    bw = 7;
-    if (abs((xe - xb) - (ye - yb)) > bw)
-        bw = abs((xe - xb) - (ye - yb)) + 3;
-    conf.bw = bw;
-    xb -= yb + bw/2; if (xb < 0) xb = 0;
-    xe += c->l_qseq - ye + bw/2;
-    if (xe - xb - c->l_qseq > bw)
-        xb += (xe - xb - c->l_qseq - bw) / 2, xe -= (xe - xb - c->l_qseq - bw) / 2;
-    { // glocal
-        uint8_t *s, *r, *q, *seq = bam_get_seq(b), *bq;
-        int *state;
-        bq = calloc(c->l_qseq + 1, 1);
-        memcpy(bq, qual, c->l_qseq);
-        s = calloc(c->l_qseq, 1);
-        for (i = 0; i < c->l_qseq; ++i) s[i] = seq_nt16_int[bam_seqi(seq, i)];
-        r = calloc(xe - xb, 1);
-        for (i = xb; i < xe; ++i) {
-            if (i >= ref_len || ref[i] == '\0') { xe = i; break; }
-            r[i-xb] = seq_nt16_int[seq_nt16_table[(int)ref[i]]];
-        }
-        state = calloc(c->l_qseq, sizeof(int));
-        q = calloc(c->l_qseq, 1);
-        kpa_glocal(r, xe-xb, s, c->l_qseq, qual, &conf, state, q);
-        if (!extend_baq) { // in this block, bq[] is capped by base quality qual[]
-            for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
-                int op = cigar[k]&0xf, l = cigar[k]>>4;
-                if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-                    for (i = y; i < y + l; ++i) {
-                        if ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y)) bq[i] = 0;
-                        else bq[i] = bq[i] < q[i]? bq[i] : q[i];
-                    }
-                    x += l; y += l;
-                } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
-                else if (op == BAM_CDEL) x += l;
-            }
-            for (i = 0; i < c->l_qseq; ++i) bq[i] = qual[i] - bq[i] + 64; // finalize BQ
-        } else { // in this block, bq[] is BAQ that can be larger than qual[] (different from the above!)
-            uint8_t *left, *rght;
-            left = calloc(c->l_qseq, 1); rght = calloc(c->l_qseq, 1);
-            for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
-                int op = cigar[k]&0xf, l = cigar[k]>>4;
-                if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-                    for (i = y; i < y + l; ++i)
-                        bq[i] = ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y))? 0 : q[i];
-                    for (left[y] = bq[y], i = y + 1; i < y + l; ++i)
-                        left[i] = bq[i] > left[i-1]? bq[i] : left[i-1];
-                    for (rght[y+l-1] = bq[y+l-1], i = y + l - 2; i >= y; --i)
-                        rght[i] = bq[i] > rght[i+1]? bq[i] : rght[i+1];
-                    for (i = y; i < y + l; ++i)
-                        bq[i] = left[i] < rght[i]? left[i] : rght[i];
-                    x += l; y += l;
-                } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
-                else if (op == BAM_CDEL) x += l;
-            }
-            for (i = 0; i < c->l_qseq; ++i) bq[i] = 64 + (qual[i] <= bq[i]? 0 : qual[i] - bq[i]); // finalize BQ
-            free(left); free(rght);
-        }
-        if (apply_baq) {
-            for (i = 0; i < c->l_qseq; ++i) qual[i] -= bq[i] - 64; // modify qual
-            bam_aux_append(b, "ZQ", 'Z', c->l_qseq + 1, bq);
-        } else bam_aux_append(b, "BQ", 'Z', c->l_qseq + 1, bq);
-        free(bq); free(s); free(r); free(q); free(state);
-    }
-    return 0;
-}
-
-int bam_prob_realn(bam1_t *b, const char *ref)
-{
-    return bam_prob_realn_core(b, ref, INT_MAX, 1);
-}
-
 int calmd_usage() {
     fprintf(stderr,
 "Usage: samtools calmd [-eubrAES] <aln.bam> <ref.fasta>\n"
@@ -345,13 +173,14 @@ int calmd_usage() {
 "  -r       compute the BQ tag (without -A) or cap baseQ by BAQ (with -A)\n"
 "  -E       extended BAQ for better sensitivity but lower specificity\n");
 
-    sam_global_opt_help(stderr, "-....");
+    sam_global_opt_help(stderr, "-....@");
     return 1;
 }
 
 int bam_fillmd(int argc, char *argv[])
 {
     int c, flt_flag, tid = -2, ret, len, is_bam_out, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
+    htsThreadPool p = {NULL, 0};
     samFile *fp = NULL, *fpout = NULL;
     bam_hdr_t *header = NULL;
     faidx_t *fai = NULL;
@@ -360,14 +189,14 @@ int bam_fillmd(int argc, char *argv[])
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0,'@'),
         { NULL, 0, NULL, 0 }
     };
 
     flt_flag = UPDATE_NM | UPDATE_MD;
     is_bam_out = is_uncompressed = is_realn = max_nm = capQ = baq_flag = 0;
     strcpy(mode_w, "w");
-    while ((c = getopt_long(argc, argv, "EqreuNhbSC:n:Ad", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "EqreuNhbSC:n:Ad@:", lopts, NULL)) >= 0) {
         switch (c) {
         case 'r': is_realn = 1; break;
         case 'e': flt_flag |= USE_EQUAL; break;
@@ -415,6 +244,15 @@ int bam_fillmd(int argc, char *argv[])
         goto fail;
     }
 
+    if (ga.nthreads > 0) {
+        if (!(p.pool = hts_tpool_init(ga.nthreads))) {
+            fprintf(stderr, "Error creating thread pool\n");
+            goto fail;
+        }
+        hts_set_opt(fp,    HTS_OPT_THREAD_POOL, &p);
+        hts_set_opt(fpout, HTS_OPT_THREAD_POOL, &p);
+    }
+
     ref_file = argc > optind + 1 ? argv[optind+1] : ga.reference;
     fai = fai_load(ref_file);
 
@@ -440,9 +278,9 @@ int bam_fillmd(int argc, char *argv[])
                     if (is_realn || capQ > 10) goto fail; // Would otherwise crash
                 }
             }
-            if (is_realn) bam_prob_realn_core(b, ref, len, baq_flag);
+            if (is_realn) sam_prob_realn(b, ref, len, baq_flag);
             if (capQ > 10) {
-                int q = bam_cap_mapQ(b, ref, len, capQ);
+                int q = sam_cap_mapq(b, ref, len, capQ);
                 if (b->core.qual > q) b->core.qual = q;
             }
             if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm);
@@ -466,6 +304,8 @@ int bam_fillmd(int argc, char *argv[])
         fprintf(stderr, "[bam_fillmd] error when closing output file\n");
         return 1;
     }
+    if (p.pool) hts_tpool_destroy(p.pool);
+
     return 0;
 
  fail:
@@ -475,5 +315,7 @@ int bam_fillmd(int argc, char *argv[])
     if (fai) fai_destroy(fai);
     if (fp) sam_close(fp);
     if (fpout) sam_close(fpout);
+    if (p.pool) hts_tpool_destroy(p.pool);
+
     return 1;
 }
diff --git a/samtools/bam_md.c.pysam.c b/samtools/bam_md.c.pysam.c
index d00c01d..5e4cdb5 100644
--- a/samtools/bam_md.c.pysam.c
+++ b/samtools/bam_md.c.pysam.c
@@ -27,15 +27,15 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <config.h>
 
-#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #include <limits.h>
-#include <math.h>
 #include "htslib/faidx.h"
 #include "htslib/sam.h"
 #include "htslib/kstring.h"
-#include "kprobaln.h"
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 #include "samtools.h"
 
@@ -163,178 +163,6 @@ void bam_fillmd1(bam1_t *b, char *ref, int flag)
     bam_fillmd1_core(b, ref, INT_MAX, flag, 0);
 }
 
-int bam_cap_mapQ(bam1_t *b, char *ref, int ref_len, int thres)
-{
-    uint8_t *seq = bam_get_seq(b), *qual = bam_get_qual(b);
-    uint32_t *cigar = bam_get_cigar(b);
-    bam1_core_t *c = &b->core;
-    int i, x, y, mm, q, len, clip_l, clip_q;
-    double t;
-    if (thres < 0) thres = 40; // set the default
-    mm = q = len = clip_l = clip_q = 0;
-    for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
-        int j, l = cigar[i]>>4, op = cigar[i]&0xf;
-        if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-            for (j = 0; j < l; ++j) {
-                int c1, c2, z = y + j;
-                if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
-                c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
-                if (c2 != 15 && c1 != 15 && qual[z] >= 13) { // not ambiguous
-                    ++len;
-                    if (c1 && c1 != c2 && qual[z] >= 13) { // mismatch
-                        ++mm;
-                        q += qual[z] > 33? 33 : qual[z];
-                    }
-                }
-            }
-            if (j < l) break;
-            x += l; y += l; len += l;
-        } else if (op == BAM_CDEL) {
-            for (j = 0; j < l; ++j)
-                if (x+j >= ref_len || ref[x+j] == '\0') break;
-            if (j < l) break;
-            x += l;
-        } else if (op == BAM_CSOFT_CLIP) {
-            for (j = 0; j < l; ++j) clip_q += qual[y+j];
-            clip_l += l;
-            y += l;
-        } else if (op == BAM_CHARD_CLIP) {
-            clip_q += 13 * l;
-            clip_l += l;
-        } else if (op == BAM_CINS) y += l;
-        else if (op == BAM_CREF_SKIP) x += l;
-    }
-    for (i = 0, t = 1; i < mm; ++i)
-        t *= (double)len / (i+1);
-    t = q - 4.343 * log(t) + clip_q / 5.;
-    if (t > thres) return -1;
-    if (t < 0) t = 0;
-    t = sqrt((thres - t) / thres) * thres;
-//  fprintf(pysam_stderr, "%s %lf %d\n", bam_get_qname(b), t, q);
-    return (int)(t + .499);
-}
-
-int bam_prob_realn_core(bam1_t *b, const char *ref, int ref_len, int flag)
-{
-    int k, i, bw, x, y, yb, ye, xb, xe, apply_baq = flag&1, extend_baq = flag>>1&1, redo_baq = flag&4;
-    uint32_t *cigar = bam_get_cigar(b);
-    bam1_core_t *c = &b->core;
-    kpa_par_t conf = kpa_par_def;
-    uint8_t *bq = 0, *zq = 0, *qual = bam_get_qual(b);
-    if ((c->flag & BAM_FUNMAP) || b->core.l_qseq == 0 || qual[0] == (uint8_t)-1)
-        return -1; // do nothing
-
-    // test if BQ or ZQ is present
-    if ((bq = bam_aux_get(b, "BQ")) != 0) ++bq;
-    if ((zq = bam_aux_get(b, "ZQ")) != 0 && *zq == 'Z') ++zq;
-    if (bq && redo_baq)
-    {
-        bam_aux_del(b, bq-1);
-        bq = 0;
-    }
-    if (bq && zq) { // remove the ZQ tag
-        bam_aux_del(b, zq-1);
-        zq = 0;
-    }
-    if (bq || zq) {
-        if ((apply_baq && zq) || (!apply_baq && bq)) return -3; // in both cases, do nothing
-        if (bq && apply_baq) { // then convert BQ to ZQ
-            for (i = 0; i < c->l_qseq; ++i)
-                qual[i] = qual[i] + 64 < bq[i]? 0 : qual[i] - ((int)bq[i] - 64);
-            *(bq - 3) = 'Z';
-        } else if (zq && !apply_baq) { // then convert ZQ to BQ
-            for (i = 0; i < c->l_qseq; ++i)
-                qual[i] += (int)zq[i] - 64;
-            *(zq - 3) = 'B';
-        }
-        return 0;
-    }
-    // find the start and end of the alignment
-    x = c->pos, y = 0, yb = ye = xb = xe = -1;
-    for (k = 0; k < c->n_cigar; ++k) {
-        int op, l;
-        op = cigar[k]&0xf; l = cigar[k]>>4;
-        if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-            if (yb < 0) yb = y;
-            if (xb < 0) xb = x;
-            ye = y + l; xe = x + l;
-            x += l; y += l;
-        } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
-        else if (op == BAM_CDEL) x += l;
-        else if (op == BAM_CREF_SKIP) return -1; // do nothing if there is a reference skip
-    }
-    // set bandwidth and the start and the end
-    bw = 7;
-    if (abs((xe - xb) - (ye - yb)) > bw)
-        bw = abs((xe - xb) - (ye - yb)) + 3;
-    conf.bw = bw;
-    xb -= yb + bw/2; if (xb < 0) xb = 0;
-    xe += c->l_qseq - ye + bw/2;
-    if (xe - xb - c->l_qseq > bw)
-        xb += (xe - xb - c->l_qseq - bw) / 2, xe -= (xe - xb - c->l_qseq - bw) / 2;
-    { // glocal
-        uint8_t *s, *r, *q, *seq = bam_get_seq(b), *bq;
-        int *state;
-        bq = calloc(c->l_qseq + 1, 1);
-        memcpy(bq, qual, c->l_qseq);
-        s = calloc(c->l_qseq, 1);
-        for (i = 0; i < c->l_qseq; ++i) s[i] = seq_nt16_int[bam_seqi(seq, i)];
-        r = calloc(xe - xb, 1);
-        for (i = xb; i < xe; ++i) {
-            if (i >= ref_len || ref[i] == '\0') { xe = i; break; }
-            r[i-xb] = seq_nt16_int[seq_nt16_table[(int)ref[i]]];
-        }
-        state = calloc(c->l_qseq, sizeof(int));
-        q = calloc(c->l_qseq, 1);
-        kpa_glocal(r, xe-xb, s, c->l_qseq, qual, &conf, state, q);
-        if (!extend_baq) { // in this block, bq[] is capped by base quality qual[]
-            for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
-                int op = cigar[k]&0xf, l = cigar[k]>>4;
-                if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-                    for (i = y; i < y + l; ++i) {
-                        if ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y)) bq[i] = 0;
-                        else bq[i] = bq[i] < q[i]? bq[i] : q[i];
-                    }
-                    x += l; y += l;
-                } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
-                else if (op == BAM_CDEL) x += l;
-            }
-            for (i = 0; i < c->l_qseq; ++i) bq[i] = qual[i] - bq[i] + 64; // finalize BQ
-        } else { // in this block, bq[] is BAQ that can be larger than qual[] (different from the above!)
-            uint8_t *left, *rght;
-            left = calloc(c->l_qseq, 1); rght = calloc(c->l_qseq, 1);
-            for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
-                int op = cigar[k]&0xf, l = cigar[k]>>4;
-                if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-                    for (i = y; i < y + l; ++i)
-                        bq[i] = ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y))? 0 : q[i];
-                    for (left[y] = bq[y], i = y + 1; i < y + l; ++i)
-                        left[i] = bq[i] > left[i-1]? bq[i] : left[i-1];
-                    for (rght[y+l-1] = bq[y+l-1], i = y + l - 2; i >= y; --i)
-                        rght[i] = bq[i] > rght[i+1]? bq[i] : rght[i+1];
-                    for (i = y; i < y + l; ++i)
-                        bq[i] = left[i] < rght[i]? left[i] : rght[i];
-                    x += l; y += l;
-                } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
-                else if (op == BAM_CDEL) x += l;
-            }
-            for (i = 0; i < c->l_qseq; ++i) bq[i] = 64 + (qual[i] <= bq[i]? 0 : qual[i] - bq[i]); // finalize BQ
-            free(left); free(rght);
-        }
-        if (apply_baq) {
-            for (i = 0; i < c->l_qseq; ++i) qual[i] -= bq[i] - 64; // modify qual
-            bam_aux_append(b, "ZQ", 'Z', c->l_qseq + 1, bq);
-        } else bam_aux_append(b, "BQ", 'Z', c->l_qseq + 1, bq);
-        free(bq); free(s); free(r); free(q); free(state);
-    }
-    return 0;
-}
-
-int bam_prob_realn(bam1_t *b, const char *ref)
-{
-    return bam_prob_realn_core(b, ref, INT_MAX, 1);
-}
-
 int calmd_usage() {
     fprintf(pysam_stderr,
 "Usage: samtools calmd [-eubrAES] <aln.bam> <ref.fasta>\n"
@@ -347,13 +175,14 @@ int calmd_usage() {
 "  -r       compute the BQ tag (without -A) or cap baseQ by BAQ (with -A)\n"
 "  -E       extended BAQ for better sensitivity but lower specificity\n");
 
-    sam_global_opt_help(pysam_stderr, "-....");
+    sam_global_opt_help(pysam_stderr, "-....@");
     return 1;
 }
 
 int bam_fillmd(int argc, char *argv[])
 {
     int c, flt_flag, tid = -2, ret, len, is_bam_out, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
+    htsThreadPool p = {NULL, 0};
     samFile *fp = NULL, *fpout = NULL;
     bam_hdr_t *header = NULL;
     faidx_t *fai = NULL;
@@ -362,14 +191,14 @@ int bam_fillmd(int argc, char *argv[])
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0,'@'),
         { NULL, 0, NULL, 0 }
     };
 
     flt_flag = UPDATE_NM | UPDATE_MD;
     is_bam_out = is_uncompressed = is_realn = max_nm = capQ = baq_flag = 0;
     strcpy(mode_w, "w");
-    while ((c = getopt_long(argc, argv, "EqreuNhbSC:n:Ad", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "EqreuNhbSC:n:Ad@:", lopts, NULL)) >= 0) {
         switch (c) {
         case 'r': is_realn = 1; break;
         case 'e': flt_flag |= USE_EQUAL; break;
@@ -406,7 +235,7 @@ int bam_fillmd(int argc, char *argv[])
         fprintf(pysam_stderr, "[bam_fillmd] input SAM does not have header. Abort!\n");
         goto fail;
     }
-    
+
     fpout = sam_open_format(pysam_stdout_fn, mode_w, &ga.out);
     if (fpout == NULL) {
         print_error_errno("calmd", "Failed to open output");
@@ -417,6 +246,15 @@ int bam_fillmd(int argc, char *argv[])
         goto fail;
     }
 
+    if (ga.nthreads > 0) {
+        if (!(p.pool = hts_tpool_init(ga.nthreads))) {
+            fprintf(pysam_stderr, "Error creating thread pool\n");
+            goto fail;
+        }
+        hts_set_opt(fp,    HTS_OPT_THREAD_POOL, &p);
+        hts_set_opt(fpout, HTS_OPT_THREAD_POOL, &p);
+    }
+
     ref_file = argc > optind + 1 ? argv[optind+1] : ga.reference;
     fai = fai_load(ref_file);
 
@@ -442,9 +280,9 @@ int bam_fillmd(int argc, char *argv[])
                     if (is_realn || capQ > 10) goto fail; // Would otherwise crash
                 }
             }
-            if (is_realn) bam_prob_realn_core(b, ref, len, baq_flag);
+            if (is_realn) sam_prob_realn(b, ref, len, baq_flag);
             if (capQ > 10) {
-                int q = bam_cap_mapQ(b, ref, len, capQ);
+                int q = sam_cap_mapq(b, ref, len, capQ);
                 if (b->core.qual > q) b->core.qual = q;
             }
             if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm);
@@ -468,6 +306,8 @@ int bam_fillmd(int argc, char *argv[])
         fprintf(pysam_stderr, "[bam_fillmd] error when closing output file\n");
         return 1;
     }
+    if (p.pool) hts_tpool_destroy(p.pool);
+
     return 0;
 
  fail:
@@ -477,5 +317,7 @@ int bam_fillmd(int argc, char *argv[])
     if (fai) fai_destroy(fai);
     if (fp) sam_close(fp);
     if (fpout) sam_close(fpout);
+    if (p.pool) hts_tpool_destroy(p.pool);
+
     return 1;
 }
diff --git a/samtools/bam_plcmd.c b/samtools/bam_plcmd.c
index dc12bf3..d17e9d6 100644
--- a/samtools/bam_plcmd.c
+++ b/samtools/bam_plcmd.c
@@ -31,6 +31,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <unistd.h>
 #include <ctype.h>
 #include <string.h>
+#include <strings.h>
 #include <limits.h>
 #include <errno.h>
 #include <sys/stat.h>
@@ -118,7 +119,7 @@ void bed_destroy(void *_h);
 int bed_overlap(const void *_h, const char *chr, int beg, int end);
 
 typedef struct {
-    int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag;
+    int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag, all;
     int rflag_require, rflag_filter;
     int openQ, extQ, tandemQ, min_support; // for indels
     double min_frac; // for indels
@@ -209,11 +210,22 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid,  char **ref, int *ref_len) {
     return 1;
 }
 
+static void
+print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname,
+                   int pos, int n, const char *ref, int ref_len)
+{
+    int i;
+    fprintf(fp, "%s\t%d\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N');
+    for (i = 0; i < n; ++i) {
+        fputs("\t0\t*\t*", fp);
+        if (conf->flag & MPLP_PRINT_MAPQ) fputs("\t*", fp);
+        if (conf->flag & MPLP_PRINT_POS) fputs("\t*", fp);
+    }
+    putc('\n', fp);
+}
+
 static int mplp_func(void *data, bam1_t *b)
 {
-    extern int bam_realn(bam1_t *b, const char *ref);
-    extern int bam_prob_realn_core(bam1_t *b, const char *ref, int ref_len, int flag);
-    extern int bam_cap_mapQ(bam1_t *b, char *ref, int ref_len, int thres);
     char *ref;
     mplp_aux_t *ma = (mplp_aux_t*)data;
     int ret, skip = 0, ref_len;
@@ -229,7 +241,7 @@ static int mplp_func(void *data, bam1_t *b)
         }
         if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) { skip = 1; continue; }
         if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) { skip = 1; continue; }
-        if (ma->conf->bed) { // test overlap
+        if (ma->conf->bed && ma->conf->all == 0) { // test overlap
             skip = !bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_endpos(b));
             if (skip) continue;
         }
@@ -258,9 +270,9 @@ static int mplp_func(void *data, bam1_t *b)
         }
 
         skip = 0;
-        if (has_ref && (ma->conf->flag&MPLP_REALN)) bam_prob_realn_core(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
+        if (has_ref && (ma->conf->flag&MPLP_REALN)) sam_prob_realn(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
         if (has_ref && ma->conf->capQ_thres > 10) {
-            int q = bam_cap_mapQ(b, ref, ref_len, ma->conf->capQ_thres);
+            int q = sam_cap_mapq(b, ref, ref_len, ma->conf->capQ_thres);
             if (q < 0) skip = 1;
             else if (b->core.qual > q) b->core.qual = q;
         }
@@ -308,7 +320,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
     extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
     extern void bcf_call_del_rghash(void *rghash);
     mplp_aux_t **data;
-    int i, tid, pos, *n_plp, beg0 = 0, end0 = INT_MAX, ref_len, max_depth, max_indel_depth;
+    int i, tid, pos, *n_plp, beg0 = 0, end0 = INT_MAX, tid0 = 0, ref_len, max_depth, max_indel_depth;
     const bam_pileup1_t **plp;
     mplp_ref_t mp_ref = MPLP_REF_INIT;
     bam_mplp_t iter;
@@ -379,7 +391,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                 fprintf(stderr, "[E::%s] fail to parse region '%s' with %s\n", __func__, conf->reg, fn[i]);
                 exit(EXIT_FAILURE);
             }
-            if (i == 0) beg0 = data[i]->iter->beg, end0 = data[i]->iter->end;
+            if (i == 0) beg0 = data[i]->iter->beg, end0 = data[i]->iter->end, tid0 = data[i]->iter->tid;
             hts_idx_destroy(idx);
         }
         else
@@ -551,14 +563,16 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
     bam_mplp_set_maxcnt(iter, max_depth);
     bcf1_t *bcf_rec = bcf_init1();
     int ret;
+    int last_tid = -1, last_pos = -1;
+
     // begin pileup
     while ( (ret=bam_mplp_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
         if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
-        if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
         mplp_get_ref(data[0], tid, &ref, &ref_len);
         //printf("tid=%d len=%d ref=%p/%s\n", tid, ref_len, ref, ref);
         if (conf->flag & MPLP_BCF) {
             int total_depth, _ref0, ref16;
+            if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
             for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
             group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG);
             _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
@@ -584,6 +598,35 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                 }
             }
         } else {
+            if (conf->all) {
+                // Deal with missing portions of previous tids
+                while (tid > last_tid) {
+                    if (last_tid >= 0 && !conf->reg) {
+                        while (++last_pos < h->target_len[last_tid]) {
+                            if (conf->bed && bed_overlap(conf->bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
+                                continue;
+                            print_empty_pileup(pileup_fp, conf, h->target_name[last_tid], last_pos, n, ref, ref_len);
+                        }
+                    }
+                    last_tid++;
+                    last_pos = -1;
+                    if (conf->all < 2)
+                        break;
+                }
+            }
+            if (conf->all) {
+                // Deal with missing portion of current tid
+                while (++last_pos < pos) {
+                    if (conf->reg && last_pos < beg0) continue; // out of range; skip
+                    if (conf->bed && bed_overlap(conf->bed, h->target_name[tid], last_pos, last_pos + 1) == 0)
+                        continue;
+                    print_empty_pileup(pileup_fp, conf, h->target_name[tid], last_pos, n, ref, ref_len);
+                }
+                last_tid = tid;
+                last_pos = pos;
+            }
+            if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
+
             fprintf(pileup_fp, "%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
             for (i = 0; i < n; ++i) {
                 int j, cnt;
@@ -600,14 +643,18 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                     if (conf->flag & MPLP_PRINT_MAPQ) fputs("\t*", pileup_fp);
                     if (conf->flag & MPLP_PRINT_POS) fputs("\t*", pileup_fp);
                 } else {
+                    int n = 0;
                     for (j = 0; j < n_plp[i]; ++j) {
                         const bam_pileup1_t *p = plp[i] + j;
                         int c = p->qpos < p->b->core.l_qseq
                             ? bam_get_qual(p->b)[p->qpos]
                             : 0;
                         if (c >= conf->min_baseQ)
-                            pileup_seq(pileup_fp, plp[i] + j, pos, ref_len, ref);
+                            n++, pileup_seq(pileup_fp, plp[i] + j, pos, ref_len, ref);
                     }
+                    if (!n) putc('*', pileup_fp);
+
+                    n = 0;
                     putc('\t', pileup_fp);
                     for (j = 0; j < n_plp[i]; ++j) {
                         const bam_pileup1_t *p = plp[i] + j;
@@ -617,9 +664,13 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                         if (c >= conf->min_baseQ) {
                             c = c + 33 < 126? c + 33 : 126;
                             putc(c, pileup_fp);
+                            n++;
                         }
                     }
+                    if (!n) putc('*', pileup_fp);
+
                     if (conf->flag & MPLP_PRINT_MAPQ) {
+                        n = 0;
                         putc('\t', pileup_fp);
                         for (j = 0; j < n_plp[i]; ++j) {
                             const bam_pileup1_t *p = plp[i] + j;
@@ -628,19 +679,24 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                             c = plp[i][j].b->core.qual + 33;
                             if (c > 126) c = 126;
                             putc(c, pileup_fp);
+                            n++;
                         }
+                        if (!n) putc('*', pileup_fp);
                     }
+
                     if (conf->flag & MPLP_PRINT_POS) {
+                        n = 0;
                         putc('\t', pileup_fp);
-                        int last = 0;
                         for (j = 0; j < n_plp[i]; ++j) {
                             const bam_pileup1_t *p = plp[i] + j;
                             int c = bam_get_qual(p->b)[p->qpos];
                             if ( c < conf->min_baseQ ) continue;
 
-                            if (last++) putc(',', pileup_fp);
+                            if (n > 0) putc(',', pileup_fp);
                             fprintf(pileup_fp, "%d", plp[i][j].qpos + 1); // FIXME: printf() is very slow...
+                            n++;
                         }
+                        if (!n) putc('*', pileup_fp);
                     }
                 }
             }
@@ -648,6 +704,27 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
         }
     }
 
+    if (conf->all && !(conf->flag & MPLP_BCF)) {
+        // Handle terminating region
+        if (last_tid < 0 && conf->reg && conf->all > 1) {
+            last_tid = tid0;
+            last_pos = beg0-1;
+            mplp_get_ref(data[0], tid0, &ref, &ref_len);
+        }
+       while (last_tid >= 0 && last_tid < h->n_targets) {
+            while (++last_pos < h->target_len[last_tid]) {
+                if (last_pos >= end0) break;
+                if (conf->bed && bed_overlap(conf->bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
+                    continue;
+                print_empty_pileup(pileup_fp, conf, h->target_name[last_tid], last_pos, n, ref, ref_len);
+            }
+            last_tid++;
+            last_pos = -1;
+            if (conf->all < 2 || conf->reg)
+                break;
+        }
+    }
+
     // clean up
     free(bc.tmp.s);
     bcf_destroy1(bcf_rec);
@@ -681,6 +758,13 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
     return ret;
 }
 
+static int is_url(const char *s)
+{
+    static const char uri_scheme_chars[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-";
+    return s[strspn(s, uri_scheme_chars)] == ':';
+}
+
 #define MAX_PATH_LEN 1024
 int read_file_list(const char *file_list,int *n,char **argv[])
 {
@@ -710,7 +794,7 @@ int read_file_list(const char *file_list,int *n,char **argv[])
 
         // check sanity of the file list
         buf[len] = 0;
-        if (stat(buf, &sb) != 0)
+        if (! (is_url(buf) || stat(buf, &sb) == 0))
         {
             // no such file, check if it is safe to print its name
             int i, safe_to_print = 1;
@@ -814,6 +898,8 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
 "Output options for mpileup format (without -g/-v):\n"
 "  -O, --output-BP         output base positions on reads\n"
 "  -s, --output-MQ         output mapping quality\n"
+"  -a                      output all positions (including zero depth)\n"
+"  -a -a (or -aa)          output absolutely all positions, including unused ref. sequences\n"
 "\n"
 "Output options for genotype likelihoods (when -g/-v is used):\n"
 "  -t, --output-tags LIST  optional tags to output:\n"
@@ -836,7 +922,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
     fprintf(fp,
 "  -p, --per-sample-mF     apply -m and -F per-sample for increased sensitivity\n"
 "  -P, --platforms STR     comma separated list of platforms for indels [all]\n");
-    sam_global_opt_help(fp, "-.--.");
+    sam_global_opt_help(fp, "-.--.-");
     fprintf(fp,
 "\n"
 "Notes: Assuming diploid individuals.\n");
@@ -862,11 +948,12 @@ int bam_mpileup(int argc, char *argv[])
     mplp.argc = argc; mplp.argv = argv;
     mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
     mplp.output_fname = NULL;
+    mplp.all = 0;
     sam_global_args_init(&mplp.ga);
 
     static const struct option lopts[] =
     {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
         {"rf", required_argument, NULL, 1},   // require flag
         {"ff", required_argument, NULL, 2},   // filter flag
         {"incl-flags", required_argument, NULL, 1},
@@ -916,7 +1003,7 @@ int bam_mpileup(int argc, char *argv[])
         {"platforms", required_argument, NULL, 'P'},
         {NULL, 0, NULL, 0}
     };
-    while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:",lopts,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:a",lopts,NULL)) >= 0) {
         switch (c) {
         case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
         case  1 :
@@ -988,6 +1075,7 @@ int bam_mpileup(int argc, char *argv[])
             }
             break;
         case 't': mplp.fmt_flag |= parse_format_flag(optarg); break;
+        case 'a': mplp.all++; break;
         default:
             if (parse_sam_global_opt(c, optarg, lopts, &mplp.ga) == 0) break;
             /* else fall-through */
diff --git a/samtools/bam_plcmd.c.pysam.c b/samtools/bam_plcmd.c.pysam.c
index 650e818..03e5f8a 100644
--- a/samtools/bam_plcmd.c.pysam.c
+++ b/samtools/bam_plcmd.c.pysam.c
@@ -33,6 +33,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <unistd.h>
 #include <ctype.h>
 #include <string.h>
+#include <strings.h>
 #include <limits.h>
 #include <errno.h>
 #include <sys/stat.h>
@@ -120,7 +121,7 @@ void bed_destroy(void *_h);
 int bed_overlap(const void *_h, const char *chr, int beg, int end);
 
 typedef struct {
-    int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag;
+    int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag, all;
     int rflag_require, rflag_filter;
     int openQ, extQ, tandemQ, min_support; // for indels
     double min_frac; // for indels
@@ -211,11 +212,22 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid,  char **ref, int *ref_len) {
     return 1;
 }
 
+static void
+print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname,
+                   int pos, int n, const char *ref, int ref_len)
+{
+    int i;
+    fprintf(fp, "%s\t%d\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N');
+    for (i = 0; i < n; ++i) {
+        fputs("\t0\t*\t*", fp);
+        if (conf->flag & MPLP_PRINT_MAPQ) fputs("\t*", fp);
+        if (conf->flag & MPLP_PRINT_POS) fputs("\t*", fp);
+    }
+    putc('\n', fp);
+}
+
 static int mplp_func(void *data, bam1_t *b)
 {
-    extern int bam_realn(bam1_t *b, const char *ref);
-    extern int bam_prob_realn_core(bam1_t *b, const char *ref, int ref_len, int flag);
-    extern int bam_cap_mapQ(bam1_t *b, char *ref, int ref_len, int thres);
     char *ref;
     mplp_aux_t *ma = (mplp_aux_t*)data;
     int ret, skip = 0, ref_len;
@@ -231,7 +243,7 @@ static int mplp_func(void *data, bam1_t *b)
         }
         if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) { skip = 1; continue; }
         if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) { skip = 1; continue; }
-        if (ma->conf->bed) { // test overlap
+        if (ma->conf->bed && ma->conf->all == 0) { // test overlap
             skip = !bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_endpos(b));
             if (skip) continue;
         }
@@ -260,9 +272,9 @@ static int mplp_func(void *data, bam1_t *b)
         }
 
         skip = 0;
-        if (has_ref && (ma->conf->flag&MPLP_REALN)) bam_prob_realn_core(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
+        if (has_ref && (ma->conf->flag&MPLP_REALN)) sam_prob_realn(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
         if (has_ref && ma->conf->capQ_thres > 10) {
-            int q = bam_cap_mapQ(b, ref, ref_len, ma->conf->capQ_thres);
+            int q = sam_cap_mapq(b, ref, ref_len, ma->conf->capQ_thres);
             if (q < 0) skip = 1;
             else if (b->core.qual > q) b->core.qual = q;
         }
@@ -310,7 +322,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
     extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
     extern void bcf_call_del_rghash(void *rghash);
     mplp_aux_t **data;
-    int i, tid, pos, *n_plp, beg0 = 0, end0 = INT_MAX, ref_len, max_depth, max_indel_depth;
+    int i, tid, pos, *n_plp, beg0 = 0, end0 = INT_MAX, tid0 = 0, ref_len, max_depth, max_indel_depth;
     const bam_pileup1_t **plp;
     mplp_ref_t mp_ref = MPLP_REF_INIT;
     bam_mplp_t iter;
@@ -381,7 +393,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                 fprintf(pysam_stderr, "[E::%s] fail to parse region '%s' with %s\n", __func__, conf->reg, fn[i]);
                 exit(EXIT_FAILURE);
             }
-            if (i == 0) beg0 = data[i]->iter->beg, end0 = data[i]->iter->end;
+            if (i == 0) beg0 = data[i]->iter->beg, end0 = data[i]->iter->end, tid0 = data[i]->iter->tid;
             hts_idx_destroy(idx);
         }
         else
@@ -553,14 +565,16 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
     bam_mplp_set_maxcnt(iter, max_depth);
     bcf1_t *bcf_rec = bcf_init1();
     int ret;
+    int last_tid = -1, last_pos = -1;
+
     // begin pileup
     while ( (ret=bam_mplp_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
         if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
-        if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
         mplp_get_ref(data[0], tid, &ref, &ref_len);
         //printf("tid=%d len=%d ref=%p/%s\n", tid, ref_len, ref, ref);
         if (conf->flag & MPLP_BCF) {
             int total_depth, _ref0, ref16;
+            if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
             for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
             group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG);
             _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
@@ -586,6 +600,35 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                 }
             }
         } else {
+            if (conf->all) {
+                // Deal with missing portions of previous tids
+                while (tid > last_tid) {
+                    if (last_tid >= 0 && !conf->reg) {
+                        while (++last_pos < h->target_len[last_tid]) {
+                            if (conf->bed && bed_overlap(conf->bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
+                                continue;
+                            print_empty_pileup(pileup_fp, conf, h->target_name[last_tid], last_pos, n, ref, ref_len);
+                        }
+                    }
+                    last_tid++;
+                    last_pos = -1;
+                    if (conf->all < 2)
+                        break;
+                }
+            }
+            if (conf->all) {
+                // Deal with missing portion of current tid
+                while (++last_pos < pos) {
+                    if (conf->reg && last_pos < beg0) continue; // out of range; skip
+                    if (conf->bed && bed_overlap(conf->bed, h->target_name[tid], last_pos, last_pos + 1) == 0)
+                        continue;
+                    print_empty_pileup(pileup_fp, conf, h->target_name[tid], last_pos, n, ref, ref_len);
+                }
+                last_tid = tid;
+                last_pos = pos;
+            }
+            if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
+
             fprintf(pileup_fp, "%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
             for (i = 0; i < n; ++i) {
                 int j, cnt;
@@ -602,14 +645,18 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                     if (conf->flag & MPLP_PRINT_MAPQ) fputs("\t*", pileup_fp);
                     if (conf->flag & MPLP_PRINT_POS) fputs("\t*", pileup_fp);
                 } else {
+                    int n = 0;
                     for (j = 0; j < n_plp[i]; ++j) {
                         const bam_pileup1_t *p = plp[i] + j;
                         int c = p->qpos < p->b->core.l_qseq
                             ? bam_get_qual(p->b)[p->qpos]
                             : 0;
                         if (c >= conf->min_baseQ)
-                            pileup_seq(pileup_fp, plp[i] + j, pos, ref_len, ref);
+                            n++, pileup_seq(pileup_fp, plp[i] + j, pos, ref_len, ref);
                     }
+                    if (!n) putc('*', pileup_fp);
+
+                    n = 0;
                     putc('\t', pileup_fp);
                     for (j = 0; j < n_plp[i]; ++j) {
                         const bam_pileup1_t *p = plp[i] + j;
@@ -619,9 +666,13 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                         if (c >= conf->min_baseQ) {
                             c = c + 33 < 126? c + 33 : 126;
                             putc(c, pileup_fp);
+                            n++;
                         }
                     }
+                    if (!n) putc('*', pileup_fp);
+
                     if (conf->flag & MPLP_PRINT_MAPQ) {
+                        n = 0;
                         putc('\t', pileup_fp);
                         for (j = 0; j < n_plp[i]; ++j) {
                             const bam_pileup1_t *p = plp[i] + j;
@@ -630,19 +681,24 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                             c = plp[i][j].b->core.qual + 33;
                             if (c > 126) c = 126;
                             putc(c, pileup_fp);
+                            n++;
                         }
+                        if (!n) putc('*', pileup_fp);
                     }
+
                     if (conf->flag & MPLP_PRINT_POS) {
+                        n = 0;
                         putc('\t', pileup_fp);
-                        int last = 0;
                         for (j = 0; j < n_plp[i]; ++j) {
                             const bam_pileup1_t *p = plp[i] + j;
                             int c = bam_get_qual(p->b)[p->qpos];
                             if ( c < conf->min_baseQ ) continue;
 
-                            if (last++) putc(',', pileup_fp);
+                            if (n > 0) putc(',', pileup_fp);
                             fprintf(pileup_fp, "%d", plp[i][j].qpos + 1); // FIXME: fprintf(pysam_stdout, ) is very slow...
+                            n++;
                         }
+                        if (!n) putc('*', pileup_fp);
                     }
                 }
             }
@@ -650,6 +706,27 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
         }
     }
 
+    if (conf->all && !(conf->flag & MPLP_BCF)) {
+        // Handle terminating region
+        if (last_tid < 0 && conf->reg && conf->all > 1) {
+            last_tid = tid0;
+            last_pos = beg0-1;
+            mplp_get_ref(data[0], tid0, &ref, &ref_len);
+        }
+       while (last_tid >= 0 && last_tid < h->n_targets) {
+            while (++last_pos < h->target_len[last_tid]) {
+                if (last_pos >= end0) break;
+                if (conf->bed && bed_overlap(conf->bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
+                    continue;
+                print_empty_pileup(pileup_fp, conf, h->target_name[last_tid], last_pos, n, ref, ref_len);
+            }
+            last_tid++;
+            last_pos = -1;
+            if (conf->all < 2 || conf->reg)
+                break;
+        }
+    }
+
     // clean up
     free(bc.tmp.s);
     bcf_destroy1(bcf_rec);
@@ -683,6 +760,13 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
     return ret;
 }
 
+static int is_url(const char *s)
+{
+    static const char uri_scheme_chars[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-";
+    return s[strspn(s, uri_scheme_chars)] == ':';
+}
+
 #define MAX_PATH_LEN 1024
 int read_file_list(const char *file_list,int *n,char **argv[])
 {
@@ -712,7 +796,7 @@ int read_file_list(const char *file_list,int *n,char **argv[])
 
         // check sanity of the file list
         buf[len] = 0;
-        if (stat(buf, &sb) != 0)
+        if (! (is_url(buf) || stat(buf, &sb) == 0))
         {
             // no such file, check if it is safe to print its name
             int i, safe_to_print = 1;
@@ -816,6 +900,8 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
 "Output options for mpileup format (without -g/-v):\n"
 "  -O, --output-BP         output base positions on reads\n"
 "  -s, --output-MQ         output mapping quality\n"
+"  -a                      output all positions (including zero depth)\n"
+"  -a -a (or -aa)          output absolutely all positions, including unused ref. sequences\n"
 "\n"
 "Output options for genotype likelihoods (when -g/-v is used):\n"
 "  -t, --output-tags LIST  optional tags to output:\n"
@@ -838,7 +924,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
     fprintf(fp,
 "  -p, --per-sample-mF     apply -m and -F per-sample for increased sensitivity\n"
 "  -P, --platforms STR     comma separated list of platforms for indels [all]\n");
-    sam_global_opt_help(fp, "-.--.");
+    sam_global_opt_help(fp, "-.--.-");
     fprintf(fp,
 "\n"
 "Notes: Assuming diploid individuals.\n");
@@ -864,11 +950,12 @@ int bam_mpileup(int argc, char *argv[])
     mplp.argc = argc; mplp.argv = argv;
     mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
     mplp.output_fname = NULL;
+    mplp.all = 0;
     sam_global_args_init(&mplp.ga);
 
     static const struct option lopts[] =
     {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
         {"rf", required_argument, NULL, 1},   // require flag
         {"ff", required_argument, NULL, 2},   // filter flag
         {"incl-flags", required_argument, NULL, 1},
@@ -918,7 +1005,7 @@ int bam_mpileup(int argc, char *argv[])
         {"platforms", required_argument, NULL, 'P'},
         {NULL, 0, NULL, 0}
     };
-    while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:",lopts,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:a",lopts,NULL)) >= 0) {
         switch (c) {
         case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
         case  1 :
@@ -990,6 +1077,7 @@ int bam_mpileup(int argc, char *argv[])
             }
             break;
         case 't': mplp.fmt_flag |= parse_format_flag(optarg); break;
+        case 'a': mplp.all++; break;
         default:
             if (parse_sam_global_opt(c, optarg, lopts, &mplp.ga) == 0) break;
             /* else fall-through */
diff --git a/samtools/bam_quickcheck.c b/samtools/bam_quickcheck.c
index 6c3c664..02616fe 100644
--- a/samtools/bam_quickcheck.c
+++ b/samtools/bam_quickcheck.c
@@ -26,7 +26,6 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <htslib/hts.h>
 #include <htslib/sam.h>
-#include <htslib/bgzf.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -102,7 +101,7 @@ int main_quickcheck(int argc, char** argv)
         // attempt to open
         htsFile *hts_fp = hts_open(fn, "r");
         if (hts_fp == NULL) {
-            if (verbose >= 2) fprintf(stderr, "%s could not be opened for reading\n", fn);
+            if (verbose >= 2) fprintf(stderr, "%s could not be opened for reading.\n", fn);
             file_state |= 2;
         }
         else {
@@ -110,37 +109,54 @@ int main_quickcheck(int argc, char** argv)
             // make sure we have sequence data
             const htsFormat *fmt = hts_get_format(hts_fp);
             if (fmt->category != sequence_data ) {
-                if (verbose >= 2) fprintf(stderr, "%s was not identified as sequence data\n", fn);
+                if (verbose >= 2) fprintf(stderr, "%s was not identified as sequence data.\n", fn);
                 file_state |= 4;
             }
             else {
                 if (verbose >= 3) fprintf(stderr, "%s is sequence data\n", fn);
                 // check header
                 bam_hdr_t *header = sam_hdr_read(hts_fp);
-                if (header->n_targets <= 0) {
-                    if (verbose >= 2) fprintf(stderr, "%s had no targets in header\n", fn);
+                if (header == NULL) {
+                    if (verbose >= 2) fprintf(stderr, "%s caused an error whilst reading its header.\n", fn);
                     file_state |= 8;
-                }
-                else {
-                    if (verbose >= 3) fprintf(stderr, "%s has %d targets in header\n", fn, header->n_targets);
-                }
-
-                // only check EOF on BAM for now
-                // TODO implement and use hts_check_EOF() to include CRAM support
-                if (fmt->format == bam) {
-                    if (bgzf_check_EOF(hts_fp->fp.bgzf) <= 0) {
-                        if (verbose >= 2) fprintf(stderr, "%s was missing EOF block\n", fn);
-                        file_state |= 16;
+                } else {
+                    if (header->n_targets <= 0) {
+                        if (verbose >= 2) fprintf(stderr, "%s had no targets in header.\n", fn);
+                        file_state |= 8;
                     }
                     else {
-                        if (verbose >= 3) fprintf(stderr, "%s has good EOF block\n", fn);
+                        if (verbose >= 3) fprintf(stderr, "%s has %d targets in header.\n", fn, header->n_targets);
                     }
+                    bam_hdr_destroy(header);
+                }
+            }
+            // check EOF on formats that support this
+            int ret;
+            if ((ret = hts_check_EOF(hts_fp)) < 0) {
+                if (verbose >= 2) fprintf(stderr, "%s caused an error whilst checking for EOF block.\n", fn);
+                file_state |= 16;
+            }
+            else {
+                switch (ret) {
+                    case 0:
+                        if (verbose >= 2) fprintf(stderr, "%s was missing EOF block when one should be present.\n", fn);
+                        file_state |= 16;
+                        break;
+                    case 1:
+                        if (verbose >= 3) fprintf(stderr, "%s has good EOF block.\n", fn);
+                        break;
+                    case 2:
+                        if (verbose >= 3) fprintf(stderr, "%s cannot be checked for EOF block as it is not seekable.\n", fn);
+                        break;
+                    case 3:
+                        if (verbose >= 3) fprintf(stderr, "%s cannot be checked for EOF block because its filetype does not contain one.\n", fn);
+                        break;
                 }
             }
 
             if (hts_close(hts_fp) < 0) {
                 file_state |= 32;
-                if (verbose >= 2) fprintf(stderr, "%s did not close cleanly\n", fn);
+                if (verbose >= 2) fprintf(stderr, "%s did not close cleanly.\n", fn);
             }
         }
 
diff --git a/samtools/bam_quickcheck.c.pysam.c b/samtools/bam_quickcheck.c.pysam.c
index 26dbeb9..c9dc3d2 100644
--- a/samtools/bam_quickcheck.c.pysam.c
+++ b/samtools/bam_quickcheck.c.pysam.c
@@ -28,7 +28,6 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <htslib/hts.h>
 #include <htslib/sam.h>
-#include <htslib/bgzf.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -104,7 +103,7 @@ int main_quickcheck(int argc, char** argv)
         // attempt to open
         htsFile *hts_fp = hts_open(fn, "r");
         if (hts_fp == NULL) {
-            if (verbose >= 2) fprintf(pysam_stderr, "%s could not be opened for reading\n", fn);
+            if (verbose >= 2) fprintf(pysam_stderr, "%s could not be opened for reading.\n", fn);
             file_state |= 2;
         }
         else {
@@ -112,37 +111,54 @@ int main_quickcheck(int argc, char** argv)
             // make sure we have sequence data
             const htsFormat *fmt = hts_get_format(hts_fp);
             if (fmt->category != sequence_data ) {
-                if (verbose >= 2) fprintf(pysam_stderr, "%s was not identified as sequence data\n", fn);
+                if (verbose >= 2) fprintf(pysam_stderr, "%s was not identified as sequence data.\n", fn);
                 file_state |= 4;
             }
             else {
                 if (verbose >= 3) fprintf(pysam_stderr, "%s is sequence data\n", fn);
                 // check header
                 bam_hdr_t *header = sam_hdr_read(hts_fp);
-                if (header->n_targets <= 0) {
-                    if (verbose >= 2) fprintf(pysam_stderr, "%s had no targets in header\n", fn);
+                if (header == NULL) {
+                    if (verbose >= 2) fprintf(pysam_stderr, "%s caused an error whilst reading its header.\n", fn);
                     file_state |= 8;
-                }
-                else {
-                    if (verbose >= 3) fprintf(pysam_stderr, "%s has %d targets in header\n", fn, header->n_targets);
-                }
-
-                // only check EOF on BAM for now
-                // TODO implement and use hts_check_EOF() to include CRAM support
-                if (fmt->format == bam) {
-                    if (bgzf_check_EOF(hts_fp->fp.bgzf) <= 0) {
-                        if (verbose >= 2) fprintf(pysam_stderr, "%s was missing EOF block\n", fn);
-                        file_state |= 16;
+                } else {
+                    if (header->n_targets <= 0) {
+                        if (verbose >= 2) fprintf(pysam_stderr, "%s had no targets in header.\n", fn);
+                        file_state |= 8;
                     }
                     else {
-                        if (verbose >= 3) fprintf(pysam_stderr, "%s has good EOF block\n", fn);
+                        if (verbose >= 3) fprintf(pysam_stderr, "%s has %d targets in header.\n", fn, header->n_targets);
                     }
+                    bam_hdr_destroy(header);
+                }
+            }
+            // check EOF on formats that support this
+            int ret;
+            if ((ret = hts_check_EOF(hts_fp)) < 0) {
+                if (verbose >= 2) fprintf(pysam_stderr, "%s caused an error whilst checking for EOF block.\n", fn);
+                file_state |= 16;
+            }
+            else {
+                switch (ret) {
+                    case 0:
+                        if (verbose >= 2) fprintf(pysam_stderr, "%s was missing EOF block when one should be present.\n", fn);
+                        file_state |= 16;
+                        break;
+                    case 1:
+                        if (verbose >= 3) fprintf(pysam_stderr, "%s has good EOF block.\n", fn);
+                        break;
+                    case 2:
+                        if (verbose >= 3) fprintf(pysam_stderr, "%s cannot be checked for EOF block as it is not seekable.\n", fn);
+                        break;
+                    case 3:
+                        if (verbose >= 3) fprintf(pysam_stderr, "%s cannot be checked for EOF block because its filetype does not contain one.\n", fn);
+                        break;
                 }
             }
 
             if (hts_close(hts_fp) < 0) {
                 file_state |= 32;
-                if (verbose >= 2) fprintf(pysam_stderr, "%s did not close cleanly\n", fn);
+                if (verbose >= 2) fprintf(pysam_stderr, "%s did not close cleanly.\n", fn);
             }
         }
 
diff --git a/samtools/bam_reheader.c b/samtools/bam_reheader.c
index 0469c06..acaebd4 100644
--- a/samtools/bam_reheader.c
+++ b/samtools/bam_reheader.c
@@ -91,7 +91,7 @@ int bam_reheader(BGZF *in, bam_hdr_t *h, int fd,
         goto fail;
     }
     if (in->block_offset < in->block_length) {
-        if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+        if (bgzf_write(fp, (char *)in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
         if (bgzf_flush(fp) < 0) goto write_fail;
     }
     while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
@@ -246,7 +246,7 @@ int cram_reheader_inplace2(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
     int32_put_blk(b, header_len);
     cram_block_append(b, sam_hdr_str(hdr), header_len);
     // Zero the remaining block
-    memset(cram_block_get_data(b)+cram_block_get_offset(b), 0,
+    memset((char *)cram_block_get_data(b)+cram_block_get_offset(b), 0,
            cram_block_get_uncomp_size(b) - cram_block_get_offset(b));
     // Make sure all sizes and byte-offsets are consistent after memset
     cram_block_set_offset(b, cram_block_get_uncomp_size(b));
diff --git a/samtools/bam_reheader.c.pysam.c b/samtools/bam_reheader.c.pysam.c
index 16990e6..18cb6c4 100644
--- a/samtools/bam_reheader.c.pysam.c
+++ b/samtools/bam_reheader.c.pysam.c
@@ -93,7 +93,7 @@ int bam_reheader(BGZF *in, bam_hdr_t *h, int fd,
         goto fail;
     }
     if (in->block_offset < in->block_length) {
-        if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+        if (bgzf_write(fp, (char *)in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
         if (bgzf_flush(fp) < 0) goto write_fail;
     }
     while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
@@ -248,7 +248,7 @@ int cram_reheader_inplace2(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
     int32_put_blk(b, header_len);
     cram_block_append(b, sam_hdr_str(hdr), header_len);
     // Zero the remaining block
-    memset(cram_block_get_data(b)+cram_block_get_offset(b), 0,
+    memset((char *)cram_block_get_data(b)+cram_block_get_offset(b), 0,
            cram_block_get_uncomp_size(b) - cram_block_get_offset(b));
     // Make sure all sizes and byte-offsets are consistent after memset
     cram_block_set_offset(b, cram_block_get_uncomp_size(b));
@@ -436,7 +436,7 @@ int cram_reheader_inplace(cram_fd *fd, const bam_hdr_t *h, const char *arg_list,
     }
 }
 
-static void usage(FILE *fp, int ret) {
+static int usage(FILE *fp, int ret) {
     fprintf(fp,
            "Usage: samtools reheader [-P] in.header.sam in.bam > out.bam\n"
            "   or  samtools reheader [-P] -i in.header.sam file.bam\n"
@@ -445,7 +445,7 @@ static void usage(FILE *fp, int ret) {
            "    -P, --no-PG      Do not generate an @PG header line.\n"
            "    -i, --in-place   Modify the bam/cram file directly.\n"
            "                     (Defaults to outputting to pysam_stdout.)\n");
-    exit(ret);
+    return(ret);
 }
 
 int main_reheader(int argc, char *argv[])
@@ -466,15 +466,15 @@ int main_reheader(int argc, char *argv[])
         switch (c) {
         case 'P': add_PG = 0; break;
         case 'i': inplace = 1; break;
-        case 'h': usage(pysam_stdout, 0); break;
+        case 'h': return(usage(pysam_stdout, 0)); break;
         default:
             fprintf(pysam_stderr, "Invalid option '%c'\n", c);
-            usage(pysam_stderr, 1);
+            return(usage(pysam_stderr, 1));
         }
     }
 
     if (argc - optind != 2)
-        usage(pysam_stderr, 1);
+      return(usage(pysam_stderr, 1));
 
     { // read the header
         samFile *fph = sam_open(argv[optind], "r");
diff --git a/samtools/bam_rmdup.c b/samtools/bam_rmdup.c
index 57612b4..513848d 100644
--- a/samtools/bam_rmdup.c
+++ b/samtools/bam_rmdup.c
@@ -258,7 +258,7 @@ static int rmdup_usage(void) {
     fprintf(stderr, "Option: -s    rmdup for SE reads\n");
     fprintf(stderr, "        -S    treat PE reads as SE in rmdup (force -s)\n");
 
-    sam_global_opt_help(stderr, "-....");
+    sam_global_opt_help(stderr, "-....-");
     return 1;
 }
 
@@ -271,7 +271,7 @@ int bam_rmdup(int argc, char *argv[])
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0, '-'),
         { NULL, 0, NULL, 0 }
     };
 
diff --git a/samtools/bam_rmdup.c.pysam.c b/samtools/bam_rmdup.c.pysam.c
index 3c16025..6742fc8 100644
--- a/samtools/bam_rmdup.c.pysam.c
+++ b/samtools/bam_rmdup.c.pysam.c
@@ -260,7 +260,7 @@ static int rmdup_usage(void) {
     fprintf(pysam_stderr, "Option: -s    rmdup for SE reads\n");
     fprintf(pysam_stderr, "        -S    treat PE reads as SE in rmdup (force -s)\n");
 
-    sam_global_opt_help(pysam_stderr, "-....");
+    sam_global_opt_help(pysam_stderr, "-....-");
     return 1;
 }
 
@@ -273,7 +273,7 @@ int bam_rmdup(int argc, char *argv[])
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0, '-'),
         { NULL, 0, NULL, 0 }
     };
 
diff --git a/samtools/bam_sort.c b/samtools/bam_sort.c
index 4955dcc..be9789c 100644
--- a/samtools/bam_sort.c
+++ b/samtools/bam_sort.c
@@ -43,6 +43,17 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/kstring.h"
 #include "htslib/sam.h"
 #include "sam_opts.h"
+#include "samtools.h"
+
+/* Minimum memory required in megabytes before sort will attempt to run. This
+   is to prevent accidents where failing to use the -m option correctly results
+   in the creation of a temporary file for each read in the input file.
+   Don't forget to update the man page if you change this. */
+const size_t SORT_MIN_MEGS_PER_THREAD = 1;
+
+/* Default per-thread memory for sort. Must be >= SORT_MIN_MEGS_PER_THREAD.
+   Don't forget to update the man page if you change this. */
+const size_t SORT_DEFAULT_MEGS_PER_THREAD = 768;
 
 #if !defined(__DARWIN_C_LEVEL) || __DARWIN_C_LEVEL < 900000L
 #define NEED_MEMSET_PATTERN4
@@ -1098,6 +1109,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
   @param  flag        flags that control how the merge is undertaken
   @param  reg         region to merge
   @param  n_threads   number of threads to use (passed to htslib)
+  @param  cmd         command name (used in print_error() etc)
   @param  in_fmt      format options for input files
   @param  out_fmt     output file format and options
   @discussion Padding information may NOT correctly maintained. This
@@ -1105,7 +1117,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
  */
 int bam_merge_core2(int by_qname, const char *out, const char *mode,
                     const char *headers, int n, char * const *fn, int flag,
-                    const char *reg, int n_threads,
+                    const char *reg, int n_threads, const char *cmd,
                     const htsFormat *in_fmt, const htsFormat *out_fmt)
 {
     samFile *fpout, **fp = NULL;
@@ -1126,25 +1138,15 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
     if (headers) {
         samFile* fpheaders = sam_open(headers, "r");
         if (fpheaders == NULL) {
-            const char *message = strerror(errno);
-            fprintf(stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
+            print_error_errno(cmd, "cannot open \"%s\"", headers);
             return -1;
         }
         hin = sam_hdr_read(fpheaders);
         sam_close(fpheaders);
         if (hin == NULL) {
-            fprintf(stderr, "[bam_merge_core] couldn't read headers for '%s'\n",
-                    headers);
-            goto mem_fail;
-        }
-    } else  {
-        hout = bam_hdr_init();
-        if (!hout) {
-            fprintf(stderr, "[bam_merge_core] couldn't allocate bam header\n");
+            print_error(cmd, "couldn't read headers from \"%s\"", headers);
             goto mem_fail;
         }
-        hout->text = strdup("");
-        if (!hout->text) goto mem_fail;
     }
 
     g_is_by_qname = by_qname;
@@ -1194,13 +1196,12 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
         bam_hdr_t *hin;
         fp[i] = sam_open_format(fn[i], "r", in_fmt);
         if (fp[i] == NULL) {
-            fprintf(stderr, "[bam_merge_core] fail to open file %s\n", fn[i]);
+            print_error_errno(cmd, "fail to open \"%s\"", fn[i]);
             goto fail;
         }
         hin = sam_hdr_read(fp[i]);
         if (hin == NULL) {
-            fprintf(stderr, "[bam_merge_core] failed to read header for '%s'\n",
-                    fn[i]);
+            print_error(cmd, "failed to read header from \"%s\"", fn[i]);
             goto fail;
         }
 
@@ -1218,6 +1219,16 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
         if ((translation_tbl+i)->lost_coord_sort && !by_qname) {
             fprintf(stderr, "[bam_merge_core] Order of targets in file %s caused coordinate sort to be lost\n", fn[i]);
         }
+
+        // Potential future improvement is to share headers between CRAM files for
+        // samtools sort (where all headers are identical.
+        // Eg:
+        //
+        // if (i > 1) {
+        //     sam_hdr_free(cram_fd_get_header(fp[i]->fp.cram));
+        //     cram_fd_set_header(fp[i]->fp.cram, cram_fd_get_header(fp[0]->fp.cram));
+        //     sam_hdr_incr_ref(cram_fd_get_header(fp[0]->fp.cram));
+        // }
     }
 
     // Did we get an @HD line?
@@ -1326,19 +1337,18 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
             bam_destroy1(h->b);
             h->b = NULL;
         } else {
-            fprintf(stderr, "[%s] failed to read first record from %s\n",
-                    __func__, fn[i]);
+            print_error(cmd, "failed to read first record from \"%s\"", fn[i]);
             goto fail;
         }
     }
 
     // Open output file and write header
     if ((fpout = sam_open_format(out, mode, out_fmt)) == 0) {
-        fprintf(stderr, "[%s] failed to create \"%s\": %s\n", __func__, out, strerror(errno));
+        print_error_errno(cmd, "failed to create \"%s\"", out);
         return -1;
     }
     if (sam_hdr_write(fpout, hout) != 0) {
-        fprintf(stderr, "[%s] failed to write header.\n", __func__);
+        print_error_errno(cmd, "failed to write header to \"%s\"", out);
         sam_close(fpout);
         return -1;
     }
@@ -1354,7 +1364,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
             bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
         }
         if (sam_write1(fpout, hout, b) < 0) {
-            fprintf(stderr, "[%s] failed to write to output file.\n", __func__);
+            print_error_errno(cmd, "failed writing to \"%s\"", out);
             sam_close(fpout);
             return -1;
         }
@@ -1367,8 +1377,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
             bam_destroy1(heap->b);
             heap->b = NULL;
         } else {
-            fprintf(stderr, "[bam_merge_core] error: '%s' is truncated.\n",
-                    fn[heap->i]);
+            print_error(cmd, "\"%s\" is truncated", fn[heap->i]);
             goto fail;
         }
         ks_heapadjust(heap, 0, n, heap);
@@ -1390,13 +1399,13 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
     free_merged_header(merged_hdr);
     free(RG); free(translation_tbl); free(fp); free(heap); free(iter); free(hdr);
     if (sam_close(fpout) < 0) {
-        fprintf(stderr, "[bam_merge_core] error closing output file\n");
+        print_error(cmd, "error closing output file");
         return -1;
     }
     return 0;
 
  mem_fail:
-    fprintf(stderr, "[bam_merge_core] Out of memory\n");
+    print_error(cmd, "Out of memory");
 
  fail:
     if (flag & MERGE_RG) {
@@ -1430,7 +1439,7 @@ int bam_merge_core(int by_qname, const char *out, const char *headers, int n, ch
     strcpy(mode, "wb");
     if (flag & MERGE_UNCOMP) strcat(mode, "0");
     else if (flag & MERGE_LEVEL1) strcat(mode, "1");
-    return bam_merge_core2(by_qname, out, mode, headers, n, fn, flag, reg, 0, NULL, NULL);
+    return bam_merge_core2(by_qname, out, mode, headers, n, fn, flag, reg, 0, "merge", NULL, NULL);
 }
 
 static void merge_usage(FILE *to)
@@ -1450,15 +1459,13 @@ static void merge_usage(FILE *to)
 "  -c         Combine @RG headers with colliding IDs [alter IDs to be distinct]\n"
 "  -p         Combine @PG headers with colliding IDs [alter IDs to be distinct]\n"
 "  -s VALUE   Override random seed\n"
-"  -b FILE    List of input BAM filenames, one per line [null]\n"
-"  -@, --threads INT\n"
-"             Number of BAM/CRAM compression threads [0]\n");
-    sam_global_opt_help(to, "-.O..");
+"  -b FILE    List of input BAM filenames, one per line [null]\n");
+    sam_global_opt_help(to, "-.O..@");
 }
 
 int bam_merge(int argc, char *argv[])
 {
-    int c, is_by_qname = 0, flag = 0, ret = 0, n_threads = 0, level = -1;
+    int c, is_by_qname = 0, flag = 0, ret = 0, level = -1;
     char *fn_headers = NULL, *reg = NULL, mode[12];
     long random_seed = (long)time(NULL);
     char** fn = NULL;
@@ -1466,7 +1473,7 @@ int bam_merge(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         { "threads", required_argument, NULL, '@' },
         { NULL, 0, NULL, 0 }
     };
@@ -1486,7 +1493,6 @@ int bam_merge(int argc, char *argv[])
         case 'u': flag |= MERGE_UNCOMP; level = 0; break;
         case 'R': reg = strdup(optarg); break;
         case 'l': level = atoi(optarg); break;
-        case '@': n_threads = atoi(optarg); break;
         case 'c': flag |= MERGE_COMBINE_RG; break;
         case 'p': flag |= MERGE_COMBINE_PG; break;
         case 's': random_seed = atol(optarg); break;
@@ -1500,9 +1506,10 @@ int bam_merge(int argc, char *argv[])
                 if (fn == NULL) { ret = 1; goto end; }
                 memcpy(fn+fn_size, fn_read, nfiles * sizeof(char*));
                 fn_size += nfiles;
+                free(fn_read);
             }
             else {
-                fprintf(stderr, "[%s] Invalid file list \"%s\"\n", __func__, optarg);
+                print_error("merge", "Invalid file list \"%s\"", optarg);
                 ret = 1;
             }
             break;
@@ -1514,7 +1521,7 @@ int bam_merge(int argc, char *argv[])
         }
     }
     if ( argc - optind < 1 ) {
-        fprintf(stderr, "You must at least specify the output file.\n");
+        print_error("merge", "You must at least specify the output file");
         merge_usage(stderr);
         return 1;
     }
@@ -1537,7 +1544,7 @@ int bam_merge(int argc, char *argv[])
         memcpy(fn+fn_size, argv + (optind+1), nargcfiles * sizeof(char*));
     }
     if (fn_size+nargcfiles < 1) {
-        fprintf(stderr, "You must specify at least one (and usually two or more) input files.\n");
+        print_error("merge", "You must specify at least one (and usually two or more) input files");
         merge_usage(stderr);
         return 1;
     }
@@ -1545,8 +1552,8 @@ int bam_merge(int argc, char *argv[])
     sam_open_mode(mode+1, argv[optind], NULL);
     if (level >= 0) sprintf(strchr(mode, '\0'), "%d", level < 9? level : 9);
     if (bam_merge_core2(is_by_qname, argv[optind], mode, fn_headers,
-                        fn_size+nargcfiles, fn, flag, reg, n_threads,
-                        &ga.in, &ga.out) < 0)
+                        fn_size+nargcfiles, fn, flag, reg, ga.nthreads,
+                        "merge", &ga.in, &ga.out) < 0)
         ret = 1;
 
 end:
@@ -1651,18 +1658,30 @@ static void *worker(void *data)
     name = (char*)calloc(strlen(w->prefix) + 20, 1);
     if (!name) { w->error = errno; return 0; }
     sprintf(name, "%s.%.4d.bam", w->prefix, w->index);
-    if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL) < 0)
-        w->error = errno;
-
-// Consider using CRAM temporary files if the final output is CRAM.
-// Typically it is comparable speed while being smaller.
-//    hts_opt opt[2] = {
-//        {"version=3.0", CRAM_OPT_VERSION, {"3.0"}, NULL},
-//        {"no_ref",      CRAM_OPT_NO_REF,  {1},     NULL}
-//    };
-//    opt[0].next = &opt[1];
-//    if (write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt) < 0)
-//        w->error = errno;
+
+    uint32_t max_ncigar = 0;
+    int i;
+    for (i = 0; i < w->buf_len; i++) {
+        uint32_t nc = w->buf[i]->core.n_cigar;
+        if (max_ncigar < nc)
+            max_ncigar = nc;
+    }
+
+    if (max_ncigar > 65535) {
+        htsFormat fmt;
+        memset(&fmt, 0, sizeof(fmt));
+        if (hts_parse_format(&fmt, "cram,version=3.0,no_ref,seqs_per_slice=1000") < 0) {
+            w->error = errno;
+            free(name);
+            return 0;
+        }
+
+        if (write_buffer(name, "wcx1", w->buf_len, w->buf, w->h, 0, &fmt) < 0)
+            w->error = errno;
+    } else {
+        if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL) < 0)
+            w->error = errno;
+    }
 
     free(name);
     return 0;
@@ -1697,7 +1716,8 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
     for (i = 0; i < n_threads; ++i) {
         pthread_join(tid[i], 0);
         if (w[i].error != 0) {
-            fprintf(stderr, "[bam_sort_core] failed to create temporary file \"%s.%.4d.bam\": %s\n", prefix, w[i].index, strerror(w[i].error));
+            errno = w[i].error;
+            print_error_errno("sort", "failed to create temporary file \"%s.%.4d.bam\"", prefix, w[i].index);
             n_failed++;
         }
     }
@@ -1741,17 +1761,23 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
     buf = NULL;
     fp = sam_open_format(fn, "r", in_fmt);
     if (fp == NULL) {
-        const char *message = strerror(errno);
-        fprintf(stderr, "[bam_sort_core] fail to open '%s': %s\n", fn, message);
+        print_error_errno("sort", "can't open \"%s\"", fn);
         return -2;
     }
     header = sam_hdr_read(fp);
     if (header == NULL) {
-        fprintf(stderr, "[bam_sort_core] failed to read header for '%s'\n", fn);
+        print_error("sort", "failed to read header from \"%s\"", fn);
         goto err;
     }
     if (is_by_qname) change_SO(header, "queryname");
     else change_SO(header, "coordinate");
+
+    // No gain to using the thread pool here as the flow of this code
+    // is such that we are *either* reading *or* sorting.  Hence a shared
+    // pool makes no real difference except to reduce the thread count a little.
+    if (n_threads > 1)
+        hts_set_threads(fp, n_threads);
+
     // write sub files
     for (;;) {
         if (k == max_k) {
@@ -1780,7 +1806,7 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
         }
     }
     if (ret != -1) {
-        fprintf(stderr, "[bam_sort_core] truncated file. Aborting.\n");
+        print_error("sort", "truncated file. Aborting");
         ret = -1;
         goto err;
     }
@@ -1789,7 +1815,7 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
     if (n_files == 0) { // a single block
         ks_mergesort(sort, k, buf, 0);
         if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt) != 0) {
-            fprintf(stderr, "[bam_sort_core] failed to create \"%s\": %s\n", fnout, strerror(errno));
+            print_error_errno("sort", "failed to create \"%s\"", fnout);
             ret = -1;
             goto err;
         }
@@ -1808,7 +1834,7 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
         }
         if (bam_merge_core2(is_by_qname, fnout, modeout, NULL, n_files, fns,
                             MERGE_COMBINE_RG|MERGE_COMBINE_PG|MERGE_FIRST_CO,
-                            NULL, n_threads, in_fmt, out_fmt) < 0) {
+                            NULL, n_threads, "sort", in_fmt, out_fmt) < 0) {
             // Propagate bam_merge_core2() failure; it has already emitted a
             // message explaining the failure, so no further message is needed.
             goto err;
@@ -1851,23 +1877,38 @@ static void sort_usage(FILE *fp)
 "  -m INT     Set maximum memory per thread; suffix K/M/G recognized [768M]\n"
 "  -n         Sort by read name\n"
 "  -o FILE    Write final output to FILE rather than standard output\n"
-"  -T PREFIX  Write temporary files to PREFIX.nnnn.bam\n"
-"  -@, --threads INT\n"
-"             Set number of sorting and compression threads [1]\n");
-    sam_global_opt_help(fp, "-.O..");
+"  -T PREFIX  Write temporary files to PREFIX.nnnn.bam\n");
+    sam_global_opt_help(fp, "-.O..@");
+}
+
+static void complain_about_memory_setting(size_t max_mem) {
+    char  *suffix = "";
+    const size_t nine_k = 9<<10;
+    if (max_mem > nine_k) { max_mem >>= 10; suffix = "K"; }
+    if (max_mem > nine_k) { max_mem >>= 10; suffix = "M"; }
+
+    fprintf(stderr,
+"[bam_sort] -m setting (%zu%s bytes) is less than the minimum required (%zuM).\n\n"
+"Trying to run with -m too small can lead to the creation of a very large number\n"
+"of temporary files.  This may make sort fail due to it exceeding limits on the\n"
+"number of files it can have open at the same time.\n\n"
+"Please check your -m parameter.  It should be an integer followed by one of the\n"
+"letters K (for kilobytes), M (megabytes) or G (gigabytes).  You should ensure it\n"
+"is at least the minimum above, and much higher if you are sorting a large file.\n",
+            max_mem, suffix, SORT_MIN_MEGS_PER_THREAD);
 }
 
 int bam_sort(int argc, char *argv[])
 {
-    size_t max_mem = 768<<20; // 512MB
-    int c, nargs, is_by_qname = 0, ret, o_seen = 0, n_threads = 0, level = -1;
+    size_t max_mem = SORT_DEFAULT_MEGS_PER_THREAD << 20;
+    int c, nargs, is_by_qname = 0, ret, o_seen = 0, level = -1;
     char *fnout = "-", modeout[12];
     kstring_t tmpprefix = { 0, 0, NULL };
     struct stat st;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         { "threads", required_argument, NULL, '@' },
         { NULL, 0, NULL, 0 }
     };
@@ -1885,7 +1926,6 @@ int bam_sort(int argc, char *argv[])
                 break;
             }
         case 'T': kputs(optarg, &tmpprefix); break;
-        case '@': n_threads = atoi(optarg); break;
         case 'l': level = atoi(optarg); break;
 
         default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
@@ -1910,6 +1950,12 @@ int bam_sort(int argc, char *argv[])
         goto sort_end;
     }
 
+    if (max_mem < (SORT_MIN_MEGS_PER_THREAD << 20)) {
+        complain_about_memory_setting(max_mem);
+        ret = EXIT_FAILURE;
+        goto sort_end;
+    }
+
     strcpy(modeout, "wb");
     sam_open_mode(modeout+1, fnout, NULL);
     if (level >= 0) sprintf(strchr(modeout, '\0'), "%d", level < 9? level : 9);
@@ -1925,7 +1971,7 @@ int bam_sort(int argc, char *argv[])
     }
 
     ret = bam_sort_core_ext(is_by_qname, (nargs > 0)? argv[optind] : "-",
-                            tmpprefix.s, fnout, modeout, max_mem, n_threads,
+                            tmpprefix.s, fnout, modeout, max_mem, ga.nthreads,
                             &ga.in, &ga.out);
     if (ret >= 0)
         ret = EXIT_SUCCESS;
diff --git a/samtools/bam_sort.c.pysam.c b/samtools/bam_sort.c.pysam.c
index b2b625d..ea2a30d 100644
--- a/samtools/bam_sort.c.pysam.c
+++ b/samtools/bam_sort.c.pysam.c
@@ -45,6 +45,17 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/kstring.h"
 #include "htslib/sam.h"
 #include "sam_opts.h"
+#include "samtools.h"
+
+/* Minimum memory required in megabytes before sort will attempt to run. This
+   is to prevent accidents where failing to use the -m option correctly results
+   in the creation of a temporary file for each read in the input file.
+   Don't forget to update the man page if you change this. */
+const size_t SORT_MIN_MEGS_PER_THREAD = 1;
+
+/* Default per-thread memory for sort. Must be >= SORT_MIN_MEGS_PER_THREAD.
+   Don't forget to update the man page if you change this. */
+const size_t SORT_DEFAULT_MEGS_PER_THREAD = 768;
 
 #if !defined(__DARWIN_C_LEVEL) || __DARWIN_C_LEVEL < 900000L
 #define NEED_MEMSET_PATTERN4
@@ -1100,6 +1111,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
   @param  flag        flags that control how the merge is undertaken
   @param  reg         region to merge
   @param  n_threads   number of threads to use (passed to htslib)
+  @param  cmd         command name (used in print_error() etc)
   @param  in_fmt      format options for input files
   @param  out_fmt     output file format and options
   @discussion Padding information may NOT correctly maintained. This
@@ -1107,7 +1119,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
  */
 int bam_merge_core2(int by_qname, const char *out, const char *mode,
                     const char *headers, int n, char * const *fn, int flag,
-                    const char *reg, int n_threads,
+                    const char *reg, int n_threads, const char *cmd,
                     const htsFormat *in_fmt, const htsFormat *out_fmt)
 {
     samFile *fpout, **fp = NULL;
@@ -1128,25 +1140,15 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
     if (headers) {
         samFile* fpheaders = sam_open(headers, "r");
         if (fpheaders == NULL) {
-            const char *message = strerror(errno);
-            fprintf(pysam_stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
+            print_error_errno(cmd, "cannot open \"%s\"", headers);
             return -1;
         }
         hin = sam_hdr_read(fpheaders);
         sam_close(fpheaders);
         if (hin == NULL) {
-            fprintf(pysam_stderr, "[bam_merge_core] couldn't read headers for '%s'\n",
-                    headers);
-            goto mem_fail;
-        }
-    } else  {
-        hout = bam_hdr_init();
-        if (!hout) {
-            fprintf(pysam_stderr, "[bam_merge_core] couldn't allocate bam header\n");
+            print_error(cmd, "couldn't read headers from \"%s\"", headers);
             goto mem_fail;
         }
-        hout->text = strdup("");
-        if (!hout->text) goto mem_fail;
     }
 
     g_is_by_qname = by_qname;
@@ -1196,13 +1198,12 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
         bam_hdr_t *hin;
         fp[i] = sam_open_format(fn[i], "r", in_fmt);
         if (fp[i] == NULL) {
-            fprintf(pysam_stderr, "[bam_merge_core] fail to open file %s\n", fn[i]);
+            print_error_errno(cmd, "fail to open \"%s\"", fn[i]);
             goto fail;
         }
         hin = sam_hdr_read(fp[i]);
         if (hin == NULL) {
-            fprintf(pysam_stderr, "[bam_merge_core] failed to read header for '%s'\n",
-                    fn[i]);
+            print_error(cmd, "failed to read header from \"%s\"", fn[i]);
             goto fail;
         }
 
@@ -1220,6 +1221,16 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
         if ((translation_tbl+i)->lost_coord_sort && !by_qname) {
             fprintf(pysam_stderr, "[bam_merge_core] Order of targets in file %s caused coordinate sort to be lost\n", fn[i]);
         }
+
+        // Potential future improvement is to share headers between CRAM files for
+        // samtools sort (where all headers are identical.
+        // Eg:
+        //
+        // if (i > 1) {
+        //     sam_hdr_free(cram_fd_get_header(fp[i]->fp.cram));
+        //     cram_fd_set_header(fp[i]->fp.cram, cram_fd_get_header(fp[0]->fp.cram));
+        //     sam_hdr_incr_ref(cram_fd_get_header(fp[0]->fp.cram));
+        // }
     }
 
     // Did we get an @HD line?
@@ -1328,19 +1339,18 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
             bam_destroy1(h->b);
             h->b = NULL;
         } else {
-            fprintf(pysam_stderr, "[%s] failed to read first record from %s\n",
-                    __func__, fn[i]);
+            print_error(cmd, "failed to read first record from \"%s\"", fn[i]);
             goto fail;
         }
     }
 
     // Open output file and write header
     if ((fpout = sam_open_format(out, mode, out_fmt)) == 0) {
-        fprintf(pysam_stderr, "[%s] failed to create \"%s\": %s\n", __func__, out, strerror(errno));
+        print_error_errno(cmd, "failed to create \"%s\"", out);
         return -1;
     }
     if (sam_hdr_write(fpout, hout) != 0) {
-        fprintf(pysam_stderr, "[%s] failed to write header.\n", __func__);
+        print_error_errno(cmd, "failed to write header to \"%s\"", out);
         sam_close(fpout);
         return -1;
     }
@@ -1356,7 +1366,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
             bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
         }
         if (sam_write1(fpout, hout, b) < 0) {
-            fprintf(pysam_stderr, "[%s] failed to write to output file.\n", __func__);
+            print_error_errno(cmd, "failed writing to \"%s\"", out);
             sam_close(fpout);
             return -1;
         }
@@ -1369,8 +1379,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
             bam_destroy1(heap->b);
             heap->b = NULL;
         } else {
-            fprintf(pysam_stderr, "[bam_merge_core] error: '%s' is truncated.\n",
-                    fn[heap->i]);
+            print_error(cmd, "\"%s\" is truncated", fn[heap->i]);
             goto fail;
         }
         ks_heapadjust(heap, 0, n, heap);
@@ -1392,13 +1401,13 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
     free_merged_header(merged_hdr);
     free(RG); free(translation_tbl); free(fp); free(heap); free(iter); free(hdr);
     if (sam_close(fpout) < 0) {
-        fprintf(pysam_stderr, "[bam_merge_core] error closing output file\n");
+        print_error(cmd, "error closing output file");
         return -1;
     }
     return 0;
 
  mem_fail:
-    fprintf(pysam_stderr, "[bam_merge_core] Out of memory\n");
+    print_error(cmd, "Out of memory");
 
  fail:
     if (flag & MERGE_RG) {
@@ -1432,7 +1441,7 @@ int bam_merge_core(int by_qname, const char *out, const char *headers, int n, ch
     strcpy(mode, "wb");
     if (flag & MERGE_UNCOMP) strcat(mode, "0");
     else if (flag & MERGE_LEVEL1) strcat(mode, "1");
-    return bam_merge_core2(by_qname, out, mode, headers, n, fn, flag, reg, 0, NULL, NULL);
+    return bam_merge_core2(by_qname, out, mode, headers, n, fn, flag, reg, 0, "merge", NULL, NULL);
 }
 
 static void merge_usage(FILE *to)
@@ -1452,15 +1461,13 @@ static void merge_usage(FILE *to)
 "  -c         Combine @RG headers with colliding IDs [alter IDs to be distinct]\n"
 "  -p         Combine @PG headers with colliding IDs [alter IDs to be distinct]\n"
 "  -s VALUE   Override random seed\n"
-"  -b FILE    List of input BAM filenames, one per line [null]\n"
-"  -@, --threads INT\n"
-"             Number of BAM/CRAM compression threads [0]\n");
-    sam_global_opt_help(to, "-.O..");
+"  -b FILE    List of input BAM filenames, one per line [null]\n");
+    sam_global_opt_help(to, "-.O..@");
 }
 
 int bam_merge(int argc, char *argv[])
 {
-    int c, is_by_qname = 0, flag = 0, ret = 0, n_threads = 0, level = -1;
+    int c, is_by_qname = 0, flag = 0, ret = 0, level = -1;
     char *fn_headers = NULL, *reg = NULL, mode[12];
     long random_seed = (long)time(NULL);
     char** fn = NULL;
@@ -1468,7 +1475,7 @@ int bam_merge(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         { "threads", required_argument, NULL, '@' },
         { NULL, 0, NULL, 0 }
     };
@@ -1488,7 +1495,6 @@ int bam_merge(int argc, char *argv[])
         case 'u': flag |= MERGE_UNCOMP; level = 0; break;
         case 'R': reg = strdup(optarg); break;
         case 'l': level = atoi(optarg); break;
-        case '@': n_threads = atoi(optarg); break;
         case 'c': flag |= MERGE_COMBINE_RG; break;
         case 'p': flag |= MERGE_COMBINE_PG; break;
         case 's': random_seed = atol(optarg); break;
@@ -1502,9 +1508,10 @@ int bam_merge(int argc, char *argv[])
                 if (fn == NULL) { ret = 1; goto end; }
                 memcpy(fn+fn_size, fn_read, nfiles * sizeof(char*));
                 fn_size += nfiles;
+                free(fn_read);
             }
             else {
-                fprintf(pysam_stderr, "[%s] Invalid file list \"%s\"\n", __func__, optarg);
+                print_error("merge", "Invalid file list \"%s\"", optarg);
                 ret = 1;
             }
             break;
@@ -1516,7 +1523,7 @@ int bam_merge(int argc, char *argv[])
         }
     }
     if ( argc - optind < 1 ) {
-        fprintf(pysam_stderr, "You must at least specify the output file.\n");
+        print_error("merge", "You must at least specify the output file");
         merge_usage(pysam_stderr);
         return 1;
     }
@@ -1539,7 +1546,7 @@ int bam_merge(int argc, char *argv[])
         memcpy(fn+fn_size, argv + (optind+1), nargcfiles * sizeof(char*));
     }
     if (fn_size+nargcfiles < 1) {
-        fprintf(pysam_stderr, "You must specify at least one (and usually two or more) input files.\n");
+        print_error("merge", "You must specify at least one (and usually two or more) input files");
         merge_usage(pysam_stderr);
         return 1;
     }
@@ -1547,8 +1554,8 @@ int bam_merge(int argc, char *argv[])
     sam_open_mode(mode+1, argv[optind], NULL);
     if (level >= 0) sprintf(strchr(mode, '\0'), "%d", level < 9? level : 9);
     if (bam_merge_core2(is_by_qname, argv[optind], mode, fn_headers,
-                        fn_size+nargcfiles, fn, flag, reg, n_threads,
-                        &ga.in, &ga.out) < 0)
+                        fn_size+nargcfiles, fn, flag, reg, ga.nthreads,
+                        "merge", &ga.in, &ga.out) < 0)
         ret = 1;
 
 end:
@@ -1653,18 +1660,30 @@ static void *worker(void *data)
     name = (char*)calloc(strlen(w->prefix) + 20, 1);
     if (!name) { w->error = errno; return 0; }
     sprintf(name, "%s.%.4d.bam", w->prefix, w->index);
-    if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL) < 0)
-        w->error = errno;
-
-// Consider using CRAM temporary files if the final output is CRAM.
-// Typically it is comparable speed while being smaller.
-//    hts_opt opt[2] = {
-//        {"version=3.0", CRAM_OPT_VERSION, {"3.0"}, NULL},
-//        {"no_ref",      CRAM_OPT_NO_REF,  {1},     NULL}
-//    };
-//    opt[0].next = &opt[1];
-//    if (write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt) < 0)
-//        w->error = errno;
+
+    uint32_t max_ncigar = 0;
+    int i;
+    for (i = 0; i < w->buf_len; i++) {
+        uint32_t nc = w->buf[i]->core.n_cigar;
+        if (max_ncigar < nc)
+            max_ncigar = nc;
+    }
+
+    if (max_ncigar > 65535) {
+        htsFormat fmt;
+        memset(&fmt, 0, sizeof(fmt));
+        if (hts_parse_format(&fmt, "cram,version=3.0,no_ref,seqs_per_slice=1000") < 0) {
+            w->error = errno;
+            free(name);
+            return 0;
+        }
+
+        if (write_buffer(name, "wcx1", w->buf_len, w->buf, w->h, 0, &fmt) < 0)
+            w->error = errno;
+    } else {
+        if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL) < 0)
+            w->error = errno;
+    }
 
     free(name);
     return 0;
@@ -1699,7 +1718,8 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
     for (i = 0; i < n_threads; ++i) {
         pthread_join(tid[i], 0);
         if (w[i].error != 0) {
-            fprintf(pysam_stderr, "[bam_sort_core] failed to create temporary file \"%s.%.4d.bam\": %s\n", prefix, w[i].index, strerror(w[i].error));
+            errno = w[i].error;
+            print_error_errno("sort", "failed to create temporary file \"%s.%.4d.bam\"", prefix, w[i].index);
             n_failed++;
         }
     }
@@ -1743,17 +1763,23 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
     buf = NULL;
     fp = sam_open_format(fn, "r", in_fmt);
     if (fp == NULL) {
-        const char *message = strerror(errno);
-        fprintf(pysam_stderr, "[bam_sort_core] fail to open '%s': %s\n", fn, message);
+        print_error_errno("sort", "can't open \"%s\"", fn);
         return -2;
     }
     header = sam_hdr_read(fp);
     if (header == NULL) {
-        fprintf(pysam_stderr, "[bam_sort_core] failed to read header for '%s'\n", fn);
+        print_error("sort", "failed to read header from \"%s\"", fn);
         goto err;
     }
     if (is_by_qname) change_SO(header, "queryname");
     else change_SO(header, "coordinate");
+
+    // No gain to using the thread pool here as the flow of this code
+    // is such that we are *either* reading *or* sorting.  Hence a shared
+    // pool makes no real difference except to reduce the thread count a little.
+    if (n_threads > 1)
+        hts_set_threads(fp, n_threads);
+
     // write sub files
     for (;;) {
         if (k == max_k) {
@@ -1782,7 +1808,7 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
         }
     }
     if (ret != -1) {
-        fprintf(pysam_stderr, "[bam_sort_core] truncated file. Aborting.\n");
+        print_error("sort", "truncated file. Aborting");
         ret = -1;
         goto err;
     }
@@ -1791,7 +1817,7 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
     if (n_files == 0) { // a single block
         ks_mergesort(sort, k, buf, 0);
         if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt) != 0) {
-            fprintf(pysam_stderr, "[bam_sort_core] failed to create \"%s\": %s\n", fnout, strerror(errno));
+            print_error_errno("sort", "failed to create \"%s\"", fnout);
             ret = -1;
             goto err;
         }
@@ -1810,7 +1836,7 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
         }
         if (bam_merge_core2(is_by_qname, fnout, modeout, NULL, n_files, fns,
                             MERGE_COMBINE_RG|MERGE_COMBINE_PG|MERGE_FIRST_CO,
-                            NULL, n_threads, in_fmt, out_fmt) < 0) {
+                            NULL, n_threads, "sort", in_fmt, out_fmt) < 0) {
             // Propagate bam_merge_core2() failure; it has already emitted a
             // message explaining the failure, so no further message is needed.
             goto err;
@@ -1853,23 +1879,38 @@ static void sort_usage(FILE *fp)
 "  -m INT     Set maximum memory per thread; suffix K/M/G recognized [768M]\n"
 "  -n         Sort by read name\n"
 "  -o FILE    Write final output to FILE rather than standard output\n"
-"  -T PREFIX  Write temporary files to PREFIX.nnnn.bam\n"
-"  -@, --threads INT\n"
-"             Set number of sorting and compression threads [1]\n");
-    sam_global_opt_help(fp, "-.O..");
+"  -T PREFIX  Write temporary files to PREFIX.nnnn.bam\n");
+    sam_global_opt_help(fp, "-.O..@");
+}
+
+static void complain_about_memory_setting(size_t max_mem) {
+    char  *suffix = "";
+    const size_t nine_k = 9<<10;
+    if (max_mem > nine_k) { max_mem >>= 10; suffix = "K"; }
+    if (max_mem > nine_k) { max_mem >>= 10; suffix = "M"; }
+
+    fprintf(pysam_stderr,
+"[bam_sort] -m setting (%zu%s bytes) is less than the minimum required (%zuM).\n\n"
+"Trying to run with -m too small can lead to the creation of a very large number\n"
+"of temporary files.  This may make sort fail due to it exceeding limits on the\n"
+"number of files it can have open at the same time.\n\n"
+"Please check your -m parameter.  It should be an integer followed by one of the\n"
+"letters K (for kilobytes), M (megabytes) or G (gigabytes).  You should ensure it\n"
+"is at least the minimum above, and much higher if you are sorting a large file.\n",
+            max_mem, suffix, SORT_MIN_MEGS_PER_THREAD);
 }
 
 int bam_sort(int argc, char *argv[])
 {
-    size_t max_mem = 768<<20; // 512MB
-    int c, nargs, is_by_qname = 0, ret, o_seen = 0, n_threads = 0, level = -1;
+    size_t max_mem = SORT_DEFAULT_MEGS_PER_THREAD << 20;
+    int c, nargs, is_by_qname = 0, ret, o_seen = 0, level = -1;
     char *fnout = "-", modeout[12];
     kstring_t tmpprefix = { 0, 0, NULL };
     struct stat st;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         { "threads", required_argument, NULL, '@' },
         { NULL, 0, NULL, 0 }
     };
@@ -1887,7 +1928,6 @@ int bam_sort(int argc, char *argv[])
                 break;
             }
         case 'T': kputs(optarg, &tmpprefix); break;
-        case '@': n_threads = atoi(optarg); break;
         case 'l': level = atoi(optarg); break;
 
         default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
@@ -1912,6 +1952,12 @@ int bam_sort(int argc, char *argv[])
         goto sort_end;
     }
 
+    if (max_mem < (SORT_MIN_MEGS_PER_THREAD << 20)) {
+        complain_about_memory_setting(max_mem);
+        ret = EXIT_FAILURE;
+        goto sort_end;
+    }
+
     strcpy(modeout, "wb");
     sam_open_mode(modeout+1, fnout, NULL);
     if (level >= 0) sprintf(strchr(modeout, '\0'), "%d", level < 9? level : 9);
@@ -1927,7 +1973,7 @@ int bam_sort(int argc, char *argv[])
     }
 
     ret = bam_sort_core_ext(is_by_qname, (nargs > 0)? argv[optind] : "-",
-                            tmpprefix.s, fnout, modeout, max_mem, n_threads,
+                            tmpprefix.s, fnout, modeout, max_mem, ga.nthreads,
                             &ga.in, &ga.out);
     if (ret >= 0)
         ret = EXIT_SUCCESS;
diff --git a/samtools/bam_split.c b/samtools/bam_split.c
index 9a2998a..9bb2030 100644
--- a/samtools/bam_split.c
+++ b/samtools/bam_split.c
@@ -1,6 +1,6 @@
 /*  bam_split.c -- split subcommand.
 
-    Copyright (C) 2013-2015 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
 
     Author: Martin Pollard <mp15 at sanger.ac.uk>
 
@@ -34,7 +34,10 @@ DEALINGS IN THE SOFTWARE.  */
 #include <regex.h>
 #include <htslib/khash.h>
 #include <htslib/kstring.h>
+#include <htslib/cram.h>
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
+#include "samtools.h"
 
 
 KHASH_MAP_INIT_STR(c2i, int)
@@ -61,6 +64,7 @@ struct state {
     samFile** rg_output_file;
     bam_hdr_t** rg_output_header;
     kh_c2i_t* rg_hash;
+    htsThreadPool p;
 };
 
 typedef struct state state_t;
@@ -78,7 +82,7 @@ static void usage(FILE *write_to)
 "  -u FILE1        put reads with no RG tag or an unrecognised RG tag in FILE1\n"
 "  -u FILE1:FILE2  ...and override the header with FILE2\n"
 "  -v              verbose output\n");
-    sam_global_opt_help(write_to, "-....");
+    sam_global_opt_help(write_to, "-....@");
     fprintf(write_to,
 "\n"
 "Format string expansions:\n"
@@ -95,11 +99,11 @@ static parsed_opts_t* parse_args(int argc, char** argv)
 {
     if (argc == 1) { usage(stdout); return NULL; }
 
-    const char* optstring = "vf:u:";
+    const char* optstring = "vf:u:@:";
     char* delim;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0, '@'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -143,7 +147,7 @@ static parsed_opts_t* parse_args(int argc, char** argv)
     argv += optind;
 
     if (argc != 1) {
-        fprintf(stderr, "Invalid number of arguments: %d\n", argc);
+        print_error("split", "Invalid number of arguments: %d", argc);
         usage(stderr);
         free(retval);
         return NULL;
@@ -270,7 +274,7 @@ static bool count_RG(bam_hdr_t* hdr, size_t* count, char*** output_name)
 
 // Filters a header of @RG lines where ID != id_keep
 // TODO: strip @PG's descended from other RGs and their descendants
-static bool filter_header_rg(bam_hdr_t* hdr, const char* id_keep)
+static bool filter_header_rg(bam_hdr_t* hdr, const char* id_keep, const char *arg_list)
 {
     kstring_t str = {0, 0, NULL};
 
@@ -315,28 +319,52 @@ static bool filter_header_rg(bam_hdr_t* hdr, const char* id_keep)
     free(hdr->text);
     hdr->text = ks_release(&str);
 
+    // Add the PG line
+    SAM_hdr *sh = sam_hdr_parse_(hdr->text, hdr->l_text);
+    if (sam_hdr_add_PG(sh, "samtools",
+                           "VN", samtools_version(),
+                           arg_list ? "CL": NULL,
+                           arg_list ? arg_list : NULL,
+                           NULL) != 0)
+        return -1;
+
+    free(hdr->text);
+    hdr->text = strdup(sam_hdr_str(sh));
+    hdr->l_text = sam_hdr_length(sh);
+    if (!hdr->text)
+        return false;
+    sam_hdr_free(sh);
+
     return true;
 }
 
 // Set the initial state
-static state_t* init(parsed_opts_t* opts)
+static state_t* init(parsed_opts_t* opts, const char *arg_list)
 {
     state_t* retval = calloc(sizeof(state_t), 1);
     if (!retval) {
-        fprintf(stderr, "Out of memory");
+        print_error_errno("split", "Initialisation failed");
         return NULL;
     }
 
+    if (opts->ga.nthreads > 0) {
+        if (!(retval->p.pool = hts_tpool_init(opts->ga.nthreads))) {
+            fprintf(stderr, "Error creating thread pool\n");
+            return NULL;
+        }
+    }
+
     retval->merged_input_file = sam_open_format(opts->merged_input_name, "rb", &opts->ga.in);
     if (!retval->merged_input_file) {
-        fprintf(stderr, "Could not open input file (%s)\n", opts->merged_input_name);
+        print_error_errno("split", "Could not open \"%s\"", opts->merged_input_name);
         free(retval);
         return NULL;
     }
+    if (retval->p.pool)
+        hts_set_opt(retval->merged_input_file, HTS_OPT_THREAD_POOL, &retval->p);
     retval->merged_input_header = sam_hdr_read(retval->merged_input_file);
     if (retval->merged_input_header == NULL) {
-        fprintf(stderr, "Could not read header for file '%s'\n",
-                opts->merged_input_name);
+        print_error("split", "Could not read header from \"%s\"", opts->merged_input_name);
         cleanup_state(retval, false);
         return NULL;
     }
@@ -345,14 +373,13 @@ static state_t* init(parsed_opts_t* opts)
         if (opts->unaccounted_header_name) {
             samFile* hdr_load = sam_open_format(opts->unaccounted_header_name, "r", &opts->ga.in);
             if (!hdr_load) {
-                fprintf(stderr, "Could not open unaccounted header file (%s)\n", opts->unaccounted_header_name);
+                print_error_errno("split", "Could not open unaccounted header file \"%s\"", opts->unaccounted_header_name);
                 cleanup_state(retval, false);
                 return NULL;
             }
             retval->unaccounted_header = sam_hdr_read(hdr_load);
             if (retval->unaccounted_header == NULL) {
-                fprintf(stderr, "Could not read header for file '%s'\n",
-                        opts->unaccounted_header_name);
+                print_error("split", "Could not read header from \"%s\"", opts->unaccounted_header_name);
                 cleanup_state(retval, false);
                 return NULL;
             }
@@ -363,10 +390,12 @@ static state_t* init(parsed_opts_t* opts)
 
         retval->unaccounted_file = sam_open_format(opts->unaccounted_name, "wb", &opts->ga.out);
         if (retval->unaccounted_file == NULL) {
-            fprintf(stderr, "Could not open unaccounted output file: %s\n", opts->unaccounted_name);
+            print_error_errno("split", "Could not open unaccounted output file \"%s\"", opts->unaccounted_name);
             cleanup_state(retval, false);
             return NULL;
         }
+        if (retval->p.pool)
+            hts_set_opt(retval->unaccounted_file, HTS_OPT_THREAD_POOL, &retval->p);
     }
 
     // Open output files for RGs
@@ -378,7 +407,7 @@ static state_t* init(parsed_opts_t* opts)
     retval->rg_output_header = (bam_hdr_t**)calloc(retval->output_count, sizeof(bam_hdr_t*));
     retval->rg_hash = kh_init_c2i();
     if (!retval->rg_output_file_name || !retval->rg_output_file || !retval->rg_output_header || !retval->rg_hash) {
-        fprintf(stderr, "Could not allocate memory for output file array. Out of memory?");
+        print_error_errno("split", "Could not initialise output file array");
         cleanup_state(retval, false);
         return NULL;
     }
@@ -386,7 +415,7 @@ static state_t* init(parsed_opts_t* opts)
     char* dirsep = strrchr(opts->merged_input_name, '/');
     char* input_base_name = strdup(dirsep? dirsep+1 : opts->merged_input_name);
     if (!input_base_name) {
-        fprintf(stderr, "Out of memory\n");
+        print_error_errno("split", "Filename manipulation failed");
         cleanup_state(retval, false);
         return NULL;
     }
@@ -403,7 +432,7 @@ static state_t* init(parsed_opts_t* opts)
                                                &opts->ga.out);
 
         if ( output_filename == NULL ) {
-            fprintf(stderr, "Error expanding output filename format string.\n");
+            print_error("split", "Error expanding output filename format string");
             cleanup_state(retval, false);
             free(input_base_name);
             return NULL;
@@ -412,11 +441,13 @@ static state_t* init(parsed_opts_t* opts)
         retval->rg_output_file_name[i] = output_filename;
         retval->rg_output_file[i] = sam_open_format(output_filename, "wb", &opts->ga.out);
         if (retval->rg_output_file[i] == NULL) {
-            fprintf(stderr, "Could not open output file: %s\n", output_filename);
+            print_error_errno("split", "Could not open \"%s\"", output_filename);
             cleanup_state(retval, false);
             free(input_base_name);
             return NULL;
         }
+        if (retval->p.pool)
+            hts_set_opt(retval->rg_output_file[i], HTS_OPT_THREAD_POOL, &retval->p);
 
         // Record index in hash
         int ret;
@@ -425,8 +456,8 @@ static state_t* init(parsed_opts_t* opts)
 
         // Set and edit header
         retval->rg_output_header[i] = bam_hdr_dup(retval->merged_input_header);
-        if ( !filter_header_rg(retval->rg_output_header[i], retval->rg_id[i]) ) {
-            fprintf(stderr, "Could not rewrite header for file: %s\n", output_filename);
+        if ( !filter_header_rg(retval->rg_output_header[i], retval->rg_id[i], arg_list) ) {
+            print_error("split", "Could not rewrite header for \"%s\"", output_filename);
             cleanup_state(retval, false);
             free(input_base_name);
             return NULL;
@@ -441,14 +472,13 @@ static state_t* init(parsed_opts_t* opts)
 static bool split(state_t* state)
 {
     if (state->unaccounted_file && sam_hdr_write(state->unaccounted_file, state->unaccounted_header) != 0) {
-        fprintf(stderr, "Could not write output file header\n");
+        print_error_errno("split", "Could not write output file header");
         return false;
     }
     size_t i;
     for (i = 0; i < state->output_count; i++) {
         if (sam_hdr_write(state->rg_output_file[i], state->rg_output_header[i]) != 0) {
-            fprintf(stderr, "Could not write output file header for '%s'\n",
-                    state->rg_output_file_name[i]);
+            print_error_errno("split", "Could not write file header to \"%s\"", state->rg_output_file_name[i]);
             return false;
         }
     }
@@ -461,7 +491,7 @@ static bool split(state_t* state)
         bam_destroy1(file_read);
         file_read = NULL;
         if (r < -1) {
-            fprintf(stderr, "Could not read first input record\n");
+            print_error("split", "Could not read first input record");
             return false;
         }
     }
@@ -482,8 +512,7 @@ static bool split(state_t* state)
             // if found write to the appropriate untangled bam
             int i = kh_val(state->rg_hash,iter);
             if (sam_write1(state->rg_output_file[i], state->rg_output_header[i], file_read) < 0) {
-                fprintf(stderr, "Could not write to output file '%s'\n",
-                        state->rg_output_file_name[i]);
+                print_error_errno("split", "Could not write to \"%s\"", state->rg_output_file_name[i]);
                 bam_destroy1(file_read);
                 return false;
             }
@@ -499,7 +528,7 @@ static bool split(state_t* state)
                 return false;
             } else {
                 if (sam_write1(state->unaccounted_file, state->unaccounted_header, file_read) < 0) {
-                    fprintf(stderr, "Could not write to unaccounted output file\n");
+                    print_error_errno("split", "Could not write to unaccounted output file");
                     bam_destroy1(file_read);
                     return false;
                 }
@@ -512,7 +541,7 @@ static bool split(state_t* state)
             bam_destroy1(file_read);
             file_read = NULL;
             if (r < -1) {
-                fprintf(stderr, "Could not read input record\n");
+                print_error("split", "Could not read input record");
                 return false;
             }
         }
@@ -529,7 +558,7 @@ static int cleanup_state(state_t* status, bool check_close)
     if (status->unaccounted_header) bam_hdr_destroy(status->unaccounted_header);
     if (status->unaccounted_file) {
         if (sam_close(status->unaccounted_file) < 0 && check_close) {
-            fprintf(stderr, "Error on closing unaccounted file\n");
+            print_error("split", "Error on closing unaccounted file");
             ret = -1;
         }
     }
@@ -540,8 +569,7 @@ static int cleanup_state(state_t* status, bool check_close)
             bam_hdr_destroy(status->rg_output_header[i]);
         if (status->rg_output_file && status->rg_output_file[i]) {
             if (sam_close(status->rg_output_file[i]) < 0 && check_close) {
-                fprintf(stderr, "Error on closing output file '%s'\n",
-                        status->rg_output_file_name[i]);
+                print_error("split", "Error on closing output file \"%s\"", status->rg_output_file_name[i]);
                 ret = -1;
             }
         }
@@ -557,6 +585,9 @@ static int cleanup_state(state_t* status, bool check_close)
     free(status->rg_id);
     free(status);
 
+    if (status->p.pool)
+        hts_tpool_destroy(status->p.pool);
+
     return ret;
 }
 
@@ -574,9 +605,10 @@ static void cleanup_opts(parsed_opts_t* opts)
 int main_split(int argc, char** argv)
 {
     int ret = 1;
+    char *arg_list = stringify_argv(argc+1, argv-1);
     parsed_opts_t* opts = parse_args(argc, argv);
     if (!opts) goto cleanup_opts;
-    state_t* status = init(opts);
+    state_t* status = init(opts, arg_list);
     if (!status) goto cleanup_opts;
 
     if (!split(status)) {
@@ -588,6 +620,7 @@ int main_split(int argc, char** argv)
 
 cleanup_opts:
     cleanup_opts(opts);
+    free(arg_list);
 
     return ret;
 }
diff --git a/samtools/bam_split.c.pysam.c b/samtools/bam_split.c.pysam.c
index 2348f48..8a584ed 100644
--- a/samtools/bam_split.c.pysam.c
+++ b/samtools/bam_split.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam_split.c -- split subcommand.
 
-    Copyright (C) 2013-2015 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
 
     Author: Martin Pollard <mp15 at sanger.ac.uk>
 
@@ -36,7 +36,10 @@ DEALINGS IN THE SOFTWARE.  */
 #include <regex.h>
 #include <htslib/khash.h>
 #include <htslib/kstring.h>
+#include <htslib/cram.h>
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
+#include "samtools.h"
 
 
 KHASH_MAP_INIT_STR(c2i, int)
@@ -63,6 +66,7 @@ struct state {
     samFile** rg_output_file;
     bam_hdr_t** rg_output_header;
     kh_c2i_t* rg_hash;
+    htsThreadPool p;
 };
 
 typedef struct state state_t;
@@ -80,7 +84,7 @@ static void usage(FILE *write_to)
 "  -u FILE1        put reads with no RG tag or an unrecognised RG tag in FILE1\n"
 "  -u FILE1:FILE2  ...and override the header with FILE2\n"
 "  -v              verbose output\n");
-    sam_global_opt_help(write_to, "-....");
+    sam_global_opt_help(write_to, "-....@");
     fprintf(write_to,
 "\n"
 "Format string expansions:\n"
@@ -97,11 +101,11 @@ static parsed_opts_t* parse_args(int argc, char** argv)
 {
     if (argc == 1) { usage(pysam_stdout); return NULL; }
 
-    const char* optstring = "vf:u:";
+    const char* optstring = "vf:u:@:";
     char* delim;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0, '@'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -145,7 +149,7 @@ static parsed_opts_t* parse_args(int argc, char** argv)
     argv += optind;
 
     if (argc != 1) {
-        fprintf(pysam_stderr, "Invalid number of arguments: %d\n", argc);
+        print_error("split", "Invalid number of arguments: %d", argc);
         usage(pysam_stderr);
         free(retval);
         return NULL;
@@ -272,7 +276,7 @@ static bool count_RG(bam_hdr_t* hdr, size_t* count, char*** output_name)
 
 // Filters a header of @RG lines where ID != id_keep
 // TODO: strip @PG's descended from other RGs and their descendants
-static bool filter_header_rg(bam_hdr_t* hdr, const char* id_keep)
+static bool filter_header_rg(bam_hdr_t* hdr, const char* id_keep, const char *arg_list)
 {
     kstring_t str = {0, 0, NULL};
 
@@ -317,28 +321,52 @@ static bool filter_header_rg(bam_hdr_t* hdr, const char* id_keep)
     free(hdr->text);
     hdr->text = ks_release(&str);
 
+    // Add the PG line
+    SAM_hdr *sh = sam_hdr_parse_(hdr->text, hdr->l_text);
+    if (sam_hdr_add_PG(sh, "samtools",
+                           "VN", samtools_version(),
+                           arg_list ? "CL": NULL,
+                           arg_list ? arg_list : NULL,
+                           NULL) != 0)
+        return -1;
+
+    free(hdr->text);
+    hdr->text = strdup(sam_hdr_str(sh));
+    hdr->l_text = sam_hdr_length(sh);
+    if (!hdr->text)
+        return false;
+    sam_hdr_free(sh);
+
     return true;
 }
 
 // Set the initial state
-static state_t* init(parsed_opts_t* opts)
+static state_t* init(parsed_opts_t* opts, const char *arg_list)
 {
     state_t* retval = calloc(sizeof(state_t), 1);
     if (!retval) {
-        fprintf(pysam_stderr, "Out of memory");
+        print_error_errno("split", "Initialisation failed");
         return NULL;
     }
 
+    if (opts->ga.nthreads > 0) {
+        if (!(retval->p.pool = hts_tpool_init(opts->ga.nthreads))) {
+            fprintf(pysam_stderr, "Error creating thread pool\n");
+            return NULL;
+        }
+    }
+
     retval->merged_input_file = sam_open_format(opts->merged_input_name, "rb", &opts->ga.in);
     if (!retval->merged_input_file) {
-        fprintf(pysam_stderr, "Could not open input file (%s)\n", opts->merged_input_name);
+        print_error_errno("split", "Could not open \"%s\"", opts->merged_input_name);
         free(retval);
         return NULL;
     }
+    if (retval->p.pool)
+        hts_set_opt(retval->merged_input_file, HTS_OPT_THREAD_POOL, &retval->p);
     retval->merged_input_header = sam_hdr_read(retval->merged_input_file);
     if (retval->merged_input_header == NULL) {
-        fprintf(pysam_stderr, "Could not read header for file '%s'\n",
-                opts->merged_input_name);
+        print_error("split", "Could not read header from \"%s\"", opts->merged_input_name);
         cleanup_state(retval, false);
         return NULL;
     }
@@ -347,14 +375,13 @@ static state_t* init(parsed_opts_t* opts)
         if (opts->unaccounted_header_name) {
             samFile* hdr_load = sam_open_format(opts->unaccounted_header_name, "r", &opts->ga.in);
             if (!hdr_load) {
-                fprintf(pysam_stderr, "Could not open unaccounted header file (%s)\n", opts->unaccounted_header_name);
+                print_error_errno("split", "Could not open unaccounted header file \"%s\"", opts->unaccounted_header_name);
                 cleanup_state(retval, false);
                 return NULL;
             }
             retval->unaccounted_header = sam_hdr_read(hdr_load);
             if (retval->unaccounted_header == NULL) {
-                fprintf(pysam_stderr, "Could not read header for file '%s'\n",
-                        opts->unaccounted_header_name);
+                print_error("split", "Could not read header from \"%s\"", opts->unaccounted_header_name);
                 cleanup_state(retval, false);
                 return NULL;
             }
@@ -365,10 +392,12 @@ static state_t* init(parsed_opts_t* opts)
 
         retval->unaccounted_file = sam_open_format(opts->unaccounted_name, "wb", &opts->ga.out);
         if (retval->unaccounted_file == NULL) {
-            fprintf(pysam_stderr, "Could not open unaccounted output file: %s\n", opts->unaccounted_name);
+            print_error_errno("split", "Could not open unaccounted output file \"%s\"", opts->unaccounted_name);
             cleanup_state(retval, false);
             return NULL;
         }
+        if (retval->p.pool)
+            hts_set_opt(retval->unaccounted_file, HTS_OPT_THREAD_POOL, &retval->p);
     }
 
     // Open output files for RGs
@@ -380,7 +409,7 @@ static state_t* init(parsed_opts_t* opts)
     retval->rg_output_header = (bam_hdr_t**)calloc(retval->output_count, sizeof(bam_hdr_t*));
     retval->rg_hash = kh_init_c2i();
     if (!retval->rg_output_file_name || !retval->rg_output_file || !retval->rg_output_header || !retval->rg_hash) {
-        fprintf(pysam_stderr, "Could not allocate memory for output file array. Out of memory?");
+        print_error_errno("split", "Could not initialise output file array");
         cleanup_state(retval, false);
         return NULL;
     }
@@ -388,7 +417,7 @@ static state_t* init(parsed_opts_t* opts)
     char* dirsep = strrchr(opts->merged_input_name, '/');
     char* input_base_name = strdup(dirsep? dirsep+1 : opts->merged_input_name);
     if (!input_base_name) {
-        fprintf(pysam_stderr, "Out of memory\n");
+        print_error_errno("split", "Filename manipulation failed");
         cleanup_state(retval, false);
         return NULL;
     }
@@ -405,7 +434,7 @@ static state_t* init(parsed_opts_t* opts)
                                                &opts->ga.out);
 
         if ( output_filename == NULL ) {
-            fprintf(pysam_stderr, "Error expanding output filename format string.\n");
+            print_error("split", "Error expanding output filename format string");
             cleanup_state(retval, false);
             free(input_base_name);
             return NULL;
@@ -414,11 +443,13 @@ static state_t* init(parsed_opts_t* opts)
         retval->rg_output_file_name[i] = output_filename;
         retval->rg_output_file[i] = sam_open_format(output_filename, "wb", &opts->ga.out);
         if (retval->rg_output_file[i] == NULL) {
-            fprintf(pysam_stderr, "Could not open output file: %s\n", output_filename);
+            print_error_errno("split", "Could not open \"%s\"", output_filename);
             cleanup_state(retval, false);
             free(input_base_name);
             return NULL;
         }
+        if (retval->p.pool)
+            hts_set_opt(retval->rg_output_file[i], HTS_OPT_THREAD_POOL, &retval->p);
 
         // Record index in hash
         int ret;
@@ -427,8 +458,8 @@ static state_t* init(parsed_opts_t* opts)
 
         // Set and edit header
         retval->rg_output_header[i] = bam_hdr_dup(retval->merged_input_header);
-        if ( !filter_header_rg(retval->rg_output_header[i], retval->rg_id[i]) ) {
-            fprintf(pysam_stderr, "Could not rewrite header for file: %s\n", output_filename);
+        if ( !filter_header_rg(retval->rg_output_header[i], retval->rg_id[i], arg_list) ) {
+            print_error("split", "Could not rewrite header for \"%s\"", output_filename);
             cleanup_state(retval, false);
             free(input_base_name);
             return NULL;
@@ -443,14 +474,13 @@ static state_t* init(parsed_opts_t* opts)
 static bool split(state_t* state)
 {
     if (state->unaccounted_file && sam_hdr_write(state->unaccounted_file, state->unaccounted_header) != 0) {
-        fprintf(pysam_stderr, "Could not write output file header\n");
+        print_error_errno("split", "Could not write output file header");
         return false;
     }
     size_t i;
     for (i = 0; i < state->output_count; i++) {
         if (sam_hdr_write(state->rg_output_file[i], state->rg_output_header[i]) != 0) {
-            fprintf(pysam_stderr, "Could not write output file header for '%s'\n",
-                    state->rg_output_file_name[i]);
+            print_error_errno("split", "Could not write file header to \"%s\"", state->rg_output_file_name[i]);
             return false;
         }
     }
@@ -463,7 +493,7 @@ static bool split(state_t* state)
         bam_destroy1(file_read);
         file_read = NULL;
         if (r < -1) {
-            fprintf(pysam_stderr, "Could not read first input record\n");
+            print_error("split", "Could not read first input record");
             return false;
         }
     }
@@ -484,8 +514,7 @@ static bool split(state_t* state)
             // if found write to the appropriate untangled bam
             int i = kh_val(state->rg_hash,iter);
             if (sam_write1(state->rg_output_file[i], state->rg_output_header[i], file_read) < 0) {
-                fprintf(pysam_stderr, "Could not write to output file '%s'\n",
-                        state->rg_output_file_name[i]);
+                print_error_errno("split", "Could not write to \"%s\"", state->rg_output_file_name[i]);
                 bam_destroy1(file_read);
                 return false;
             }
@@ -501,7 +530,7 @@ static bool split(state_t* state)
                 return false;
             } else {
                 if (sam_write1(state->unaccounted_file, state->unaccounted_header, file_read) < 0) {
-                    fprintf(pysam_stderr, "Could not write to unaccounted output file\n");
+                    print_error_errno("split", "Could not write to unaccounted output file");
                     bam_destroy1(file_read);
                     return false;
                 }
@@ -514,7 +543,7 @@ static bool split(state_t* state)
             bam_destroy1(file_read);
             file_read = NULL;
             if (r < -1) {
-                fprintf(pysam_stderr, "Could not read input record\n");
+                print_error("split", "Could not read input record");
                 return false;
             }
         }
@@ -531,7 +560,7 @@ static int cleanup_state(state_t* status, bool check_close)
     if (status->unaccounted_header) bam_hdr_destroy(status->unaccounted_header);
     if (status->unaccounted_file) {
         if (sam_close(status->unaccounted_file) < 0 && check_close) {
-            fprintf(pysam_stderr, "Error on closing unaccounted file\n");
+            print_error("split", "Error on closing unaccounted file");
             ret = -1;
         }
     }
@@ -542,8 +571,7 @@ static int cleanup_state(state_t* status, bool check_close)
             bam_hdr_destroy(status->rg_output_header[i]);
         if (status->rg_output_file && status->rg_output_file[i]) {
             if (sam_close(status->rg_output_file[i]) < 0 && check_close) {
-                fprintf(pysam_stderr, "Error on closing output file '%s'\n",
-                        status->rg_output_file_name[i]);
+                print_error("split", "Error on closing output file \"%s\"", status->rg_output_file_name[i]);
                 ret = -1;
             }
         }
@@ -559,6 +587,9 @@ static int cleanup_state(state_t* status, bool check_close)
     free(status->rg_id);
     free(status);
 
+    if (status->p.pool)
+        hts_tpool_destroy(status->p.pool);
+
     return ret;
 }
 
@@ -576,9 +607,10 @@ static void cleanup_opts(parsed_opts_t* opts)
 int main_split(int argc, char** argv)
 {
     int ret = 1;
+    char *arg_list = stringify_argv(argc+1, argv-1);
     parsed_opts_t* opts = parse_args(argc, argv);
     if (!opts) goto cleanup_opts;
-    state_t* status = init(opts);
+    state_t* status = init(opts, arg_list);
     if (!status) goto cleanup_opts;
 
     if (!split(status)) {
@@ -590,6 +622,7 @@ int main_split(int argc, char** argv)
 
 cleanup_opts:
     cleanup_opts(opts);
+    free(arg_list);
 
     return ret;
 }
diff --git a/samtools/bam_stat.c b/samtools/bam_stat.c
index f6cf1d5..aa5f8d3 100644
--- a/samtools/bam_stat.c
+++ b/samtools/bam_stat.c
@@ -34,6 +34,7 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include "htslib/sam.h"
 #include "samtools.h"
+#include "sam_opts.h"
 
 typedef struct {
     long long n_reads[2], n_mapped[2], n_pair_all[2], n_pair_map[2], n_pair_good[2];
@@ -94,7 +95,8 @@ static const char *percent(char *buffer, long long n, long long total)
 
 static void usage_exit(FILE *fp, int exit_status)
 {
-    fprintf(fp, "Usage: samtools flagstat [--input-fmt-option OPT=VAL] <in.bam>\n");
+    fprintf(fp, "Usage: samtools flagstat [options] <in.bam>\n");
+    sam_global_opt_help(fp, "-.---@");
     exit(exit_status);
 }
 
@@ -104,25 +106,23 @@ int bam_flagstat(int argc, char *argv[])
     bam_hdr_t *header;
     bam_flagstat_t *s;
     char b0[16], b1[16];
-    hts_opt *in_opts = NULL;
     int c;
 
     enum {
         INPUT_FMT_OPTION = CHAR_MAX+1,
     };
 
+    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        {"input-fmt-option",  required_argument, NULL, INPUT_FMT_OPTION},
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', '-', '@'),
         {NULL, 0, NULL, 0}
     };
 
-    while ((c = getopt_long(argc, argv, "", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "@:", lopts, NULL)) >= 0) {
         switch (c) {
-        case INPUT_FMT_OPTION:
-            if (hts_opt_add(&in_opts, optarg) < 0)
-                usage_exit(stderr, EXIT_FAILURE);
-            break;
-        default:
+        default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
+            /* else fall-through */
+        case '?':
             usage_exit(stderr, EXIT_FAILURE);
         }
     }
@@ -131,15 +131,13 @@ int bam_flagstat(int argc, char *argv[])
         if (argc == optind) usage_exit(stdout, EXIT_SUCCESS);
         else usage_exit(stderr, EXIT_FAILURE);
     }
-    fp = sam_open(argv[optind], "r");
+    fp = sam_open_format(argv[optind], "r", &ga.in);
     if (fp == NULL) {
         print_error_errno("flagstat", "Cannot open input file \"%s\"", argv[optind]);
         return 1;
     }
-    if (hts_opt_apply(fp, in_opts)) {
-        fprintf(stderr, "Failed to apply input-fmt-options\n");
-        return 1;
-    }
+    if (ga.nthreads > 0)
+        hts_set_threads(fp, ga.nthreads);
 
     if (hts_set_opt(fp, CRAM_OPT_REQUIRED_FIELDS,
                     SAM_FLAG | SAM_MAPQ | SAM_RNEXT)) {
@@ -174,6 +172,6 @@ int bam_flagstat(int argc, char *argv[])
     free(s);
     bam_hdr_destroy(header);
     sam_close(fp);
-    hts_opt_free(in_opts);
+    sam_global_args_free(&ga);
     return 0;
 }
diff --git a/samtools/bam_stat.c.pysam.c b/samtools/bam_stat.c.pysam.c
index cdca4dd..bbfe602 100644
--- a/samtools/bam_stat.c.pysam.c
+++ b/samtools/bam_stat.c.pysam.c
@@ -36,6 +36,7 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include "htslib/sam.h"
 #include "samtools.h"
+#include "sam_opts.h"
 
 typedef struct {
     long long n_reads[2], n_mapped[2], n_pair_all[2], n_pair_map[2], n_pair_good[2];
@@ -94,10 +95,11 @@ static const char *percent(char *buffer, long long n, long long total)
     return buffer;
 }
 
-static void usage_exit(FILE *fp, int exit_status)
+static int usage_exit(FILE *fp, int exit_status)
 {
-    fprintf(fp, "Usage: samtools flagstat [--input-fmt-option OPT=VAL] <in.bam>\n");
-    exit(exit_status);
+    fprintf(fp, "Usage: samtools flagstat [options] <in.bam>\n");
+    sam_global_opt_help(fp, "-.---@");
+    return(exit_status);
 }
 
 int bam_flagstat(int argc, char *argv[])
@@ -106,42 +108,38 @@ int bam_flagstat(int argc, char *argv[])
     bam_hdr_t *header;
     bam_flagstat_t *s;
     char b0[16], b1[16];
-    hts_opt *in_opts = NULL;
     int c;
 
     enum {
         INPUT_FMT_OPTION = CHAR_MAX+1,
     };
 
+    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        {"input-fmt-option",  required_argument, NULL, INPUT_FMT_OPTION},
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', '-', '@'),
         {NULL, 0, NULL, 0}
     };
 
-    while ((c = getopt_long(argc, argv, "", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "@:", lopts, NULL)) >= 0) {
         switch (c) {
-        case INPUT_FMT_OPTION:
-            if (hts_opt_add(&in_opts, optarg) < 0)
-                usage_exit(pysam_stderr, EXIT_FAILURE);
-            break;
-        default:
-            usage_exit(pysam_stderr, EXIT_FAILURE);
+        default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
+            /* else fall-through */
+        case '?':
+	  return(usage_exit(pysam_stderr, EXIT_FAILURE));
         }
     }
 
     if (argc != optind+1) {
-        if (argc == optind) usage_exit(pysam_stdout, EXIT_SUCCESS);
-        else usage_exit(pysam_stderr, EXIT_FAILURE);
+      if (argc == optind) return(usage_exit(pysam_stdout, EXIT_SUCCESS));
+      else return(usage_exit(pysam_stderr, EXIT_FAILURE));
     }
-    fp = sam_open(argv[optind], "r");
+    fp = sam_open_format(argv[optind], "r", &ga.in);
     if (fp == NULL) {
         print_error_errno("flagstat", "Cannot open input file \"%s\"", argv[optind]);
         return 1;
     }
-    if (hts_opt_apply(fp, in_opts)) {
-        fprintf(pysam_stderr, "Failed to apply input-fmt-options\n");
-        return 1;
-    }
+    if (ga.nthreads > 0)
+        hts_set_threads(fp, ga.nthreads);
 
     if (hts_set_opt(fp, CRAM_OPT_REQUIRED_FIELDS,
                     SAM_FLAG | SAM_MAPQ | SAM_RNEXT)) {
@@ -176,6 +174,6 @@ int bam_flagstat(int argc, char *argv[])
     free(s);
     bam_hdr_destroy(header);
     sam_close(fp);
-    hts_opt_free(in_opts);
+    sam_global_args_free(&ga);
     return 0;
 }
diff --git a/samtools/bam_tview.c b/samtools/bam_tview.c
deleted file mode 100644
index f1f0cc7..0000000
--- a/samtools/bam_tview.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/*  bam_tview.c -- tview subcommand.
-
-    Copyright (C) 2008-2015 Genome Research Ltd.
-    Portions copyright (C) 2013 Pierre Lindenbaum, Institut du Thorax, INSERM U1087, Université de Nantes.
-
-    Author: Heng Li <lh3 at sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notices and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#include <config.h>
-
-#include <regex.h>
-#include <assert.h>
-#include "bam_tview.h"
-#include <htslib/faidx.h>
-#include <htslib/sam.h>
-#include <htslib/bgzf.h>
-#include "sam_opts.h"
-
-khash_t(kh_rg)* get_rg_sample(const char* header, const char* sample)
-{
-    khash_t(kh_rg)* rg_hash = kh_init(kh_rg);
-    // given sample id return all the RD ID's
-    const char rg_regex[] = "^@RG.*\tID:([!-)+-<>-~][ !-~]*)(\t.*$|$)";
-
-    regex_t rg_id;
-    regmatch_t* matches = (regmatch_t*)calloc(2, sizeof(regmatch_t));
-    if (matches == NULL) { perror("out of memory"); exit(-1); }
-    regcomp(&rg_id, rg_regex, REG_EXTENDED|REG_NEWLINE);
-    char* text = strdup(header);
-    char* end = text + strlen(header);
-    char* tofree = text;
-    while (end > text && regexec(&rg_id, text, 2, matches, 0) == 0) { //    foreach rg id in  header
-        int ret;
-        text[matches[1].rm_eo] = '\0';
-        kh_put(kh_rg, rg_hash, strdup(text+matches[1].rm_so), &ret); // Add the RG to the list
-        text += matches[0].rm_eo + 1; // Move search pointer forward
-    }
-    free(tofree);
-    return rg_hash;
-}
-
-int base_tv_init(tview_t* tv, const char *fn, const char *fn_fa,
-                 const char *samples, const htsFormat *fmt)
-{
-    assert(tv!=NULL);
-    assert(fn!=NULL);
-    tv->mrow = 24; tv->mcol = 80;
-    tv->color_for = TV_COLOR_MAPQ;
-    tv->is_dot = 1;
-
-    tv->fp = sam_open_format(fn, "r", fmt);
-    if(tv->fp == NULL)
-    {
-        fprintf(stderr,"sam_open %s. %s\n", fn,fn_fa);
-        exit(EXIT_FAILURE);
-    }
-    // TODO bgzf_set_cache_size(tv->fp->fp.bgzf, 8 * 1024 *1024);
-    assert(tv->fp);
-
-    tv->header = sam_hdr_read(tv->fp);
-    if(tv->header == NULL)
-    {
-        fprintf(stderr,"Cannot read '%s'.\n", fn);
-        exit(EXIT_FAILURE);
-    }
-    tv->idx = sam_index_load(tv->fp, fn);
-    if (tv->idx == NULL)
-    {
-        fprintf(stderr,"Cannot read index for '%s'.\n", fn);
-        exit(EXIT_FAILURE);
-    }
-    tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv);
-    if (fn_fa) tv->fai = fai_load(fn_fa);
-    tv->bca = bcf_call_init(0.83, 13);
-    tv->ins = 1;
-
-    // If the user has asked for specific samples find out create a list of readgroups make up these samples
-    if ( samples )
-    {
-        tv->rg_hash = get_rg_sample(tv->header->text, samples); // Init the list of rg's
-    }
-
-    return 0;
-}
-
-
-void base_tv_destroy(tview_t* tv)
-{
-    bam_lplbuf_destroy(tv->lplbuf);
-    bcf_call_destroy(tv->bca);
-    hts_idx_destroy(tv->idx);
-    if (tv->fai) fai_destroy(tv->fai);
-    free(tv->ref);
-    bam_hdr_destroy(tv->header);
-    sam_close(tv->fp);
-}
-
-
-int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
-{
-    tview_t *tv = (tview_t*)data;
-    int i, j, c, rb, attr, max_ins = 0;
-    uint32_t call = 0;
-    if (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen
-    // print reference
-    rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N';
-    for (i = tv->last_pos + 1; i < pos; ++i) {
-        if (i%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", i+1);
-        c = tv->ref? tv->ref[i - tv->left_pos] : 'N';
-        tv->my_mvaddch(tv,1, tv->ccol++, c);
-    }
-    if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
-    { // call consensus
-        bcf_callret1_t bcr;
-        memset(&bcr, 0, sizeof bcr);
-        int qsum[4], a1, a2, tmp;
-        double p[3], prior = 30;
-        bcf_call_glfgen(n, pl, seq_nt16_table[rb], tv->bca, &bcr);
-        for (i = 0; i < 4; ++i) qsum[i] = ((int)bcr.qsum[i])<<2 | i;
-        for (i = 1; i < 4; ++i) // insertion sort
-            for (j = i; j > 0 && qsum[j] > qsum[j-1]; --j)
-                tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp;
-        a1 = qsum[0]&3; a2 = qsum[1]&3;
-        p[0] = bcr.p[a1*5+a1]; p[1] = bcr.p[a1*5+a2] + prior; p[2] = bcr.p[a2*5+a2];
-        if ("ACGT"[a1] != toupper(rb)) p[0] += prior + 3;
-        if ("ACGT"[a2] != toupper(rb)) p[2] += prior + 3;
-        if (p[0] < p[1] && p[0] < p[2]) call = (1<<a1)<<16 | (int)((p[1]<p[2]?p[1]:p[2]) - p[0] + .499);
-        else if (p[2] < p[1] && p[2] < p[0]) call = (1<<a2)<<16 | (int)((p[0]<p[1]?p[0]:p[1]) - p[2] + .499);
-        else call = (1<<a1|1<<a2)<<16 | (int)((p[0]<p[2]?p[0]:p[2]) - p[1] + .499);
-    }
-    attr = tv->my_underline(tv);
-    c = ",ACMGRSVTWYHKDBN"[call>>16&0xf];
-    i = (call&0xffff)/10+1;
-    if (i > 4) i = 4;
-    attr |= tv->my_colorpair(tv,i);
-    if (c == toupper(rb)) c = '.';
-    tv->my_attron(tv,attr);
-    tv->my_mvaddch(tv,2, tv->ccol, c);
-    tv->my_attroff(tv,attr);
-    if(tv->ins) {
-        // calculate maximum insert
-        for (i = 0; i < n; ++i) {
-            const bam_pileup1_t *p = pl + i;
-            if (p->indel > 0 && max_ins < p->indel) max_ins = p->indel;
-        }
-    }
-    // core loop
-    for (j = 0; j <= max_ins; ++j) {
-        for (i = 0; i < n; ++i) {
-            const bam_pileup1_t *p = pl + i;
-            int row = TV_MIN_ALNROW + p->level - tv->row_shift;
-            if (j == 0) {
-                if (!p->is_del) {
-                    if (tv->base_for == TV_BASE_COLOR_SPACE &&
-                            (c = bam_aux_getCSi(p->b, p->qpos))) {
-                        // assume that if we found one color, we will be able to get the color error
-                        if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos)) c = bam_is_rev(p->b)? ',' : '.';
-                    } else {
-                        if (tv->show_name) {
-                            char *name = bam_get_qname(p->b);
-                            c = (p->qpos + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos];
-                        } else {
-                            c = seq_nt16_str[bam_seqi(bam_get_seq(p->b), p->qpos)];
-                            if (tv->is_dot && toupper(c) == toupper(rb)) c = bam_is_rev(p->b)? ',' : '.';
-                        }
-                    }
-                } else c = p->is_refskip? (bam_is_rev(p->b)? '<' : '>') : '*';
-            } else { // padding
-                if (j > p->indel) c = '*';
-                else { // insertion
-                    if (tv->base_for ==  TV_BASE_NUCL) {
-                        if (tv->show_name) {
-                            char *name = bam_get_qname(p->b);
-                            c = (p->qpos + j + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos + j];
-                        } else {
-                            c = seq_nt16_str[bam_seqi(bam_get_seq(p->b), p->qpos + j)];
-                            if (j == 0 && tv->is_dot && toupper(c) == toupper(rb)) c = bam_is_rev(p->b)? ',' : '.';
-                        }
-                    } else {
-                        c = bam_aux_getCSi(p->b, p->qpos + j);
-                        if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos + j)) c = bam_is_rev(p->b)? ',' : '.';
-                    }
-                }
-            }
-            if (row > TV_MIN_ALNROW && row < tv->mrow) {
-                int x;
-                attr = 0;
-                if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR))
-                        || (p->b->core.flag & BAM_FSECONDARY)) attr |= tv->my_underline(tv);
-                if (tv->color_for == TV_COLOR_BASEQ) {
-                    x = bam_get_qual(p->b)[p->qpos]/10 + 1;
-                    if (x > 4) x = 4;
-                    attr |= tv->my_colorpair(tv,x);
-                } else if (tv->color_for == TV_COLOR_MAPQ) {
-                    x = p->b->core.qual/10 + 1;
-                    if (x > 4) x = 4;
-                    attr |= tv->my_colorpair(tv,x);
-                } else if (tv->color_for == TV_COLOR_NUCL) {
-                    x = seq_nt16_int[bam_seqi(bam_get_seq(p->b), p->qpos)] + 5;
-                    attr |= tv->my_colorpair(tv,x);
-                } else if(tv->color_for == TV_COLOR_COL) {
-                    x = 0;
-                    switch(bam_aux_getCSi(p->b, p->qpos)) {
-                        case '0': x = 0; break;
-                        case '1': x = 1; break;
-                        case '2': x = 2; break;
-                        case '3': x = 3; break;
-                        case '4': x = 4; break;
-                        default: x = seq_nt16_int[bam_seqi(bam_get_seq(p->b), p->qpos)]; break;
-                    }
-                    x+=5;
-                    attr |= tv->my_colorpair(tv,x);
-                } else if(tv->color_for == TV_COLOR_COLQ) {
-                    x = bam_aux_getCQi(p->b, p->qpos);
-                    if(0 == x) x = bam_get_qual(p->b)[p->qpos];
-                    x = x/10 + 1;
-                    if (x > 4) x = 4;
-                    attr |= tv->my_colorpair(tv,x);
-                }
-                tv->my_attron(tv,attr);
-                tv->my_mvaddch(tv,row, tv->ccol, bam_is_rev(p->b)? tolower(c) : toupper(c));
-                tv->my_attroff(tv,attr);
-            }
-        }
-        c = j? '*' : rb;
-        if (c == '*') {
-            attr = tv->my_colorpair(tv,8);
-            tv->my_attron(tv,attr);
-            tv->my_mvaddch(tv,1, tv->ccol++, c);
-            tv->my_attroff(tv,attr);
-        } else tv->my_mvaddch(tv,1, tv->ccol++, c);
-    }
-    tv->last_pos = pos;
-    return 0;
-}
-
-
-
-
-static int tv_push_aln(const bam1_t *b, tview_t *tv)
-{
-    /* If we are restricted to specific readgroups check RG is in the list */
-    if ( tv->rg_hash )
-    {
-        const uint8_t *rg = bam_aux_get(b, "RG");
-        if ( !rg ) return 0; // If we don't have an RG tag exclude read
-        khiter_t k = kh_get(kh_rg, tv->rg_hash, (const char*)(rg + 1));
-        if ( k == kh_end(tv->rg_hash) ) return 0; // if RG tag is not in list of allowed tags exclude read
-    }
-    if (tv->no_skip) {
-        uint32_t *cigar = bam_get_cigar(b); // this is cheating...
-        int i;
-        for (i = 0; i <b->core.n_cigar; ++i) {
-            if ((cigar[i]&0xf) == BAM_CREF_SKIP)
-                cigar[i] = cigar[i]>>4<<4 | BAM_CDEL;
-        }
-    }
-    bam_lplbuf_push(b, tv->lplbuf);
-    return 0;
-}
-
-int base_draw_aln(tview_t *tv, int tid, int pos)
-{
-    assert(tv!=NULL);
-    // reset
-    tv->my_clear(tv);
-    tv->curr_tid = tid; tv->left_pos = pos;
-    tv->last_pos = tv->left_pos - 1;
-    tv->ccol = 0;
-    // print ref and consensus
-    if (tv->fai) {
-        char *str;
-        if (tv->ref) free(tv->ref);
-        assert(tv->curr_tid>=0);
-
-        str = (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1);
-        assert(str!=NULL);
-        sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol);
-        tv->ref = fai_fetch(tv->fai, str, &tv->l_ref);
-        free(str);
-        if ( !tv->ref )
-        {
-            fprintf(stderr,"Could not read the reference sequence. Is it seekable (plain text or compressed + .gzi indexed with bgzip)?\n");
-            exit(1);
-        }
-    }
-    // draw aln
-    bam_lplbuf_reset(tv->lplbuf);
-    hts_itr_t *iter = sam_itr_queryi(tv->idx, tv->curr_tid, tv->left_pos, tv->left_pos + tv->mcol);
-    bam1_t *b = bam_init1();
-    while (sam_itr_next(tv->fp, iter, b) >= 0) tv_push_aln(b, tv);
-    bam_destroy1(b);
-    hts_itr_destroy(iter);
-    bam_lplbuf_push(0, tv->lplbuf);
-
-    while (tv->ccol < tv->mcol) {
-        int pos = tv->last_pos + 1;
-        if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
-        tv->my_mvaddch(tv,1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N');
-        ++tv->last_pos;
-    }
-    return 0;
-}
-
-
-
-
-static void error(const char *format, ...)
-{
-    if ( !format )
-    {
-        fprintf(stderr,
-"Usage: samtools tview [options] <aln.bam> [ref.fasta]\n"
-"Options:\n"
-"   -d display      output as (H)tml or (C)urses or (T)ext \n"
-"   -p chr:pos      go directly to this position\n"
-"   -s STR          display only reads from this sample or group\n");
-        sam_global_opt_help(stderr, "-.--.");
-    }
-    else
-    {
-        va_list ap;
-        va_start(ap, format);
-        vfprintf(stderr, format, ap);
-        va_end(ap);
-    }
-    exit(-1);
-}
-
-enum dipsay_mode {display_ncurses,display_html,display_text};
-extern tview_t* curses_tv_init(const char *fn, const char *fn_fa,
-                               const char *samples, const htsFormat *fmt);
-extern tview_t* html_tv_init(const char *fn, const char *fn_fa,
-                             const char *samples, const htsFormat *fmt);
-extern tview_t* text_tv_init(const char *fn, const char *fn_fa,
-                             const char *samples, const htsFormat *fmt);
-
-int bam_tview_main(int argc, char *argv[])
-{
-    int view_mode=display_ncurses;
-    tview_t* tv=NULL;
-    char *samples=NULL, *position=NULL, *ref;
-    int c;
-
-    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
-    static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
-        { NULL, 0, NULL, 0 }
-    };
-
-    while ((c = getopt_long(argc, argv, "s:p:d:", lopts, NULL)) >= 0) {
-        switch (c) {
-            case 's': samples=optarg; break;
-            case 'p': position=optarg; break;
-            case 'd':
-            {
-                switch(optarg[0])
-                {
-                    case 'H': case 'h': view_mode=display_html;break;
-                    case 'T': case 't': view_mode=display_text;break;
-                    case 'C': case 'c': view_mode=display_ncurses;break;
-                    default: view_mode=display_ncurses;break;
-                }
-                break;
-            }
-            default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
-                      /* else fall-through */
-            case '?': error(NULL);
-        }
-    }
-    if (argc==optind) error(NULL);
-
-    ref = (optind+1>=argc)? ga.reference : argv[optind+1];
-
-    switch(view_mode)
-    {
-        case display_ncurses:
-            tv = curses_tv_init(argv[optind], ref, samples, &ga.in);
-            break;
-
-        case display_text:
-            tv = text_tv_init(argv[optind], ref, samples, &ga.in);
-            break;
-
-        case display_html:
-            tv = html_tv_init(argv[optind], ref, samples, &ga.in);
-            break;
-    }
-    if (tv==NULL)
-    {
-        error("cannot create view");
-        return EXIT_FAILURE;
-    }
-
-    if ( position )
-    {
-        int tid, beg, end;
-        char *name_lim = (char *) hts_parse_reg(position, &beg, &end);
-        if (name_lim) *name_lim = '\0';
-        else beg = 0; // region parsing failed, but possibly a seq named "foo:a"
-        tid = bam_name2id(tv->header, position);
-        if (tid >= 0) { tv->curr_tid = tid; tv->left_pos = beg; }
-    }
-    else if ( tv->fai )
-    {
-        // find the first sequence present in both BAM and the reference file
-        int i;
-        for (i=0; i<tv->header->n_targets; i++)
-        {
-            if ( faidx_has_seq(tv->fai, tv->header->target_name[i]) ) break;
-        }
-        if ( i==tv->header->n_targets )
-        {
-            fprintf(stderr,"None of the BAM sequence names present in the fasta file\n");
-            exit(EXIT_FAILURE);
-        }
-        tv->curr_tid = i;
-    }
-    tv->my_drawaln(tv, tv->curr_tid, tv->left_pos);
-    tv->my_loop(tv);
-    tv->my_destroy(tv);
-
-    return EXIT_SUCCESS;
-}
diff --git a/samtools/bam_tview.c.pysam.c b/samtools/bam_tview.c.pysam.c
deleted file mode 100644
index a47bced..0000000
--- a/samtools/bam_tview.c.pysam.c
+++ /dev/null
@@ -1,443 +0,0 @@
-#include "pysam.h"
-
-/*  bam_tview.c -- tview subcommand.
-
-    Copyright (C) 2008-2015 Genome Research Ltd.
-    Portions copyright (C) 2013 Pierre Lindenbaum, Institut du Thorax, INSERM U1087, Université de Nantes.
-
-    Author: Heng Li <lh3 at sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notices and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#include <config.h>
-
-#include <regex.h>
-#include <assert.h>
-#include "bam_tview.h"
-#include <htslib/faidx.h>
-#include <htslib/sam.h>
-#include <htslib/bgzf.h>
-#include "sam_opts.h"
-
-khash_t(kh_rg)* get_rg_sample(const char* header, const char* sample)
-{
-    khash_t(kh_rg)* rg_hash = kh_init(kh_rg);
-    // given sample id return all the RD ID's
-    const char rg_regex[] = "^@RG.*\tID:([!-)+-<>-~][ !-~]*)(\t.*$|$)";
-
-    regex_t rg_id;
-    regmatch_t* matches = (regmatch_t*)calloc(2, sizeof(regmatch_t));
-    if (matches == NULL) { perror("out of memory"); exit(-1); }
-    regcomp(&rg_id, rg_regex, REG_EXTENDED|REG_NEWLINE);
-    char* text = strdup(header);
-    char* end = text + strlen(header);
-    char* tofree = text;
-    while (end > text && regexec(&rg_id, text, 2, matches, 0) == 0) { //    foreach rg id in  header
-        int ret;
-        text[matches[1].rm_eo] = '\0';
-        kh_put(kh_rg, rg_hash, strdup(text+matches[1].rm_so), &ret); // Add the RG to the list
-        text += matches[0].rm_eo + 1; // Move search pointer forward
-    }
-    free(tofree);
-    return rg_hash;
-}
-
-int base_tv_init(tview_t* tv, const char *fn, const char *fn_fa,
-                 const char *samples, const htsFormat *fmt)
-{
-    assert(tv!=NULL);
-    assert(fn!=NULL);
-    tv->mrow = 24; tv->mcol = 80;
-    tv->color_for = TV_COLOR_MAPQ;
-    tv->is_dot = 1;
-
-    tv->fp = sam_open_format(fn, "r", fmt);
-    if(tv->fp == NULL)
-    {
-        fprintf(pysam_stderr,"sam_open %s. %s\n", fn,fn_fa);
-        exit(EXIT_FAILURE);
-    }
-    // TODO bgzf_set_cache_size(tv->fp->fp.bgzf, 8 * 1024 *1024);
-    assert(tv->fp);
-
-    tv->header = sam_hdr_read(tv->fp);
-    if(tv->header == NULL)
-    {
-        fprintf(pysam_stderr,"Cannot read '%s'.\n", fn);
-        exit(EXIT_FAILURE);
-    }
-    tv->idx = sam_index_load(tv->fp, fn);
-    if (tv->idx == NULL)
-    {
-        fprintf(pysam_stderr,"Cannot read index for '%s'.\n", fn);
-        exit(EXIT_FAILURE);
-    }
-    tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv);
-    if (fn_fa) tv->fai = fai_load(fn_fa);
-    tv->bca = bcf_call_init(0.83, 13);
-    tv->ins = 1;
-
-    // If the user has asked for specific samples find out create a list of readgroups make up these samples
-    if ( samples )
-    {
-        tv->rg_hash = get_rg_sample(tv->header->text, samples); // Init the list of rg's
-    }
-
-    return 0;
-}
-
-
-void base_tv_destroy(tview_t* tv)
-{
-    bam_lplbuf_destroy(tv->lplbuf);
-    bcf_call_destroy(tv->bca);
-    hts_idx_destroy(tv->idx);
-    if (tv->fai) fai_destroy(tv->fai);
-    free(tv->ref);
-    bam_hdr_destroy(tv->header);
-    sam_close(tv->fp);
-}
-
-
-int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
-{
-    tview_t *tv = (tview_t*)data;
-    int i, j, c, rb, attr, max_ins = 0;
-    uint32_t call = 0;
-    if (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen
-    // print reference
-    rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N';
-    for (i = tv->last_pos + 1; i < pos; ++i) {
-        if (i%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", i+1);
-        c = tv->ref? tv->ref[i - tv->left_pos] : 'N';
-        tv->my_mvaddch(tv,1, tv->ccol++, c);
-    }
-    if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
-    { // call consensus
-        bcf_callret1_t bcr;
-        memset(&bcr, 0, sizeof bcr);
-        int qsum[4], a1, a2, tmp;
-        double p[3], prior = 30;
-        bcf_call_glfgen(n, pl, seq_nt16_table[rb], tv->bca, &bcr);
-        for (i = 0; i < 4; ++i) qsum[i] = ((int)bcr.qsum[i])<<2 | i;
-        for (i = 1; i < 4; ++i) // insertion sort
-            for (j = i; j > 0 && qsum[j] > qsum[j-1]; --j)
-                tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp;
-        a1 = qsum[0]&3; a2 = qsum[1]&3;
-        p[0] = bcr.p[a1*5+a1]; p[1] = bcr.p[a1*5+a2] + prior; p[2] = bcr.p[a2*5+a2];
-        if ("ACGT"[a1] != toupper(rb)) p[0] += prior + 3;
-        if ("ACGT"[a2] != toupper(rb)) p[2] += prior + 3;
-        if (p[0] < p[1] && p[0] < p[2]) call = (1<<a1)<<16 | (int)((p[1]<p[2]?p[1]:p[2]) - p[0] + .499);
-        else if (p[2] < p[1] && p[2] < p[0]) call = (1<<a2)<<16 | (int)((p[0]<p[1]?p[0]:p[1]) - p[2] + .499);
-        else call = (1<<a1|1<<a2)<<16 | (int)((p[0]<p[2]?p[0]:p[2]) - p[1] + .499);
-    }
-    attr = tv->my_underline(tv);
-    c = ",ACMGRSVTWYHKDBN"[call>>16&0xf];
-    i = (call&0xffff)/10+1;
-    if (i > 4) i = 4;
-    attr |= tv->my_colorpair(tv,i);
-    if (c == toupper(rb)) c = '.';
-    tv->my_attron(tv,attr);
-    tv->my_mvaddch(tv,2, tv->ccol, c);
-    tv->my_attroff(tv,attr);
-    if(tv->ins) {
-        // calculate maximum insert
-        for (i = 0; i < n; ++i) {
-            const bam_pileup1_t *p = pl + i;
-            if (p->indel > 0 && max_ins < p->indel) max_ins = p->indel;
-        }
-    }
-    // core loop
-    for (j = 0; j <= max_ins; ++j) {
-        for (i = 0; i < n; ++i) {
-            const bam_pileup1_t *p = pl + i;
-            int row = TV_MIN_ALNROW + p->level - tv->row_shift;
-            if (j == 0) {
-                if (!p->is_del) {
-                    if (tv->base_for == TV_BASE_COLOR_SPACE &&
-                            (c = bam_aux_getCSi(p->b, p->qpos))) {
-                        // assume that if we found one color, we will be able to get the color error
-                        if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos)) c = bam_is_rev(p->b)? ',' : '.';
-                    } else {
-                        if (tv->show_name) {
-                            char *name = bam_get_qname(p->b);
-                            c = (p->qpos + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos];
-                        } else {
-                            c = seq_nt16_str[bam_seqi(bam_get_seq(p->b), p->qpos)];
-                            if (tv->is_dot && toupper(c) == toupper(rb)) c = bam_is_rev(p->b)? ',' : '.';
-                        }
-                    }
-                } else c = p->is_refskip? (bam_is_rev(p->b)? '<' : '>') : '*';
-            } else { // padding
-                if (j > p->indel) c = '*';
-                else { // insertion
-                    if (tv->base_for ==  TV_BASE_NUCL) {
-                        if (tv->show_name) {
-                            char *name = bam_get_qname(p->b);
-                            c = (p->qpos + j + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos + j];
-                        } else {
-                            c = seq_nt16_str[bam_seqi(bam_get_seq(p->b), p->qpos + j)];
-                            if (j == 0 && tv->is_dot && toupper(c) == toupper(rb)) c = bam_is_rev(p->b)? ',' : '.';
-                        }
-                    } else {
-                        c = bam_aux_getCSi(p->b, p->qpos + j);
-                        if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos + j)) c = bam_is_rev(p->b)? ',' : '.';
-                    }
-                }
-            }
-            if (row > TV_MIN_ALNROW && row < tv->mrow) {
-                int x;
-                attr = 0;
-                if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR))
-                        || (p->b->core.flag & BAM_FSECONDARY)) attr |= tv->my_underline(tv);
-                if (tv->color_for == TV_COLOR_BASEQ) {
-                    x = bam_get_qual(p->b)[p->qpos]/10 + 1;
-                    if (x > 4) x = 4;
-                    attr |= tv->my_colorpair(tv,x);
-                } else if (tv->color_for == TV_COLOR_MAPQ) {
-                    x = p->b->core.qual/10 + 1;
-                    if (x > 4) x = 4;
-                    attr |= tv->my_colorpair(tv,x);
-                } else if (tv->color_for == TV_COLOR_NUCL) {
-                    x = seq_nt16_int[bam_seqi(bam_get_seq(p->b), p->qpos)] + 5;
-                    attr |= tv->my_colorpair(tv,x);
-                } else if(tv->color_for == TV_COLOR_COL) {
-                    x = 0;
-                    switch(bam_aux_getCSi(p->b, p->qpos)) {
-                        case '0': x = 0; break;
-                        case '1': x = 1; break;
-                        case '2': x = 2; break;
-                        case '3': x = 3; break;
-                        case '4': x = 4; break;
-                        default: x = seq_nt16_int[bam_seqi(bam_get_seq(p->b), p->qpos)]; break;
-                    }
-                    x+=5;
-                    attr |= tv->my_colorpair(tv,x);
-                } else if(tv->color_for == TV_COLOR_COLQ) {
-                    x = bam_aux_getCQi(p->b, p->qpos);
-                    if(0 == x) x = bam_get_qual(p->b)[p->qpos];
-                    x = x/10 + 1;
-                    if (x > 4) x = 4;
-                    attr |= tv->my_colorpair(tv,x);
-                }
-                tv->my_attron(tv,attr);
-                tv->my_mvaddch(tv,row, tv->ccol, bam_is_rev(p->b)? tolower(c) : toupper(c));
-                tv->my_attroff(tv,attr);
-            }
-        }
-        c = j? '*' : rb;
-        if (c == '*') {
-            attr = tv->my_colorpair(tv,8);
-            tv->my_attron(tv,attr);
-            tv->my_mvaddch(tv,1, tv->ccol++, c);
-            tv->my_attroff(tv,attr);
-        } else tv->my_mvaddch(tv,1, tv->ccol++, c);
-    }
-    tv->last_pos = pos;
-    return 0;
-}
-
-
-
-
-static int tv_push_aln(const bam1_t *b, tview_t *tv)
-{
-    /* If we are restricted to specific readgroups check RG is in the list */
-    if ( tv->rg_hash )
-    {
-        const uint8_t *rg = bam_aux_get(b, "RG");
-        if ( !rg ) return 0; // If we don't have an RG tag exclude read
-        khiter_t k = kh_get(kh_rg, tv->rg_hash, (const char*)(rg + 1));
-        if ( k == kh_end(tv->rg_hash) ) return 0; // if RG tag is not in list of allowed tags exclude read
-    }
-    if (tv->no_skip) {
-        uint32_t *cigar = bam_get_cigar(b); // this is cheating...
-        int i;
-        for (i = 0; i <b->core.n_cigar; ++i) {
-            if ((cigar[i]&0xf) == BAM_CREF_SKIP)
-                cigar[i] = cigar[i]>>4<<4 | BAM_CDEL;
-        }
-    }
-    bam_lplbuf_push(b, tv->lplbuf);
-    return 0;
-}
-
-int base_draw_aln(tview_t *tv, int tid, int pos)
-{
-    assert(tv!=NULL);
-    // reset
-    tv->my_clear(tv);
-    tv->curr_tid = tid; tv->left_pos = pos;
-    tv->last_pos = tv->left_pos - 1;
-    tv->ccol = 0;
-    // print ref and consensus
-    if (tv->fai) {
-        char *str;
-        if (tv->ref) free(tv->ref);
-        assert(tv->curr_tid>=0);
-
-        str = (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1);
-        assert(str!=NULL);
-        sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol);
-        tv->ref = fai_fetch(tv->fai, str, &tv->l_ref);
-        free(str);
-        if ( !tv->ref )
-        {
-            fprintf(pysam_stderr,"Could not read the reference sequence. Is it seekable (plain text or compressed + .gzi indexed with bgzip)?\n");
-            exit(1);
-        }
-    }
-    // draw aln
-    bam_lplbuf_reset(tv->lplbuf);
-    hts_itr_t *iter = sam_itr_queryi(tv->idx, tv->curr_tid, tv->left_pos, tv->left_pos + tv->mcol);
-    bam1_t *b = bam_init1();
-    while (sam_itr_next(tv->fp, iter, b) >= 0) tv_push_aln(b, tv);
-    bam_destroy1(b);
-    hts_itr_destroy(iter);
-    bam_lplbuf_push(0, tv->lplbuf);
-
-    while (tv->ccol < tv->mcol) {
-        int pos = tv->last_pos + 1;
-        if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
-        tv->my_mvaddch(tv,1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N');
-        ++tv->last_pos;
-    }
-    return 0;
-}
-
-
-
-
-static void error(const char *format, ...)
-{
-    if ( !format )
-    {
-        fprintf(pysam_stderr,
-"Usage: samtools tview [options] <aln.bam> [ref.fasta]\n"
-"Options:\n"
-"   -d display      output as (H)tml or (C)urses or (T)ext \n"
-"   -p chr:pos      go directly to this position\n"
-"   -s STR          display only reads from this sample or group\n");
-        sam_global_opt_help(pysam_stderr, "-.--.");
-    }
-    else
-    {
-        va_list ap;
-        va_start(ap, format);
-        vfprintf(pysam_stderr, format, ap);
-        va_end(ap);
-    }
-    exit(-1);
-}
-
-enum dipsay_mode {display_ncurses,display_html,display_text};
-extern tview_t* curses_tv_init(const char *fn, const char *fn_fa,
-                               const char *samples, const htsFormat *fmt);
-extern tview_t* html_tv_init(const char *fn, const char *fn_fa,
-                             const char *samples, const htsFormat *fmt);
-extern tview_t* text_tv_init(const char *fn, const char *fn_fa,
-                             const char *samples, const htsFormat *fmt);
-
-int bam_tview_main(int argc, char *argv[])
-{
-    int view_mode=display_ncurses;
-    tview_t* tv=NULL;
-    char *samples=NULL, *position=NULL, *ref;
-    int c;
-
-    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
-    static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
-        { NULL, 0, NULL, 0 }
-    };
-
-    while ((c = getopt_long(argc, argv, "s:p:d:", lopts, NULL)) >= 0) {
-        switch (c) {
-            case 's': samples=optarg; break;
-            case 'p': position=optarg; break;
-            case 'd':
-            {
-                switch(optarg[0])
-                {
-                    case 'H': case 'h': view_mode=display_html;break;
-                    case 'T': case 't': view_mode=display_text;break;
-                    case 'C': case 'c': view_mode=display_ncurses;break;
-                    default: view_mode=display_ncurses;break;
-                }
-                break;
-            }
-            default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
-                      /* else fall-through */
-            case '?': error(NULL);
-        }
-    }
-    if (argc==optind) error(NULL);
-
-    ref = (optind+1>=argc)? ga.reference : argv[optind+1];
-
-    switch(view_mode)
-    {
-        case display_ncurses:
-            tv = curses_tv_init(argv[optind], ref, samples, &ga.in);
-            break;
-
-        case display_text:
-            tv = text_tv_init(argv[optind], ref, samples, &ga.in);
-            break;
-
-        case display_html:
-            tv = html_tv_init(argv[optind], ref, samples, &ga.in);
-            break;
-    }
-    if (tv==NULL)
-    {
-        error("cannot create view");
-        return EXIT_FAILURE;
-    }
-
-    if ( position )
-    {
-        int tid, beg, end;
-        char *name_lim = (char *) hts_parse_reg(position, &beg, &end);
-        if (name_lim) *name_lim = '\0';
-        else beg = 0; // region parsing failed, but possibly a seq named "foo:a"
-        tid = bam_name2id(tv->header, position);
-        if (tid >= 0) { tv->curr_tid = tid; tv->left_pos = beg; }
-    }
-    else if ( tv->fai )
-    {
-        // find the first sequence present in both BAM and the reference file
-        int i;
-        for (i=0; i<tv->header->n_targets; i++)
-        {
-            if ( faidx_has_seq(tv->fai, tv->header->target_name[i]) ) break;
-        }
-        if ( i==tv->header->n_targets )
-        {
-            fprintf(pysam_stderr,"None of the BAM sequence names present in the fasta file\n");
-            exit(EXIT_FAILURE);
-        }
-        tv->curr_tid = i;
-    }
-    tv->my_drawaln(tv, tv->curr_tid, tv->left_pos);
-    tv->my_loop(tv);
-    tv->my_destroy(tv);
-
-    return EXIT_SUCCESS;
-}
diff --git a/samtools/bam_tview.h b/samtools/bam_tview.h
deleted file mode 100644
index e11e39d..0000000
--- a/samtools/bam_tview.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*  bam_tview.h -- tview subcommand.
-
-    Copyright (C) 2008, 2013 Genome Research Ltd.
-    Portions copyright (C) 2013 Pierre Lindenbaum, Institut du Thorax, INSERM U1087, Université de Nantes.
-
-    Author: Heng Li <lh3 at sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notices and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#ifndef BAM_TVIEW_H
-#define BAM_TVIEW_H
-
-#include <ctype.h>
-#include <string.h>
-#include <math.h>
-#include <unistd.h>
-#include <stdarg.h>
-#include <htslib/sam.h>
-#include "bam2bcf.h"
-#include <htslib/khash.h>
-#include <htslib/hts.h>
-#include <htslib/faidx.h>
-#include "bam_lpileup.h"
-
-
-KHASH_MAP_INIT_STR(kh_rg, const char *)
-
-/* Holds state of Tview */
-typedef struct AbstractTview {
-    int mrow, mcol;
-
-    hts_idx_t* idx;
-    bam_lplbuf_t* lplbuf;
-    bam_hdr_t* header;
-    samFile* fp;
-    int curr_tid, left_pos;
-    faidx_t* fai;
-    bcf_callaux_t* bca;
-
-    int ccol, last_pos, row_shift, base_for, color_for, is_dot, l_ref, ins;
-    int no_skip, show_name, inverse;
-    char *ref;
-    /* maps @RG ID => SM (sample), in practice only used to determine whether a particular RG is in the list of allowed ones */
-    khash_t(kh_rg) *rg_hash;
-    /* callbacks */
-    void (*my_destroy)(struct AbstractTview* );
-    void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...);
-    void (*my_mvaddch)(struct AbstractTview*,int,int,int);
-    void (*my_attron)(struct AbstractTview*,int);
-    void (*my_attroff)(struct AbstractTview*,int);
-    void (*my_clear)(struct AbstractTview*);
-    int (*my_colorpair)(struct AbstractTview*,int);
-    int (*my_drawaln)(struct AbstractTview*,int,int);
-    int (*my_loop)(struct AbstractTview*);
-    int (*my_underline)(struct AbstractTview*);
-} tview_t;
-
-
-char bam_aux_getCEi(bam1_t *b, int i);
-char bam_aux_getCSi(bam1_t *b, int i);
-char bam_aux_getCQi(bam1_t *b, int i);
-
-#define TV_MIN_ALNROW 2
-#define TV_MAX_GOTO  40
-#define TV_LOW_MAPQ  10
-
-#define TV_COLOR_MAPQ   0
-#define TV_COLOR_BASEQ  1
-#define TV_COLOR_NUCL   2
-#define TV_COLOR_COL    3
-#define TV_COLOR_COLQ   4
-
-#define TV_BASE_NUCL 0
-#define TV_BASE_COLOR_SPACE 1
-
-int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data);
-int base_tv_init(tview_t*,const char *fn, const char *fn_fa,
-                 const char *samples, const htsFormat *fmt);
-void base_tv_destroy(tview_t*);
-int base_draw_aln(tview_t *tv, int tid, int pos);
-
-typedef struct Tixel
-    {
-    int ch;
-    int attributes;
-    }tixel_t;
-
-#endif
-
diff --git a/samtools/bam_tview_curses.c b/samtools/bam_tview_curses.c
deleted file mode 100644
index d7edfe8..0000000
--- a/samtools/bam_tview_curses.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/*  bam_tview_curses.c -- curses tview implementation.
-
-    Copyright (C) 2008-2013 Genome Research Ltd.
-    Portions copyright (C) 2013 Pierre Lindenbaum, Institut du Thorax, INSERM U1087, Université de Nantes.
-
-    Author: Heng Li <lh3 at sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notices and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#include <config.h>
-
-#include "bam_tview.h"
-
-#ifdef HAVE_CURSES
-
-#if defined HAVE_NCURSESW_CURSES_H
-#include <ncursesw/curses.h>
-#elif defined HAVE_NCURSESW_H
-#include <ncursesw.h>
-#elif defined HAVE_NCURSES_CURSES_H
-#include <ncurses/curses.h>
-#elif defined HAVE_NCURSES_H
-#include <ncurses.h>
-#elif defined HAVE_CURSES_H
-#include <curses.h>
-#endif
-
-typedef struct CursesTview {
-    tview_t view;
-    WINDOW *wgoto, *whelp;
-    } curses_tview_t;
-
-#define FROM_TV(ptr) ((curses_tview_t*)ptr)
-
-static void curses_destroy(tview_t* base)
-    {
-    curses_tview_t* tv=(curses_tview_t*)base;
-
-
-    delwin(tv->wgoto); delwin(tv->whelp);
-    endwin();
-
-    base_tv_destroy(base);
-
-    free(tv);
-    }
-
-/*
- void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...);
-    void (*my_)(struct AbstractTview*,int,int,int);
-    void (*my_attron)(struct AbstractTview*,int);
-    void (*my_attroff)(struct AbstractTview*,int);
-    void (*my_clear)(struct AbstractTview*);
-    int (*my_colorpair)(struct AbstractTview*,int);
-*/
-
-static void curses_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...)
-    {
-    unsigned int size=tv->mcol+2;
-    char* str=malloc(size);
-    if(str==0) exit(EXIT_FAILURE);
-    va_list argptr;
-    va_start(argptr, fmt);
-    vsnprintf(str,size, fmt, argptr);
-    va_end(argptr);
-    mvprintw(y,x,str);
-    free(str);
-    }
-
-static void curses_mvaddch(struct AbstractTview* tv,int y,int x,int ch)
-    {
-    mvaddch(y,x,ch);
-    }
-
-static void curses_attron(struct AbstractTview* tv,int flag)
-    {
-    attron(flag);
-    }
-static void curses_attroff(struct AbstractTview* tv,int flag)
-    {
-    attroff(flag);
-    }
-static void curses_clear(struct AbstractTview* tv)
-    {
-    clear();
-    }
-
-static int curses_init_colors(int inverse)
-{
-    if (inverse) {
-        init_pair(1, COLOR_WHITE, COLOR_BLUE);
-        init_pair(2, COLOR_BLACK, COLOR_GREEN);
-        init_pair(3, COLOR_BLACK, COLOR_YELLOW);
-        init_pair(4, COLOR_BLACK, COLOR_WHITE);
-        init_pair(5, COLOR_BLACK, COLOR_GREEN);
-        init_pair(6, COLOR_BLACK, COLOR_CYAN);
-        init_pair(7, COLOR_WHITE, COLOR_MAGENTA);
-        init_pair(8, COLOR_WHITE, COLOR_RED);
-        init_pair(9, COLOR_WHITE, COLOR_BLUE);
-    } else {
-        init_pair(1, COLOR_BLUE, COLOR_BLACK);
-        init_pair(2, COLOR_GREEN, COLOR_BLACK);
-        init_pair(3, COLOR_YELLOW, COLOR_BLACK);
-        init_pair(4, COLOR_WHITE, COLOR_BLACK);
-        init_pair(5, COLOR_GREEN, COLOR_BLACK);
-        init_pair(6, COLOR_CYAN, COLOR_BLACK);
-        init_pair(7, COLOR_MAGENTA, COLOR_BLACK);
-        init_pair(8, COLOR_RED, COLOR_BLACK);
-        init_pair(9, COLOR_BLUE, COLOR_BLACK);
-    }
-
-    return 0;
-}
-
-static int curses_colorpair(struct AbstractTview* tv,int flag)
-    {
-    return COLOR_PAIR(flag);
-    }
-
-static int curses_drawaln(struct AbstractTview* tv, int tid, int pos)
-    {
-    return base_draw_aln(tv,  tid, pos);
-    }
-
-
-
-static void tv_win_goto(curses_tview_t *tv, int *tid, int *pos)
-    {
-    char str[256], *p;
-    int i, l = 0;
-    tview_t *base=(tview_t*)tv;
-    wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+');
-    mvwprintw(tv->wgoto, 1, 2, "Goto: ");
-    for (;;) {
-        int invalid = 0;
-        int c = wgetch(tv->wgoto);
-        wrefresh(tv->wgoto);
-        if (c == KEY_BACKSPACE || c == '\010' || c == '\177') {
-            if(l > 0) --l;
-        } else if (c == KEY_ENTER || c == '\012' || c == '\015') {
-            int _tid = -1, _beg, _end;
-            if (str[0] == '=') {
-                _beg = strtol(str+1, &p, 10) - 1;
-                if (_beg > 0) {
-                    *pos = _beg;
-                    return;
-                }
-            } else {
-                char *name_lim = (char *) hts_parse_reg(str, &_beg, &_end);
-                if (name_lim) {
-                    char name_terminator = *name_lim;
-                    *name_lim = '\0';
-                    _tid = bam_name2id(base->header, str);
-                    *name_lim = name_terminator;
-                }
-                else {
-                    // Unparsable region, but possibly a sequence named "foo:a"
-                    _tid = bam_name2id(base->header, str);
-                    _beg = 0;
-                }
-
-                if (_tid >= 0) {
-                    *tid = _tid; *pos = _beg;
-                    return;
-                }
-            }
-
-            // If we get here, the region string is invalid
-            invalid = 1;
-        } else if (isgraph(c)) {
-            if (l < TV_MAX_GOTO) str[l++] = c;
-        } else if (c == '\027') l = 0;
-        else if (c == '\033') return;
-        str[l] = '\0';
-        for (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, ' ');
-        if (invalid) mvwprintw(tv->wgoto, 1, TV_MAX_GOTO - 1, "[Invalid]");
-        mvwprintw(tv->wgoto, 1, 8, "%s", str);
-    }
-}
-
-
-
-
-static void tv_win_help(curses_tview_t *tv) {
-    int r = 1;
-    tview_t* base=(tview_t*)base;
-    WINDOW *win = tv->whelp;
-    wborder(win, '|', '|', '-', '-', '+', '+', '+', '+');
-    mvwprintw(win, r++, 2, "        -=-    Help    -=- ");
-    r++;
-    mvwprintw(win, r++, 2, "?          This window");
-    mvwprintw(win, r++, 2, "Arrows     Small scroll movement");
-    mvwprintw(win, r++, 2, "h,j,k,l    Small scroll movement");
-    mvwprintw(win, r++, 2, "H,J,K,L    Large scroll movement");
-    mvwprintw(win, r++, 2, "ctrl-H     Scroll 1k left");
-    mvwprintw(win, r++, 2, "ctrl-L     Scroll 1k right");
-    mvwprintw(win, r++, 2, "space      Scroll one screen");
-    mvwprintw(win, r++, 2, "backspace  Scroll back one screen");
-    mvwprintw(win, r++, 2, "g          Go to specific location");
-    mvwprintw(win, r++, 2, "m          Color for mapping qual");
-    mvwprintw(win, r++, 2, "n          Color for nucleotide");
-    mvwprintw(win, r++, 2, "b          Color for base quality");
-    mvwprintw(win, r++, 2, "c          Color for cs color");
-    mvwprintw(win, r++, 2, "z          Color for cs qual");
-    mvwprintw(win, r++, 2, ".          Toggle on/off dot view");
-    mvwprintw(win, r++, 2, "s          Toggle on/off ref skip");
-    mvwprintw(win, r++, 2, "r          Toggle on/off rd name");
-    mvwprintw(win, r++, 2, "N          Turn on nt view");
-    mvwprintw(win, r++, 2, "C          Turn on cs view");
-    mvwprintw(win, r++, 2, "i          Toggle on/off ins");
-    mvwprintw(win, r++, 2, "v          Inverse video");
-    mvwprintw(win, r++, 2, "q          Exit");
-    r++;
-    mvwprintw(win, r++, 2, "Underline:      Secondary or orphan");
-    mvwprintw(win, r++, 2, "Blue:    0-9    Green: 10-19");
-    mvwprintw(win, r++, 2, "Yellow: 20-29   White: >=30");
-    wrefresh(win);
-    wgetch(win);
-}
-
-static int curses_underline(tview_t* tv)
-    {
-    return A_UNDERLINE;
-    }
-
-static int curses_loop(tview_t* tv)
-    {
-    int tid, pos;
-    curses_tview_t *CTV=(curses_tview_t *)tv;
-    tid = tv->curr_tid; pos = tv->left_pos;
-    while (1) {
-        int c = getch();
-        switch (c) {
-            case '?': tv_win_help(CTV); break;
-            case '\033':
-            case 'q': goto end_loop;
-            case '/':
-            case 'g': tv_win_goto(CTV, &tid, &pos); break;
-            case 'm': tv->color_for = TV_COLOR_MAPQ; break;
-            case 'b': tv->color_for = TV_COLOR_BASEQ; break;
-            case 'n': tv->color_for = TV_COLOR_NUCL; break;
-            case 'c': tv->color_for = TV_COLOR_COL; break;
-            case 'z': tv->color_for = TV_COLOR_COLQ; break;
-            case 'v': curses_init_colors(tv->inverse = !tv->inverse); break;
-            case 's': tv->no_skip = !tv->no_skip; break;
-            case 'r': tv->show_name = !tv->show_name; break;
-            case KEY_LEFT:
-            case 'h': --pos; break;
-            case KEY_RIGHT:
-            case 'l': ++pos; break;
-            case KEY_SLEFT:
-            case 'H': pos -= 20; break;
-            case KEY_SRIGHT:
-            case 'L': pos += 20; break;
-            case '.': tv->is_dot = !tv->is_dot; break;
-            case 'N': tv->base_for = TV_BASE_NUCL; break;
-            case 'C': tv->base_for = TV_BASE_COLOR_SPACE; break;
-            case 'i': tv->ins = !tv->ins; break;
-            case '\010': pos -= 1000; break;
-            case '\014': pos += 1000; break;
-            case ' ': pos += tv->mcol; break;
-            case KEY_UP:
-            case 'j': --tv->row_shift; break;
-            case KEY_DOWN:
-            case 'k': ++tv->row_shift; break;
-            case KEY_BACKSPACE:
-            case '\177': pos -= tv->mcol; break;
-            case KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break;
-            default: continue;
-        }
-        if (pos < 0) pos = 0;
-        if (tv->row_shift < 0) tv->row_shift = 0;
-        tv->my_drawaln(tv, tid, pos);
-    }
-end_loop:
-    return 0;
-}
-
-
-
-
-tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                        const htsFormat *fmt)
-    {
-    curses_tview_t *tv = (curses_tview_t*)calloc(1, sizeof(curses_tview_t));
-    tview_t* base=(tview_t*)tv;
-    if(tv==0)
-        {
-        fprintf(stderr,"Calloc failed\n");
-        return 0;
-        }
-
-    base_tv_init(base,fn,fn_fa,samples,fmt);
-    /* initialize callbacks */
-#define SET_CALLBACK(fun) base->my_##fun=curses_##fun;
-    SET_CALLBACK(destroy);
-    SET_CALLBACK(mvprintw);
-    SET_CALLBACK(mvaddch);
-    SET_CALLBACK(attron);
-    SET_CALLBACK(attroff);
-    SET_CALLBACK(clear);
-    SET_CALLBACK(colorpair);
-    SET_CALLBACK(drawaln);
-    SET_CALLBACK(loop);
-    SET_CALLBACK(underline);
-#undef SET_CALLBACK
-
-    initscr();
-    keypad(stdscr, TRUE);
-    clear();
-    noecho();
-    cbreak();
-
-    getmaxyx(stdscr, base->mrow, base->mcol);
-    tv->wgoto = newwin(3, TV_MAX_GOTO + 10, 10, 5);
-    tv->whelp = newwin(30, 40, 5, 5);
-
-    start_color();
-    curses_init_colors(0);
-    return base;
-    }
-
-#else // !HAVE_CURSES
-
-#warning "No curses library is available; tview with curses is disabled."
-
-extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                             const htsFormat *fmt);
-
-tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                        const htsFormat *fmt)
-    {
-    return text_tv_init(fn,fn_fa,samples,fmt);
-    }
-
-#endif
diff --git a/samtools/bam_tview_curses.c.pysam.c b/samtools/bam_tview_curses.c.pysam.c
deleted file mode 100644
index 90a8335..0000000
--- a/samtools/bam_tview_curses.c.pysam.c
+++ /dev/null
@@ -1,354 +0,0 @@
-#include "pysam.h"
-
-/*  bam_tview_curses.c -- curses tview implementation.
-
-    Copyright (C) 2008-2013 Genome Research Ltd.
-    Portions copyright (C) 2013 Pierre Lindenbaum, Institut du Thorax, INSERM U1087, Université de Nantes.
-
-    Author: Heng Li <lh3 at sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notices and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#include <config.h>
-
-#include "bam_tview.h"
-
-#ifdef HAVE_CURSES
-
-#if defined HAVE_NCURSESW_CURSES_H
-#include <ncursesw/curses.h>
-#elif defined HAVE_NCURSESW_H
-#include <ncursesw.h>
-#elif defined HAVE_NCURSES_CURSES_H
-#include <ncurses/curses.h>
-#elif defined HAVE_NCURSES_H
-#include <ncurses.h>
-#elif defined HAVE_CURSES_H
-#include <curses.h>
-#endif
-
-typedef struct CursesTview {
-    tview_t view;
-    WINDOW *wgoto, *whelp;
-    } curses_tview_t;
-
-#define FROM_TV(ptr) ((curses_tview_t*)ptr)
-
-static void curses_destroy(tview_t* base)
-    {
-    curses_tview_t* tv=(curses_tview_t*)base;
-
-
-    delwin(tv->wgoto); delwin(tv->whelp);
-    endwin();
-
-    base_tv_destroy(base);
-
-    free(tv);
-    }
-
-/*
- void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...);
-    void (*my_)(struct AbstractTview*,int,int,int);
-    void (*my_attron)(struct AbstractTview*,int);
-    void (*my_attroff)(struct AbstractTview*,int);
-    void (*my_clear)(struct AbstractTview*);
-    int (*my_colorpair)(struct AbstractTview*,int);
-*/
-
-static void curses_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...)
-    {
-    unsigned int size=tv->mcol+2;
-    char* str=malloc(size);
-    if(str==0) exit(EXIT_FAILURE);
-    va_list argptr;
-    va_start(argptr, fmt);
-    vsnprintf(str,size, fmt, argptr);
-    va_end(argptr);
-    mvprintw(y,x,str);
-    free(str);
-    }
-
-static void curses_mvaddch(struct AbstractTview* tv,int y,int x,int ch)
-    {
-    mvaddch(y,x,ch);
-    }
-
-static void curses_attron(struct AbstractTview* tv,int flag)
-    {
-    attron(flag);
-    }
-static void curses_attroff(struct AbstractTview* tv,int flag)
-    {
-    attroff(flag);
-    }
-static void curses_clear(struct AbstractTview* tv)
-    {
-    clear();
-    }
-
-static int curses_init_colors(int inverse)
-{
-    if (inverse) {
-        init_pair(1, COLOR_WHITE, COLOR_BLUE);
-        init_pair(2, COLOR_BLACK, COLOR_GREEN);
-        init_pair(3, COLOR_BLACK, COLOR_YELLOW);
-        init_pair(4, COLOR_BLACK, COLOR_WHITE);
-        init_pair(5, COLOR_BLACK, COLOR_GREEN);
-        init_pair(6, COLOR_BLACK, COLOR_CYAN);
-        init_pair(7, COLOR_WHITE, COLOR_MAGENTA);
-        init_pair(8, COLOR_WHITE, COLOR_RED);
-        init_pair(9, COLOR_WHITE, COLOR_BLUE);
-    } else {
-        init_pair(1, COLOR_BLUE, COLOR_BLACK);
-        init_pair(2, COLOR_GREEN, COLOR_BLACK);
-        init_pair(3, COLOR_YELLOW, COLOR_BLACK);
-        init_pair(4, COLOR_WHITE, COLOR_BLACK);
-        init_pair(5, COLOR_GREEN, COLOR_BLACK);
-        init_pair(6, COLOR_CYAN, COLOR_BLACK);
-        init_pair(7, COLOR_MAGENTA, COLOR_BLACK);
-        init_pair(8, COLOR_RED, COLOR_BLACK);
-        init_pair(9, COLOR_BLUE, COLOR_BLACK);
-    }
-
-    return 0;
-}
-
-static int curses_colorpair(struct AbstractTview* tv,int flag)
-    {
-    return COLOR_PAIR(flag);
-    }
-
-static int curses_drawaln(struct AbstractTview* tv, int tid, int pos)
-    {
-    return base_draw_aln(tv,  tid, pos);
-    }
-
-
-
-static void tv_win_goto(curses_tview_t *tv, int *tid, int *pos)
-    {
-    char str[256], *p;
-    int i, l = 0;
-    tview_t *base=(tview_t*)tv;
-    wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+');
-    mvwprintw(tv->wgoto, 1, 2, "Goto: ");
-    for (;;) {
-        int invalid = 0;
-        int c = wgetch(tv->wgoto);
-        wrefresh(tv->wgoto);
-        if (c == KEY_BACKSPACE || c == '\010' || c == '\177') {
-            if(l > 0) --l;
-        } else if (c == KEY_ENTER || c == '\012' || c == '\015') {
-            int _tid = -1, _beg, _end;
-            if (str[0] == '=') {
-                _beg = strtol(str+1, &p, 10) - 1;
-                if (_beg > 0) {
-                    *pos = _beg;
-                    return;
-                }
-            } else {
-                char *name_lim = (char *) hts_parse_reg(str, &_beg, &_end);
-                if (name_lim) {
-                    char name_terminator = *name_lim;
-                    *name_lim = '\0';
-                    _tid = bam_name2id(base->header, str);
-                    *name_lim = name_terminator;
-                }
-                else {
-                    // Unparsable region, but possibly a sequence named "foo:a"
-                    _tid = bam_name2id(base->header, str);
-                    _beg = 0;
-                }
-
-                if (_tid >= 0) {
-                    *tid = _tid; *pos = _beg;
-                    return;
-                }
-            }
-
-            // If we get here, the region string is invalid
-            invalid = 1;
-        } else if (isgraph(c)) {
-            if (l < TV_MAX_GOTO) str[l++] = c;
-        } else if (c == '\027') l = 0;
-        else if (c == '\033') return;
-        str[l] = '\0';
-        for (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, ' ');
-        if (invalid) mvwprintw(tv->wgoto, 1, TV_MAX_GOTO - 1, "[Invalid]");
-        mvwprintw(tv->wgoto, 1, 8, "%s", str);
-    }
-}
-
-
-
-
-static void tv_win_help(curses_tview_t *tv) {
-    int r = 1;
-    tview_t* base=(tview_t*)base;
-    WINDOW *win = tv->whelp;
-    wborder(win, '|', '|', '-', '-', '+', '+', '+', '+');
-    mvwprintw(win, r++, 2, "        -=-    Help    -=- ");
-    r++;
-    mvwprintw(win, r++, 2, "?          This window");
-    mvwprintw(win, r++, 2, "Arrows     Small scroll movement");
-    mvwprintw(win, r++, 2, "h,j,k,l    Small scroll movement");
-    mvwprintw(win, r++, 2, "H,J,K,L    Large scroll movement");
-    mvwprintw(win, r++, 2, "ctrl-H     Scroll 1k left");
-    mvwprintw(win, r++, 2, "ctrl-L     Scroll 1k right");
-    mvwprintw(win, r++, 2, "space      Scroll one screen");
-    mvwprintw(win, r++, 2, "backspace  Scroll back one screen");
-    mvwprintw(win, r++, 2, "g          Go to specific location");
-    mvwprintw(win, r++, 2, "m          Color for mapping qual");
-    mvwprintw(win, r++, 2, "n          Color for nucleotide");
-    mvwprintw(win, r++, 2, "b          Color for base quality");
-    mvwprintw(win, r++, 2, "c          Color for cs color");
-    mvwprintw(win, r++, 2, "z          Color for cs qual");
-    mvwprintw(win, r++, 2, ".          Toggle on/off dot view");
-    mvwprintw(win, r++, 2, "s          Toggle on/off ref skip");
-    mvwprintw(win, r++, 2, "r          Toggle on/off rd name");
-    mvwprintw(win, r++, 2, "N          Turn on nt view");
-    mvwprintw(win, r++, 2, "C          Turn on cs view");
-    mvwprintw(win, r++, 2, "i          Toggle on/off ins");
-    mvwprintw(win, r++, 2, "v          Inverse video");
-    mvwprintw(win, r++, 2, "q          Exit");
-    r++;
-    mvwprintw(win, r++, 2, "Underline:      Secondary or orphan");
-    mvwprintw(win, r++, 2, "Blue:    0-9    Green: 10-19");
-    mvwprintw(win, r++, 2, "Yellow: 20-29   White: >=30");
-    wrefresh(win);
-    wgetch(win);
-}
-
-static int curses_underline(tview_t* tv)
-    {
-    return A_UNDERLINE;
-    }
-
-static int curses_loop(tview_t* tv)
-    {
-    int tid, pos;
-    curses_tview_t *CTV=(curses_tview_t *)tv;
-    tid = tv->curr_tid; pos = tv->left_pos;
-    while (1) {
-        int c = getch();
-        switch (c) {
-            case '?': tv_win_help(CTV); break;
-            case '\033':
-            case 'q': goto end_loop;
-            case '/':
-            case 'g': tv_win_goto(CTV, &tid, &pos); break;
-            case 'm': tv->color_for = TV_COLOR_MAPQ; break;
-            case 'b': tv->color_for = TV_COLOR_BASEQ; break;
-            case 'n': tv->color_for = TV_COLOR_NUCL; break;
-            case 'c': tv->color_for = TV_COLOR_COL; break;
-            case 'z': tv->color_for = TV_COLOR_COLQ; break;
-            case 'v': curses_init_colors(tv->inverse = !tv->inverse); break;
-            case 's': tv->no_skip = !tv->no_skip; break;
-            case 'r': tv->show_name = !tv->show_name; break;
-            case KEY_LEFT:
-            case 'h': --pos; break;
-            case KEY_RIGHT:
-            case 'l': ++pos; break;
-            case KEY_SLEFT:
-            case 'H': pos -= 20; break;
-            case KEY_SRIGHT:
-            case 'L': pos += 20; break;
-            case '.': tv->is_dot = !tv->is_dot; break;
-            case 'N': tv->base_for = TV_BASE_NUCL; break;
-            case 'C': tv->base_for = TV_BASE_COLOR_SPACE; break;
-            case 'i': tv->ins = !tv->ins; break;
-            case '\010': pos -= 1000; break;
-            case '\014': pos += 1000; break;
-            case ' ': pos += tv->mcol; break;
-            case KEY_UP:
-            case 'j': --tv->row_shift; break;
-            case KEY_DOWN:
-            case 'k': ++tv->row_shift; break;
-            case KEY_BACKSPACE:
-            case '\177': pos -= tv->mcol; break;
-            case KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break;
-            default: continue;
-        }
-        if (pos < 0) pos = 0;
-        if (tv->row_shift < 0) tv->row_shift = 0;
-        tv->my_drawaln(tv, tid, pos);
-    }
-end_loop:
-    return 0;
-}
-
-
-
-
-tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                        const htsFormat *fmt)
-    {
-    curses_tview_t *tv = (curses_tview_t*)calloc(1, sizeof(curses_tview_t));
-    tview_t* base=(tview_t*)tv;
-    if(tv==0)
-        {
-        fprintf(pysam_stderr,"Calloc failed\n");
-        return 0;
-        }
-
-    base_tv_init(base,fn,fn_fa,samples,fmt);
-    /* initialize callbacks */
-#define SET_CALLBACK(fun) base->my_##fun=curses_##fun;
-    SET_CALLBACK(destroy);
-    SET_CALLBACK(mvprintw);
-    SET_CALLBACK(mvaddch);
-    SET_CALLBACK(attron);
-    SET_CALLBACK(attroff);
-    SET_CALLBACK(clear);
-    SET_CALLBACK(colorpair);
-    SET_CALLBACK(drawaln);
-    SET_CALLBACK(loop);
-    SET_CALLBACK(underline);
-#undef SET_CALLBACK
-
-    initscr();
-    keypad(stdscr, TRUE);
-    clear();
-    noecho();
-    cbreak();
-
-    getmaxyx(stdscr, base->mrow, base->mcol);
-    tv->wgoto = newwin(3, TV_MAX_GOTO + 10, 10, 5);
-    tv->whelp = newwin(30, 40, 5, 5);
-
-    start_color();
-    curses_init_colors(0);
-    return base;
-    }
-
-#else // !HAVE_CURSES
-
-#warning "No curses library is available; tview with curses is disabled."
-
-extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                             const htsFormat *fmt);
-
-tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                        const htsFormat *fmt)
-    {
-    return text_tv_init(fn,fn_fa,samples,fmt);
-    }
-
-#endif
diff --git a/samtools/bam_tview_html.c b/samtools/bam_tview_html.c
deleted file mode 100644
index e3aecda..0000000
--- a/samtools/bam_tview_html.c
+++ /dev/null
@@ -1,377 +0,0 @@
-/*  bam_tview_html.c -- HTML tview output.
-
-    Copyright (C) 2013 Pierre Lindenbaum, Institut du Thorax, INSERM U1087, Université de Nantes.
-
-    Author: Pierre Lindenbaum <plindenbaum at yahoo.fr>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#include <config.h>
-
-#include <unistd.h>
-#include "bam_tview.h"
-
-#define UNDERLINE_FLAG 10
-
-typedef struct HtmlTview {
-    tview_t view;
-    int row_count;
-    tixel_t** screen;
-    FILE* out;
-    int attributes;/* color... */
-    } html_tview_t;
-
-#define FROM_TV(ptr) ((html_tview_t*)ptr)
-
-static void html_destroy(tview_t* base)
-    {
-    int i;
-    html_tview_t* tv=(html_tview_t*)base;
-    if(tv->screen!=NULL)
-        {
-        for(i=0;i< tv->row_count;++i) free(tv->screen[i]);
-        free(tv->screen);
-        }
-    base_tv_destroy(base);
-    free(tv);
-    }
-
-/*
- void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...);
-    void (*my_)(struct AbstractTview*,int,int,int);
-    void (*my_attron)(struct AbstractTview*,int);
-    void (*my_attroff)(struct AbstractTview*,int);
-    void (*my_clear)(struct AbstractTview*);
-    int (*my_colorpair)(struct AbstractTview*,int);
-*/
-
-static void html_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...)
-    {
-    int i,nchars=0;
-    unsigned int size=tv->mcol+2;
-    char* str=malloc(size);
-    if(str==0) exit(EXIT_FAILURE);
-    va_list argptr;
-    va_start(argptr, fmt);
-    nchars=vsnprintf(str,size, fmt, argptr);
-    va_end(argptr);
-
-    for(i=0;i< nchars;++i)
-        {
-        tv->my_mvaddch(tv,y,x+i,str[i]);
-        }
-    free(str);
-    }
-
-static void html_mvaddch(struct AbstractTview* tv,int y,int x,int ch)
-    {
-    tixel_t* row=NULL;
-    html_tview_t* ptr=FROM_TV(tv);
-    if( x >= tv->mcol ) return; //out of screen
-    while(ptr->row_count<=y)
-        {
-        int x;
-        row=(tixel_t*)calloc(tv->mcol,sizeof(tixel_t));
-        if(row==0)  exit(EXIT_FAILURE);
-        for(x=0;x<tv->mcol;++x) {row[x].ch=' ';row[x].attributes=0;}
-        ptr->screen=(tixel_t**)realloc(ptr->screen,sizeof(tixel_t*)*(ptr->row_count+1));
-        ptr->screen[ptr->row_count++]=row;
-        }
-    row=ptr->screen[y];
-    row[x].ch=ch;
-    row[x].attributes=ptr->attributes;
-    }
-
-static void html_attron(struct AbstractTview* tv,int flag)
-    {
-    html_tview_t* ptr=FROM_TV(tv);
-    ptr->attributes |=  flag;
-
-
-    }
-
-static void html_attroff(struct AbstractTview* tv,int flag)
-    {
-    html_tview_t* ptr=FROM_TV(tv);
-    ptr->attributes &= ~(flag);
-    }
-
-static void html_clear(struct AbstractTview* tv)
-    {
-    html_tview_t* ptr=FROM_TV(tv);
-    if(ptr->screen!=NULL)
-    {
-    int i;
-    for(i=0;i< ptr->row_count;++i) free(ptr->screen[i]);
-    free(ptr->screen);
-    ptr->screen=NULL;
-    }
-    ptr->row_count=0;
-    ptr->attributes=0;
-    }
-
-static int html_colorpair(struct AbstractTview* tv,int flag)
-    {
-    return (1 << (flag));
-    }
-
-static int html_drawaln(struct AbstractTview* tv, int tid, int pos)
-    {
-    int y,x;
-    html_tview_t* ptr=FROM_TV(tv);
-    html_clear(tv);
-    base_draw_aln(tv,  tid, pos);
-    fputs("<html><head>",ptr->out);
-    fprintf(ptr->out,"<title>%s:%d</title>",
-        tv->header->target_name[tid],
-        pos+1
-        );
-    //style
-
-    fputs("<style type='text/css'>\n",ptr->out);
-    fputs(".tviewbody { margin:5px; background-color:white;text-align:center;}\n",ptr->out);
-    fputs(".tviewtitle {text-align:center;}\n",ptr->out);
-    fputs(".tviewpre { margin:5px; background-color:white;}\n",ptr->out);
-    #define CSS(id,col) fprintf(ptr->out,".tviewc%d {color:%s;}\n.tviewcu%d {color:%s;text-decoration:underline;}\n",id,col,id,col);
-        CSS(0, "black");
-        CSS(1, "blue");
-    CSS(2, "green");
-    CSS(3, "yellow");
-    CSS(4, "black");
-    CSS(5, "green");
-    CSS(6, "cyan");
-    CSS(7, "yellow");
-    CSS(8, "red");
-    CSS(9, "blue");
-    #undef CSS
-    fputs("</style>",ptr->out);
-
-    fputs("</head><body>",ptr->out);
-
-      fprintf(ptr->out,"<div class='tviewbody'><div class='tviewtitle'>%s:%d</div>",
-        tv->header->target_name[tid],
-        pos+1
-        );
-
-    fputs("<pre class='tviewpre'>",ptr->out);
-    for(y=0;y< ptr->row_count;++y)
-        {
-
-        for(x=0;x< tv->mcol;++x)
-            {
-
-
-        if(x== 0 || ptr->screen[y][x].attributes != ptr->screen[y][x-1].attributes)
-                {
-                int css=0;
-            fprintf(ptr->out,"<span");
-                while(css<32)
-                    {
-                    //if(y>1) fprintf(stderr,"css=%d pow2=%d vs %d\n",css,(1 << (css)),ptr->screen[y][x].attributes);
-                    if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0)
-                        {
-
-                        fprintf(ptr->out," class='tviewc%s%d'",
-                            (( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)) )!=0?"u":""),
-                            css);
-                        break;
-                        }
-                    ++css;
-                    }
-
-
-                fputs(">",ptr->out);
-                }
-
-        int ch=ptr->screen[y][x].ch;
-        switch(ch)
-            {
-            case '<': fputs("<",ptr->out);break;
-            case '>': fputs(">",ptr->out);break;
-            case '&': fputs("&",ptr->out);break;
-            default: fputc(ch,ptr->out); break;
-            }
-
-
-            if(x+1 == tv->mcol  || ptr->screen[y][x].attributes!=ptr->screen[y][x+1].attributes)
-                {
-                fputs("</span>",ptr->out);
-                }
-            }
-        if(y+1 < ptr->row_count) fputs("<br/>",ptr->out);
-        }
-    fputs("</pre></div></body></html>",ptr->out);
-    return 0;
-    }
-
-
-#define ANSI_COLOR_RED "\x1b[31m"
-#define ANSI_COLOR_GREEN "\x1b[32m"
-#define ANSI_COLOR_YELLOW "\x1b[33m"
-#define ANSI_COLOR_BLUE "\x1b[34m"
-#define ANSI_COLOR_MAGENTA "\x1b[35m"
-#define ANSI_COLOR_CYAN "\x1b[36m"
-#define ANSI_COLOR_BLACK "\x1b[0m"
-#define ANSI_COLOR_RESET ANSI_COLOR_BLACK
-
-#define ANSI_UNDERLINE_SET "\033[4m"
-#define ANSI_UNDERLINE_UNSET "\033[0m"
-
-static int text_drawaln(struct AbstractTview* tv, int tid, int pos)
-    {
-    int y,x;
-    html_tview_t* ptr=FROM_TV(tv);
-    html_clear(tv);
-    base_draw_aln(tv,  tid, pos);
-    int is_term= isatty(fileno(ptr->out));
-
-    for(y=0;y< ptr->row_count;++y)
-        {
-        for(x=0;x< tv->mcol;++x)
-            {
-            if(is_term)
-                {
-                int css=0;
-                while(css<32)
-                    {
-                    if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0)
-                        {
-                        break;
-                        }
-                    ++css;
-                    }
-                switch(css)
-                    {
-                    //CSS(0, "black");
-                    case 1: fputs(ANSI_COLOR_BLUE,ptr->out); break;
-                case 2: fputs(ANSI_COLOR_GREEN,ptr->out); break;
-                case 3: fputs(ANSI_COLOR_YELLOW,ptr->out); break;
-                //CSS(4, "black");
-                case 5: fputs(ANSI_COLOR_GREEN,ptr->out); break;
-                case 6: fputs(ANSI_COLOR_CYAN,ptr->out); break;
-                case 7: fputs(ANSI_COLOR_YELLOW,ptr->out); break;
-                case 8: fputs(ANSI_COLOR_RED,ptr->out); break;
-                case 9: fputs(ANSI_COLOR_BLUE,ptr->out); break;
-                default:break;
-                    }
-                if(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)))!=0)
-                    {
-                    fputs(ANSI_UNDERLINE_SET,ptr->out);
-                    }
-
-                }
-
-
-            int ch=ptr->screen[y][x].ch;
-
-            fputc(ch,ptr->out);
-            if(is_term)
-                {
-                fputs(ANSI_COLOR_RESET,ptr->out);
-                if(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)))!=0)
-                    {
-                    fputs(ANSI_UNDERLINE_UNSET,ptr->out);
-                    }
-                }
-            }
-        fputc('\n',ptr->out);
-        }
-    return 0;
-    }
-
-
-static int html_loop(tview_t* tv)
-    {
-    //tv->my_drawaln(tv, tv->curr_tid, tv->left_pos);
-    return 0;
-    }
-
-static int html_underline(tview_t* tv)
-    {
-    return (1 << UNDERLINE_FLAG);
-    }
-
-/*
-static void init_pair(html_tview_t *tv,int id_ge_1, const char* pen, const char* paper)
-    {
-
-    }
-*/
-
-tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                      const htsFormat *fmt)
-    {
-    char* colstr=getenv("COLUMNS");
-    html_tview_t *tv = (html_tview_t*)calloc(1, sizeof(html_tview_t));
-    tview_t* base=(tview_t*)tv;
-    if(tv==0)
-        {
-        fprintf(stderr,"Calloc failed\n");
-        return 0;
-        }
-    tv->row_count=0;
-    tv->screen=NULL;
-    tv->out=stdout;
-    tv->attributes=0;
-    base_tv_init(base,fn,fn_fa,samples,fmt);
-    /* initialize callbacks */
-#define SET_CALLBACK(fun) base->my_##fun=html_##fun;
-    SET_CALLBACK(destroy);
-    SET_CALLBACK(mvprintw);
-    SET_CALLBACK(mvaddch);
-    SET_CALLBACK(attron);
-    SET_CALLBACK(attroff);
-    SET_CALLBACK(clear);
-    SET_CALLBACK(colorpair);
-    SET_CALLBACK(drawaln);
-    SET_CALLBACK(loop);
-    SET_CALLBACK(underline);
-#undef SET_CALLBACK
-
-
-    if(colstr!=0)
-        {
-        base->mcol=atoi(colstr);
-        if(base->mcol<10) base->mcol=80;
-        }
-    base->mrow=99999;
-
-/*
-    init_pair(tv,1, "blue", "white");
-    init_pair(tv,2, "green", "white");
-    init_pair(tv,3, "yellow", "white");
-    init_pair(tv,4, "white", "white");
-    init_pair(tv,5, "green", "white");
-    init_pair(tv,6, "cyan", "white");
-    init_pair(tv,7, "yellow", "white");
-    init_pair(tv,8, "red", "white");
-    init_pair(tv,9, "blue", "white");
-    */
-    return base;
-    }
-
-
-tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                      const htsFormat *fmt)
-    {
-    tview_t* tv=html_tv_init(fn,fn_fa,samples,fmt);
-    tv->my_drawaln=text_drawaln;
-    return tv;
-    }
-
diff --git a/samtools/bam_tview_html.c.pysam.c b/samtools/bam_tview_html.c.pysam.c
deleted file mode 100644
index 164e33d..0000000
--- a/samtools/bam_tview_html.c.pysam.c
+++ /dev/null
@@ -1,379 +0,0 @@
-#include "pysam.h"
-
-/*  bam_tview_html.c -- HTML tview output.
-
-    Copyright (C) 2013 Pierre Lindenbaum, Institut du Thorax, INSERM U1087, Université de Nantes.
-
-    Author: Pierre Lindenbaum <plindenbaum at yahoo.fr>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#include <config.h>
-
-#include <unistd.h>
-#include "bam_tview.h"
-
-#define UNDERLINE_FLAG 10
-
-typedef struct HtmlTview {
-    tview_t view;
-    int row_count;
-    tixel_t** screen;
-    FILE* out;
-    int attributes;/* color... */
-    } html_tview_t;
-
-#define FROM_TV(ptr) ((html_tview_t*)ptr)
-
-static void html_destroy(tview_t* base)
-    {
-    int i;
-    html_tview_t* tv=(html_tview_t*)base;
-    if(tv->screen!=NULL)
-        {
-        for(i=0;i< tv->row_count;++i) free(tv->screen[i]);
-        free(tv->screen);
-        }
-    base_tv_destroy(base);
-    free(tv);
-    }
-
-/*
- void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...);
-    void (*my_)(struct AbstractTview*,int,int,int);
-    void (*my_attron)(struct AbstractTview*,int);
-    void (*my_attroff)(struct AbstractTview*,int);
-    void (*my_clear)(struct AbstractTview*);
-    int (*my_colorpair)(struct AbstractTview*,int);
-*/
-
-static void html_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...)
-    {
-    int i,nchars=0;
-    unsigned int size=tv->mcol+2;
-    char* str=malloc(size);
-    if(str==0) exit(EXIT_FAILURE);
-    va_list argptr;
-    va_start(argptr, fmt);
-    nchars=vsnprintf(str,size, fmt, argptr);
-    va_end(argptr);
-
-    for(i=0;i< nchars;++i)
-        {
-        tv->my_mvaddch(tv,y,x+i,str[i]);
-        }
-    free(str);
-    }
-
-static void html_mvaddch(struct AbstractTview* tv,int y,int x,int ch)
-    {
-    tixel_t* row=NULL;
-    html_tview_t* ptr=FROM_TV(tv);
-    if( x >= tv->mcol ) return; //out of screen
-    while(ptr->row_count<=y)
-        {
-        int x;
-        row=(tixel_t*)calloc(tv->mcol,sizeof(tixel_t));
-        if(row==0)  exit(EXIT_FAILURE);
-        for(x=0;x<tv->mcol;++x) {row[x].ch=' ';row[x].attributes=0;}
-        ptr->screen=(tixel_t**)realloc(ptr->screen,sizeof(tixel_t*)*(ptr->row_count+1));
-        ptr->screen[ptr->row_count++]=row;
-        }
-    row=ptr->screen[y];
-    row[x].ch=ch;
-    row[x].attributes=ptr->attributes;
-    }
-
-static void html_attron(struct AbstractTview* tv,int flag)
-    {
-    html_tview_t* ptr=FROM_TV(tv);
-    ptr->attributes |=  flag;
-
-
-    }
-
-static void html_attroff(struct AbstractTview* tv,int flag)
-    {
-    html_tview_t* ptr=FROM_TV(tv);
-    ptr->attributes &= ~(flag);
-    }
-
-static void html_clear(struct AbstractTview* tv)
-    {
-    html_tview_t* ptr=FROM_TV(tv);
-    if(ptr->screen!=NULL)
-    {
-    int i;
-    for(i=0;i< ptr->row_count;++i) free(ptr->screen[i]);
-    free(ptr->screen);
-    ptr->screen=NULL;
-    }
-    ptr->row_count=0;
-    ptr->attributes=0;
-    }
-
-static int html_colorpair(struct AbstractTview* tv,int flag)
-    {
-    return (1 << (flag));
-    }
-
-static int html_drawaln(struct AbstractTview* tv, int tid, int pos)
-    {
-    int y,x;
-    html_tview_t* ptr=FROM_TV(tv);
-    html_clear(tv);
-    base_draw_aln(tv,  tid, pos);
-    fputs("<html><head>",ptr->out);
-    fprintf(ptr->out,"<title>%s:%d</title>",
-        tv->header->target_name[tid],
-        pos+1
-        );
-    //style
-
-    fputs("<style type='text/css'>\n",ptr->out);
-    fputs(".tviewbody { margin:5px; background-color:white;text-align:center;}\n",ptr->out);
-    fputs(".tviewtitle {text-align:center;}\n",ptr->out);
-    fputs(".tviewpre { margin:5px; background-color:white;}\n",ptr->out);
-    #define CSS(id,col) fprintf(ptr->out,".tviewc%d {color:%s;}\n.tviewcu%d {color:%s;text-decoration:underline;}\n",id,col,id,col);
-        CSS(0, "black");
-        CSS(1, "blue");
-    CSS(2, "green");
-    CSS(3, "yellow");
-    CSS(4, "black");
-    CSS(5, "green");
-    CSS(6, "cyan");
-    CSS(7, "yellow");
-    CSS(8, "red");
-    CSS(9, "blue");
-    #undef CSS
-    fputs("</style>",ptr->out);
-
-    fputs("</head><body>",ptr->out);
-
-      fprintf(ptr->out,"<div class='tviewbody'><div class='tviewtitle'>%s:%d</div>",
-        tv->header->target_name[tid],
-        pos+1
-        );
-
-    fputs("<pre class='tviewpre'>",ptr->out);
-    for(y=0;y< ptr->row_count;++y)
-        {
-
-        for(x=0;x< tv->mcol;++x)
-            {
-
-
-        if(x== 0 || ptr->screen[y][x].attributes != ptr->screen[y][x-1].attributes)
-                {
-                int css=0;
-            fprintf(ptr->out,"<span");
-                while(css<32)
-                    {
-                    //if(y>1) fprintf(pysam_stderr,"css=%d pow2=%d vs %d\n",css,(1 << (css)),ptr->screen[y][x].attributes);
-                    if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0)
-                        {
-
-                        fprintf(ptr->out," class='tviewc%s%d'",
-                            (( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)) )!=0?"u":""),
-                            css);
-                        break;
-                        }
-                    ++css;
-                    }
-
-
-                fputs(">",ptr->out);
-                }
-
-        int ch=ptr->screen[y][x].ch;
-        switch(ch)
-            {
-            case '<': fputs("<",ptr->out);break;
-            case '>': fputs(">",ptr->out);break;
-            case '&': fputs("&",ptr->out);break;
-            default: fputc(ch,ptr->out); break;
-            }
-
-
-            if(x+1 == tv->mcol  || ptr->screen[y][x].attributes!=ptr->screen[y][x+1].attributes)
-                {
-                fputs("</span>",ptr->out);
-                }
-            }
-        if(y+1 < ptr->row_count) fputs("<br/>",ptr->out);
-        }
-    fputs("</pre></div></body></html>",ptr->out);
-    return 0;
-    }
-
-
-#define ANSI_COLOR_RED "\x1b[31m"
-#define ANSI_COLOR_GREEN "\x1b[32m"
-#define ANSI_COLOR_YELLOW "\x1b[33m"
-#define ANSI_COLOR_BLUE "\x1b[34m"
-#define ANSI_COLOR_MAGENTA "\x1b[35m"
-#define ANSI_COLOR_CYAN "\x1b[36m"
-#define ANSI_COLOR_BLACK "\x1b[0m"
-#define ANSI_COLOR_RESET ANSI_COLOR_BLACK
-
-#define ANSI_UNDERLINE_SET "\033[4m"
-#define ANSI_UNDERLINE_UNSET "\033[0m"
-
-static int text_drawaln(struct AbstractTview* tv, int tid, int pos)
-    {
-    int y,x;
-    html_tview_t* ptr=FROM_TV(tv);
-    html_clear(tv);
-    base_draw_aln(tv,  tid, pos);
-    int is_term= isatty(fileno(ptr->out));
-
-    for(y=0;y< ptr->row_count;++y)
-        {
-        for(x=0;x< tv->mcol;++x)
-            {
-            if(is_term)
-                {
-                int css=0;
-                while(css<32)
-                    {
-                    if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0)
-                        {
-                        break;
-                        }
-                    ++css;
-                    }
-                switch(css)
-                    {
-                    //CSS(0, "black");
-                    case 1: fputs(ANSI_COLOR_BLUE,ptr->out); break;
-                case 2: fputs(ANSI_COLOR_GREEN,ptr->out); break;
-                case 3: fputs(ANSI_COLOR_YELLOW,ptr->out); break;
-                //CSS(4, "black");
-                case 5: fputs(ANSI_COLOR_GREEN,ptr->out); break;
-                case 6: fputs(ANSI_COLOR_CYAN,ptr->out); break;
-                case 7: fputs(ANSI_COLOR_YELLOW,ptr->out); break;
-                case 8: fputs(ANSI_COLOR_RED,ptr->out); break;
-                case 9: fputs(ANSI_COLOR_BLUE,ptr->out); break;
-                default:break;
-                    }
-                if(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)))!=0)
-                    {
-                    fputs(ANSI_UNDERLINE_SET,ptr->out);
-                    }
-
-                }
-
-
-            int ch=ptr->screen[y][x].ch;
-
-            fputc(ch,ptr->out);
-            if(is_term)
-                {
-                fputs(ANSI_COLOR_RESET,ptr->out);
-                if(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)))!=0)
-                    {
-                    fputs(ANSI_UNDERLINE_UNSET,ptr->out);
-                    }
-                }
-            }
-        fputc('\n',ptr->out);
-        }
-    return 0;
-    }
-
-
-static int html_loop(tview_t* tv)
-    {
-    //tv->my_drawaln(tv, tv->curr_tid, tv->left_pos);
-    return 0;
-    }
-
-static int html_underline(tview_t* tv)
-    {
-    return (1 << UNDERLINE_FLAG);
-    }
-
-/*
-static void init_pair(html_tview_t *tv,int id_ge_1, const char* pen, const char* paper)
-    {
-
-    }
-*/
-
-tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                      const htsFormat *fmt)
-    {
-    char* colstr=getenv("COLUMNS");
-    html_tview_t *tv = (html_tview_t*)calloc(1, sizeof(html_tview_t));
-    tview_t* base=(tview_t*)tv;
-    if(tv==0)
-        {
-        fprintf(pysam_stderr,"Calloc failed\n");
-        return 0;
-        }
-    tv->row_count=0;
-    tv->screen=NULL;
-    tv->out=pysam_stdout;
-    tv->attributes=0;
-    base_tv_init(base,fn,fn_fa,samples,fmt);
-    /* initialize callbacks */
-#define SET_CALLBACK(fun) base->my_##fun=html_##fun;
-    SET_CALLBACK(destroy);
-    SET_CALLBACK(mvprintw);
-    SET_CALLBACK(mvaddch);
-    SET_CALLBACK(attron);
-    SET_CALLBACK(attroff);
-    SET_CALLBACK(clear);
-    SET_CALLBACK(colorpair);
-    SET_CALLBACK(drawaln);
-    SET_CALLBACK(loop);
-    SET_CALLBACK(underline);
-#undef SET_CALLBACK
-
-
-    if(colstr!=0)
-        {
-        base->mcol=atoi(colstr);
-        if(base->mcol<10) base->mcol=80;
-        }
-    base->mrow=99999;
-
-/*
-    init_pair(tv,1, "blue", "white");
-    init_pair(tv,2, "green", "white");
-    init_pair(tv,3, "yellow", "white");
-    init_pair(tv,4, "white", "white");
-    init_pair(tv,5, "green", "white");
-    init_pair(tv,6, "cyan", "white");
-    init_pair(tv,7, "yellow", "white");
-    init_pair(tv,8, "red", "white");
-    init_pair(tv,9, "blue", "white");
-    */
-    return base;
-    }
-
-
-tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples,
-                      const htsFormat *fmt)
-    {
-    tview_t* tv=html_tv_init(fn,fn_fa,samples,fmt);
-    tv->my_drawaln=text_drawaln;
-    return tv;
-    }
-
diff --git a/samtools/bamshuf.c b/samtools/bamshuf.c
index 044bc4e..e24689e 100644
--- a/samtools/bamshuf.c
+++ b/samtools/bamshuf.c
@@ -34,6 +34,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/hts.h"
 #include "htslib/ksort.h"
 #include "samtools.h"
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 
 #define DEF_CLEVEL 1
@@ -86,6 +87,14 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
     bam_hdr_t *h = NULL;
     int64_t j, max_cnt = 0, *cnt = NULL;
     elem_t *a = NULL;
+    htsThreadPool p = {NULL, 0};
+
+    if (ga->nthreads > 0) {
+        if (!(p.pool = hts_tpool_init(ga->nthreads))) {
+            print_error_errno("collate", "Error creating thread pool\n");
+            return 1;
+        }
+    }
 
     // Read input, distribute reads pseudo-randomly into n_files temporary
     // files.
@@ -94,6 +103,7 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
         print_error_errno("collate", "Cannot open input file \"%s\"", fn);
         return 1;
     }
+    if (p.pool) hts_set_opt(fp, HTS_OPT_THREAD_POOL, &p);
 
     h = sam_hdr_read(fp);
     if (h == NULL) {
@@ -173,6 +183,7 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
         else print_error_errno("collate", "Cannot open output file \"%s.bam\"", pre);
         goto fail;
     }
+    if (p.pool) hts_set_opt(fpw, HTS_OPT_THREAD_POOL, &p);
 
     if (sam_hdr_write(fpw, h) < 0) {
         print_error_errno("collate", "Couldn't write header");
@@ -193,6 +204,7 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
             print_error_errno("collate", "Couldn't open \"%s\"", fnt[i]);
             goto fail;
         }
+        if (p.pool) hts_set_opt(fp, HTS_OPT_THREAD_POOL, &p);
         bam_hdr_destroy(sam_hdr_read(fp)); // Skip over header
 
         // Slurp in one of the split files
@@ -228,6 +240,7 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
         return 1;
     }
 
+    if (p.pool) hts_tpool_destroy(p.pool);
     return 0;
 
  mem_fail:
@@ -249,13 +262,14 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
     free(fnt);
     free(fpt);
     free(cnt);
+    if (p.pool) hts_tpool_destroy(p.pool);
     sam_global_args_free(ga);
     return 1;
 }
 
 static int usage(FILE *fp, int n_files) {
     fprintf(fp,
-            "Usage:   samtools collate [-Ou] [-n nFiles] [-c cLevel] <in.bam> <out.prefix>\n\n"
+            "Usage:   samtools collate [-Ou] [-n nFiles] [-l cLevel] <in.bam> <out.prefix>\n\n"
             "Options:\n"
             "      -O       output to stdout\n"
             "      -u       uncompressed BAM output\n"
@@ -263,7 +277,7 @@ static int usage(FILE *fp, int n_files) {
             "      -n INT   number of temporary files [%d]\n", // n_files
             DEF_CLEVEL, n_files);
 
-    sam_global_opt_help(fp, "-....");
+    sam_global_opt_help(fp, "-....@");
 
     return 1;
 }
@@ -273,11 +287,11 @@ int main_bamshuf(int argc, char *argv[])
     int c, n_files = 64, clevel = DEF_CLEVEL, is_stdout = 0, is_un = 0;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0, '@'),
         { NULL, 0, NULL, 0 }
     };
 
-    while ((c = getopt_long(argc, argv, "n:l:uO", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "n:l:uO@:", lopts, NULL)) >= 0) {
         switch (c) {
         case 'n': n_files = atoi(optarg); break;
         case 'l': clevel = atoi(optarg); break;
diff --git a/samtools/bamshuf.c.pysam.c b/samtools/bamshuf.c.pysam.c
index fb1a5ac..04cd37b 100644
--- a/samtools/bamshuf.c.pysam.c
+++ b/samtools/bamshuf.c.pysam.c
@@ -36,6 +36,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/hts.h"
 #include "htslib/ksort.h"
 #include "samtools.h"
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 
 #define DEF_CLEVEL 1
@@ -88,6 +89,14 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
     bam_hdr_t *h = NULL;
     int64_t j, max_cnt = 0, *cnt = NULL;
     elem_t *a = NULL;
+    htsThreadPool p = {NULL, 0};
+
+    if (ga->nthreads > 0) {
+        if (!(p.pool = hts_tpool_init(ga->nthreads))) {
+            print_error_errno("collate", "Error creating thread pool\n");
+            return 1;
+        }
+    }
 
     // Read input, distribute reads pseudo-randomly into n_files temporary
     // files.
@@ -96,6 +105,7 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
         print_error_errno("collate", "Cannot open input file \"%s\"", fn);
         return 1;
     }
+    if (p.pool) hts_set_opt(fp, HTS_OPT_THREAD_POOL, &p);
 
     h = sam_hdr_read(fp);
     if (h == NULL) {
@@ -175,6 +185,7 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
         else print_error_errno("collate", "Cannot open output file \"%s.bam\"", pre);
         goto fail;
     }
+    if (p.pool) hts_set_opt(fpw, HTS_OPT_THREAD_POOL, &p);
 
     if (sam_hdr_write(fpw, h) < 0) {
         print_error_errno("collate", "Couldn't write header");
@@ -195,6 +206,7 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
             print_error_errno("collate", "Couldn't open \"%s\"", fnt[i]);
             goto fail;
         }
+        if (p.pool) hts_set_opt(fp, HTS_OPT_THREAD_POOL, &p);
         bam_hdr_destroy(sam_hdr_read(fp)); // Skip over header
 
         // Slurp in one of the split files
@@ -230,6 +242,7 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
         return 1;
     }
 
+    if (p.pool) hts_tpool_destroy(p.pool);
     return 0;
 
  mem_fail:
@@ -251,13 +264,14 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
     free(fnt);
     free(fpt);
     free(cnt);
+    if (p.pool) hts_tpool_destroy(p.pool);
     sam_global_args_free(ga);
     return 1;
 }
 
 static int usage(FILE *fp, int n_files) {
     fprintf(fp,
-            "Usage:   samtools collate [-Ou] [-n nFiles] [-c cLevel] <in.bam> <out.prefix>\n\n"
+            "Usage:   samtools collate [-Ou] [-n nFiles] [-l cLevel] <in.bam> <out.prefix>\n\n"
             "Options:\n"
             "      -O       output to pysam_stdout\n"
             "      -u       uncompressed BAM output\n"
@@ -265,7 +279,7 @@ static int usage(FILE *fp, int n_files) {
             "      -n INT   number of temporary files [%d]\n", // n_files
             DEF_CLEVEL, n_files);
 
-    sam_global_opt_help(fp, "-....");
+    sam_global_opt_help(fp, "-....@");
 
     return 1;
 }
@@ -275,11 +289,11 @@ int main_bamshuf(int argc, char *argv[])
     int c, n_files = 64, clevel = DEF_CLEVEL, is_pysam_stdout = 0, is_un = 0;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0, '@'),
         { NULL, 0, NULL, 0 }
     };
 
-    while ((c = getopt_long(argc, argv, "n:l:uO", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "n:l:uO@:", lopts, NULL)) >= 0) {
         switch (c) {
         case 'n': n_files = atoi(optarg); break;
         case 'l': clevel = atoi(optarg); break;
diff --git a/samtools/bamtk.c b/samtools/bamtk.c
index 5c1c60d..bd520b6 100644
--- a/samtools/bamtk.c
+++ b/samtools/bamtk.c
@@ -1,6 +1,6 @@
 /*  bamtk.c -- main samtools command front-end.
 
-    Copyright (C) 2008-2016 Genome Research Ltd.
+    Copyright (C) 2008-2017 Genome Research Ltd.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
 
@@ -27,9 +27,8 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdio.h>
 #include <unistd.h>
 #include <fcntl.h>
-#include <stdarg.h>
 #include <string.h>
-#include <errno.h>
+
 #include "htslib/hts.h"
 #include "samtools.h"
 #include "version.h"
@@ -69,34 +68,6 @@ const char *samtools_version()
     return SAMTOOLS_VERSION;
 }
 
-static void vprint_error_core(const char *subcommand, const char *format, va_list args, const char *extra)
-{
-    fflush(stdout);
-    if (subcommand && *subcommand) fprintf(stderr, "samtools %s: ", subcommand);
-    else fprintf(stderr, "samtools: ");
-    vfprintf(stderr, format, args);
-    if (extra) fprintf(stderr, ": %s\n", extra);
-    else fprintf(stderr, "\n");
-    fflush(stderr);
-}
-
-void print_error(const char *subcommand, const char *format, ...)
-{
-    va_list args;
-    va_start(args, format);
-    vprint_error_core(subcommand, format, args, NULL);
-    va_end(args);
-}
-
-void print_error_errno(const char *subcommand, const char *format, ...)
-{
-    int err = errno;
-    va_list args;
-    va_start(args, format);
-    vprint_error_core(subcommand, format, args, strerror(err));
-    va_end(args);
-}
-
 static void usage(FILE *fp)
 {
     /* Please improve the grouping */
@@ -215,7 +186,7 @@ int main(int argc, char *argv[])
         printf(
 "samtools %s\n"
 "Using htslib %s\n"
-"Copyright (C) 2016 Genome Research Ltd.\n",
+"Copyright (C) 2017 Genome Research Ltd.\n",
                samtools_version(), hts_version());
     }
     else if (strcmp(argv[1], "--version-only") == 0) {
diff --git a/samtools/bamtk.c.pysam.c b/samtools/bamtk.c.pysam.c
index 1f3d938..8956b1f 100644
--- a/samtools/bamtk.c.pysam.c
+++ b/samtools/bamtk.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bamtk.c -- main samtools command front-end.
 
-    Copyright (C) 2008-2016 Genome Research Ltd.
+    Copyright (C) 2008-2017 Genome Research Ltd.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
 
@@ -29,9 +29,8 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdio.h>
 #include <unistd.h>
 #include <fcntl.h>
-#include <stdarg.h>
 #include <string.h>
-#include <errno.h>
+
 #include "htslib/hts.h"
 #include "samtools.h"
 #include "version.h"
@@ -41,7 +40,7 @@ int bam_mpileup(int argc, char *argv[]);
 int bam_merge(int argc, char *argv[]);
 int bam_index(int argc, char *argv[]);
 int bam_sort(int argc, char *argv[]);
-int bam_tview_main(int argc, char *argv[]);
+/* AH: int bam_tview_main(int argc, char *argv[]); */
 int bam_mating(int argc, char *argv[]);
 int bam_rmdup(int argc, char *argv[]);
 int bam_flagstat(int argc, char *argv[]);
@@ -71,34 +70,6 @@ const char *samtools_version()
     return SAMTOOLS_VERSION;
 }
 
-static void vprint_error_core(const char *subcommand, const char *format, va_list args, const char *extra)
-{
-    fflush(pysam_stdout);
-    if (subcommand && *subcommand) fprintf(pysam_stderr, "samtools %s: ", subcommand);
-    else fprintf(pysam_stderr, "samtools: ");
-    vfprintf(pysam_stderr, format, args);
-    if (extra) fprintf(pysam_stderr, ": %s\n", extra);
-    else fprintf(pysam_stderr, "\n");
-    fflush(pysam_stderr);
-}
-
-void print_error(const char *subcommand, const char *format, ...)
-{
-    va_list args;
-    va_start(args, format);
-    vprint_error_core(subcommand, format, args, NULL);
-    va_end(args);
-}
-
-void print_error_errno(const char *subcommand, const char *format, ...)
-{
-    int err = errno;
-    va_list args;
-    va_start(args, format);
-    vprint_error_core(subcommand, format, args, strerror(err));
-    va_end(args);
-}
-
 static void usage(FILE *fp)
 {
     /* Please improve the grouping */
@@ -212,12 +183,12 @@ int samtools_main(int argc, char *argv[])
         fprintf(pysam_stderr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
         return 1;
     }
-    else if (strcmp(argv[1], "tview") == 0)   ret = bam_tview_main(argc-1, argv+1);
+/* AH:    else if (strcmp(argv[1], "tview") == 0)   ret = bam_tview_main(argc-1, argv+1); */
     else if (strcmp(argv[1], "--version") == 0) {
         fprintf(pysam_stdout, 
 "samtools %s\n"
 "Using htslib %s\n"
-"Copyright (C) 2016 Genome Research Ltd.\n",
+"Copyright (C) 2017 Genome Research Ltd.\n",
                samtools_version(), hts_version());
     }
     else if (strcmp(argv[1], "--version-only") == 0) {
diff --git a/samtools/bedcov.c b/samtools/bedcov.c
index d4dceee..1113e17 100644
--- a/samtools/bedcov.c
+++ b/samtools/bedcov.c
@@ -33,6 +33,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <unistd.h>
 #include "htslib/kstring.h"
 #include "htslib/sam.h"
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 
 #include "htslib/kseq.h"
@@ -74,7 +75,7 @@ int main_bedcov(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -89,8 +90,9 @@ int main_bedcov(int argc, char *argv[])
     }
     if (usage || optind + 2 > argc) {
         fprintf(stderr, "Usage: samtools bedcov [options] <in.bed> <in1.bam> [...]\n\n");
-        fprintf(stderr, "  -Q INT       Only count bases of at least INT quality [0]\n");
-        sam_global_opt_help(stderr, "-.--.");
+        fprintf(stderr, "Options:\n");
+        fprintf(stderr, "   -Q <int>            mapping quality threshold [0]\n");
+        sam_global_opt_help(stderr, "-.--.-");
         return 1;
     }
     memset(&str, 0, sizeof(kstring_t));
diff --git a/samtools/bedcov.c.pysam.c b/samtools/bedcov.c.pysam.c
index 25fdffc..3fd6d4c 100644
--- a/samtools/bedcov.c.pysam.c
+++ b/samtools/bedcov.c.pysam.c
@@ -35,6 +35,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <unistd.h>
 #include "htslib/kstring.h"
 #include "htslib/sam.h"
+#include "htslib/thread_pool.h"
 #include "sam_opts.h"
 
 #include "htslib/kseq.h"
@@ -76,7 +77,7 @@ int main_bedcov(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -91,8 +92,9 @@ int main_bedcov(int argc, char *argv[])
     }
     if (usage || optind + 2 > argc) {
         fprintf(pysam_stderr, "Usage: samtools bedcov [options] <in.bed> <in1.bam> [...]\n\n");
-        fprintf(pysam_stderr, "  -Q INT       Only count bases of at least INT quality [0]\n");
-        sam_global_opt_help(pysam_stderr, "-.--.");
+        fprintf(pysam_stderr, "Options:\n");
+        fprintf(pysam_stderr, "   -Q <int>            mapping quality threshold [0]\n");
+        sam_global_opt_help(pysam_stderr, "-.--.-");
         return 1;
     }
     memset(&str, 0, sizeof(kstring_t));
diff --git a/samtools/cut_target.c b/samtools/cut_target.c
index 71a6c85..7d541fa 100644
--- a/samtools/cut_target.c
+++ b/samtools/cut_target.c
@@ -1,7 +1,7 @@
 /*  cut_target.c -- targetcut subcommand.
 
     Copyright (C) 2011 Broad Institute.
-    Copyright (C) 2012-2013, 2015 Genome Research Ltd.
+    Copyright (C) 2012-2013, 2015, 2016 Genome Research Ltd.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
 
@@ -28,9 +28,10 @@ DEALINGS IN THE SOFTWARE.  */
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>
+#include "htslib/hts.h"
 #include "htslib/sam.h"
-#include "errmod.h"
 #include "htslib/faidx.h"
+#include "samtools.h"
 #include "sam_opts.h"
 
 #define ERR_DEP 0.83
@@ -146,7 +147,6 @@ static void process_cns(bam_hdr_t *h, int tid, int l, uint16_t *cns)
 
 static int read_aln(void *data, bam1_t *b)
 {
-    extern int bam_prob_realn_core(bam1_t *b, const char *ref, int ref_len, int flag);
     ct_t *g = (ct_t*)data;
     int ret;
     while (1)
@@ -160,7 +160,7 @@ static int read_aln(void *data, bam1_t *b)
                 g->ref = fai_fetch(g->fai, g->h->target_name[b->core.tid], &g->len);
                 g->tid = b->core.tid;
             }
-            bam_prob_realn_core(b, g->ref, g->len, 1<<1|1);
+            sam_prob_realn(b, g->ref, g->len, 1<<1|1);
         }
         break;
     }
@@ -177,7 +177,7 @@ int main_cut_target(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 'f'),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 'f', '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -201,14 +201,19 @@ int main_cut_target(int argc, char *argv[])
     }
     if (usage || argc == optind) {
         fprintf(stderr, "Usage: samtools targetcut [-Q minQ] [-i inPen] [-0 em0] [-1 em1] [-2 em2] <in.bam>\n");
-        sam_global_opt_help(stderr, "-.--f");
+        sam_global_opt_help(stderr, "-.--f-");
         return 1;
     }
     l = max_l = 0; cns = 0;
     g.fp = sam_open_format(argv[optind], "r", &ga.in);
+    if (g.fp == NULL) {
+        print_error_errno("targetcut", "can't open \"%s\"", argv[optind]);
+        return 1;
+    }
+
     g.h = sam_hdr_read(g.fp);
     if (g.h == NULL) {
-        fprintf(stderr, "Couldn't read header for '%s'\n", argv[optind]);
+        print_error("targetcut", "couldn't read header for \"%s\"", argv[optind]);
         sam_close(g.fp);
         return 1;
     }
diff --git a/samtools/cut_target.c.pysam.c b/samtools/cut_target.c.pysam.c
index 82a4c4c..e55f749 100644
--- a/samtools/cut_target.c.pysam.c
+++ b/samtools/cut_target.c.pysam.c
@@ -3,7 +3,7 @@
 /*  cut_target.c -- targetcut subcommand.
 
     Copyright (C) 2011 Broad Institute.
-    Copyright (C) 2012-2013, 2015 Genome Research Ltd.
+    Copyright (C) 2012-2013, 2015, 2016 Genome Research Ltd.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
 
@@ -30,9 +30,10 @@ DEALINGS IN THE SOFTWARE.  */
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>
+#include "htslib/hts.h"
 #include "htslib/sam.h"
-#include "errmod.h"
 #include "htslib/faidx.h"
+#include "samtools.h"
 #include "sam_opts.h"
 
 #define ERR_DEP 0.83
@@ -148,7 +149,6 @@ static void process_cns(bam_hdr_t *h, int tid, int l, uint16_t *cns)
 
 static int read_aln(void *data, bam1_t *b)
 {
-    extern int bam_prob_realn_core(bam1_t *b, const char *ref, int ref_len, int flag);
     ct_t *g = (ct_t*)data;
     int ret;
     while (1)
@@ -162,7 +162,7 @@ static int read_aln(void *data, bam1_t *b)
                 g->ref = fai_fetch(g->fai, g->h->target_name[b->core.tid], &g->len);
                 g->tid = b->core.tid;
             }
-            bam_prob_realn_core(b, g->ref, g->len, 1<<1|1);
+            sam_prob_realn(b, g->ref, g->len, 1<<1|1);
         }
         break;
     }
@@ -179,7 +179,7 @@ int main_cut_target(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 'f'),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 'f', '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -203,14 +203,19 @@ int main_cut_target(int argc, char *argv[])
     }
     if (usage || argc == optind) {
         fprintf(pysam_stderr, "Usage: samtools targetcut [-Q minQ] [-i inPen] [-0 em0] [-1 em1] [-2 em2] <in.bam>\n");
-        sam_global_opt_help(pysam_stderr, "-.--f");
+        sam_global_opt_help(pysam_stderr, "-.--f-");
         return 1;
     }
     l = max_l = 0; cns = 0;
     g.fp = sam_open_format(argv[optind], "r", &ga.in);
+    if (g.fp == NULL) {
+        print_error_errno("targetcut", "can't open \"%s\"", argv[optind]);
+        return 1;
+    }
+
     g.h = sam_hdr_read(g.fp);
     if (g.h == NULL) {
-        fprintf(pysam_stderr, "Couldn't read header for '%s'\n", argv[optind]);
+        print_error("targetcut", "couldn't read header for \"%s\"", argv[optind]);
         sam_close(g.fp);
         return 1;
     }
diff --git a/samtools/errmod.c b/samtools/errmod.c
deleted file mode 100644
index c37c6d1..0000000
--- a/samtools/errmod.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*  errmod.c -- revised MAQ error model.
-
-    Copyright (C) 2010 Broad Institute.
-    Copyright (C) 2012, 2013 Genome Research Ltd.
-
-    Author: Heng Li <lh3 at sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#include <config.h>
-
-#include <math.h>
-#include "errmod.h"
-#include "htslib/ksort.h"
-KSORT_INIT_GENERIC(uint16_t)
-
-/* table of constants generated for given depcorr and eta */
-typedef struct __errmod_coef_t {
-    double *fk, *beta, *lhet;
-} errmod_coef_t;
-
-typedef struct {
-    double fsum[16], bsum[16];
-    uint32_t c[16];
-} call_aux_t;
-
-/* \Gamma(n) = (n-1)! */
-#define lfact(n) lgamma(n+1)
-
-/* generates a success * trials table of bionomial probability densities (log transformed) */
-static double* logbinomial_table( const int n_size )
-{
-    /* prob distribution for binom var is p(k) = {n! \over k! (n-k)! } p^k (1-p)^{n-k} */
-    /* this calcs p(k) = {log(n!) - log(k!) - log((n-k)!) */
-    int k, n;
-    double *logbinom = (double*)calloc(n_size * n_size, sizeof(double));
-    for (n = 1; n < n_size; ++n) {
-        double lfn = lfact(n);
-        for (k = 1; k <= n; ++k)
-            logbinom[n<<8|k] = lfn - lfact(k) - lfact(n-k);
-    }
-    return logbinom;
-}
-
-static errmod_coef_t *cal_coef(double depcorr, double eta)
-{
-    int k, n, q;
-    long double sum, sum1;
-    double *lC;
-    errmod_coef_t *ec;
-
-    ec = calloc(1, sizeof(errmod_coef_t));
-    // initialize ->fk
-    ec->fk = (double*)calloc(256, sizeof(double));
-    ec->fk[0] = 1.0;
-    for (n = 1; n < 256; ++n)
-        ec->fk[n] = pow(1. - depcorr, n) * (1.0 - eta) + eta;
-    // initialize ->coef
-    ec->beta = (double*)calloc(256 * 256 * 64, sizeof(double));
-
-    lC = logbinomial_table( 256 );
-
-    for (q = 1; q < 64; ++q) {
-        double e = pow(10.0, -q/10.0);
-        double le = log(e);
-        double le1 = log(1.0 - e);
-        for (n = 1; n <= 255; ++n) {
-            double *beta = ec->beta + (q<<16|n<<8);
-            sum1 = sum = 0.0;
-            for (k = n; k >= 0; --k, sum1 = sum) {
-                sum = sum1 + expl(lC[n<<8|k] + k*le + (n-k)*le1);
-                beta[k] = -10. / M_LN10 * logl(sum1 / sum);
-            }
-        }
-    }
-    // initialize ->lhet
-    ec->lhet = (double*)calloc(256 * 256, sizeof(double));
-    for (n = 0; n < 256; ++n)
-        for (k = 0; k < 256; ++k)
-            ec->lhet[n<<8|k] = lC[n<<8|k] - M_LN2 * n;
-    free(lC);
-    return ec;
-}
-
-/**
- * Create errmod_t object with obj.depcorr set to depcorr and initialise
- */
-errmod_t *errmod_init(double depcorr)
-{
-    errmod_t *em;
-    em = (errmod_t*)calloc(1, sizeof(errmod_t));
-    em->depcorr = depcorr;
-    em->coef = cal_coef(depcorr, 0.03);
-    return em;
-}
-
-/**
- * Deallocate an errmod_t object
- */
-void errmod_destroy(errmod_t *em)
-{
-    if (em == 0) return;
-    free(em->coef->lhet); free(em->coef->fk); free(em->coef->beta);
-    free(em->coef); free(em);
-}
-
-//
-// em: error model to fit to data
-// m: number of alleles across all samples
-// n: number of bases observed in sample
-// bases[i]: bases observed in pileup [6 bit quality|1 bit strand|4 bit base]
-// q[i*m+j]: (Output) phred-scaled likelihood of each genotype (i,j)
-int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q)
-{
-    // Aux
-    // aux.c is total count of each base observed (ignoring strand)
-    call_aux_t aux;
-    // Loop variables
-    int i, j, k;
-    // The total count of each base observed per strand
-    int w[32];
-
-    memset(q, 0, m * m * sizeof(float)); // initialise q to 0
-    if (n == 0) return 0;
-    // This section randomly downsamples to 255 depth so as not to go beyond our precalculated matrix
-    if (n > 255) { // if we exceed 255 bases observed then shuffle them to sample and only keep the first 255
-        ks_shuffle(uint16_t, n, bases);
-        n = 255;
-    }
-    ks_introsort(uint16_t, n, bases);
-    /* zero out w and aux */
-    memset(w, 0, 32 * sizeof(int));
-    memset(&aux, 0, sizeof(call_aux_t));
-
-    for (j = n - 1; j >= 0; --j) { // calculate esum and fsum
-        uint16_t b = bases[j];
-        /* extract quality and cap at 63 */
-        int qual = b>>5 < 4? 4 : b>>5;
-        if (qual > 63) qual = 63;
-        /* extract base ORed with strand */
-        int basestrand = b&0x1f;
-        /* extract base */
-        int base = b&0xf;
-        aux.fsum[base] += em->coef->fk[w[basestrand]];
-        aux.bsum[base] += em->coef->fk[w[basestrand]] * em->coef->beta[qual<<16|n<<8|aux.c[base]];
-        ++aux.c[base];
-        ++w[basestrand];
-    }
-
-    // generate likelihood
-    for (j = 0; j < m; ++j) {
-        float tmp1, tmp3;
-        int tmp2;
-        // homozygous
-        for (k = 0, tmp1 = tmp3 = 0.0, tmp2 = 0; k < m; ++k) {
-            if (k == j) continue;
-            tmp1 += aux.bsum[k]; tmp2 += aux.c[k]; tmp3 += aux.fsum[k];
-        }
-        if (tmp2) {
-            q[j*m+j] = tmp1;
-        }
-        // heterozygous
-        for (k = j + 1; k < m; ++k) {
-            int cjk = aux.c[j] + aux.c[k];
-            for (i = 0, tmp2 = 0, tmp1 = tmp3 = 0.0; i < m; ++i) {
-                if (i == j || i == k) continue;
-                tmp1 += aux.bsum[i]; tmp2 += aux.c[i]; tmp3 += aux.fsum[i];
-            }
-            if (tmp2) {
-                q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]] + tmp1;
-            } else q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]]; // all the bases are either j or k
-        }
-        /* clamp to greater than 0 */
-        for (k = 0; k < m; ++k) if (q[j*m+k] < 0.0) q[j*m+k] = 0.0;
-    }
-
-    return 0;
-}
diff --git a/samtools/errmod.c.pysam.c b/samtools/errmod.c.pysam.c
deleted file mode 100644
index 12176cf..0000000
--- a/samtools/errmod.c.pysam.c
+++ /dev/null
@@ -1,196 +0,0 @@
-#include "pysam.h"
-
-/*  errmod.c -- revised MAQ error model.
-
-    Copyright (C) 2010 Broad Institute.
-    Copyright (C) 2012, 2013 Genome Research Ltd.
-
-    Author: Heng Li <lh3 at sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#include <config.h>
-
-#include <math.h>
-#include "errmod.h"
-#include "htslib/ksort.h"
-KSORT_INIT_GENERIC(uint16_t)
-
-/* table of constants generated for given depcorr and eta */
-typedef struct __errmod_coef_t {
-    double *fk, *beta, *lhet;
-} errmod_coef_t;
-
-typedef struct {
-    double fsum[16], bsum[16];
-    uint32_t c[16];
-} call_aux_t;
-
-/* \Gamma(n) = (n-1)! */
-#define lfact(n) lgamma(n+1)
-
-/* generates a success * trials table of bionomial probability densities (log transformed) */
-static double* logbinomial_table( const int n_size )
-{
-    /* prob distribution for binom var is p(k) = {n! \over k! (n-k)! } p^k (1-p)^{n-k} */
-    /* this calcs p(k) = {log(n!) - log(k!) - log((n-k)!) */
-    int k, n;
-    double *logbinom = (double*)calloc(n_size * n_size, sizeof(double));
-    for (n = 1; n < n_size; ++n) {
-        double lfn = lfact(n);
-        for (k = 1; k <= n; ++k)
-            logbinom[n<<8|k] = lfn - lfact(k) - lfact(n-k);
-    }
-    return logbinom;
-}
-
-static errmod_coef_t *cal_coef(double depcorr, double eta)
-{
-    int k, n, q;
-    long double sum, sum1;
-    double *lC;
-    errmod_coef_t *ec;
-
-    ec = calloc(1, sizeof(errmod_coef_t));
-    // initialize ->fk
-    ec->fk = (double*)calloc(256, sizeof(double));
-    ec->fk[0] = 1.0;
-    for (n = 1; n < 256; ++n)
-        ec->fk[n] = pow(1. - depcorr, n) * (1.0 - eta) + eta;
-    // initialize ->coef
-    ec->beta = (double*)calloc(256 * 256 * 64, sizeof(double));
-
-    lC = logbinomial_table( 256 );
-
-    for (q = 1; q < 64; ++q) {
-        double e = pow(10.0, -q/10.0);
-        double le = log(e);
-        double le1 = log(1.0 - e);
-        for (n = 1; n <= 255; ++n) {
-            double *beta = ec->beta + (q<<16|n<<8);
-            sum1 = sum = 0.0;
-            for (k = n; k >= 0; --k, sum1 = sum) {
-                sum = sum1 + expl(lC[n<<8|k] + k*le + (n-k)*le1);
-                beta[k] = -10. / M_LN10 * logl(sum1 / sum);
-            }
-        }
-    }
-    // initialize ->lhet
-    ec->lhet = (double*)calloc(256 * 256, sizeof(double));
-    for (n = 0; n < 256; ++n)
-        for (k = 0; k < 256; ++k)
-            ec->lhet[n<<8|k] = lC[n<<8|k] - M_LN2 * n;
-    free(lC);
-    return ec;
-}
-
-/**
- * Create errmod_t object with obj.depcorr set to depcorr and initialise
- */
-errmod_t *errmod_init(double depcorr)
-{
-    errmod_t *em;
-    em = (errmod_t*)calloc(1, sizeof(errmod_t));
-    em->depcorr = depcorr;
-    em->coef = cal_coef(depcorr, 0.03);
-    return em;
-}
-
-/**
- * Deallocate an errmod_t object
- */
-void errmod_destroy(errmod_t *em)
-{
-    if (em == 0) return;
-    free(em->coef->lhet); free(em->coef->fk); free(em->coef->beta);
-    free(em->coef); free(em);
-}
-
-//
-// em: error model to fit to data
-// m: number of alleles across all samples
-// n: number of bases observed in sample
-// bases[i]: bases observed in pileup [6 bit quality|1 bit strand|4 bit base]
-// q[i*m+j]: (Output) phred-scaled likelihood of each genotype (i,j)
-int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q)
-{
-    // Aux
-    // aux.c is total count of each base observed (ignoring strand)
-    call_aux_t aux;
-    // Loop variables
-    int i, j, k;
-    // The total count of each base observed per strand
-    int w[32];
-
-    memset(q, 0, m * m * sizeof(float)); // initialise q to 0
-    if (n == 0) return 0;
-    // This section randomly downsamples to 255 depth so as not to go beyond our precalculated matrix
-    if (n > 255) { // if we exceed 255 bases observed then shuffle them to sample and only keep the first 255
-        ks_shuffle(uint16_t, n, bases);
-        n = 255;
-    }
-    ks_introsort(uint16_t, n, bases);
-    /* zero out w and aux */
-    memset(w, 0, 32 * sizeof(int));
-    memset(&aux, 0, sizeof(call_aux_t));
-
-    for (j = n - 1; j >= 0; --j) { // calculate esum and fsum
-        uint16_t b = bases[j];
-        /* extract quality and cap at 63 */
-        int qual = b>>5 < 4? 4 : b>>5;
-        if (qual > 63) qual = 63;
-        /* extract base ORed with strand */
-        int basestrand = b&0x1f;
-        /* extract base */
-        int base = b&0xf;
-        aux.fsum[base] += em->coef->fk[w[basestrand]];
-        aux.bsum[base] += em->coef->fk[w[basestrand]] * em->coef->beta[qual<<16|n<<8|aux.c[base]];
-        ++aux.c[base];
-        ++w[basestrand];
-    }
-
-    // generate likelihood
-    for (j = 0; j < m; ++j) {
-        float tmp1, tmp3;
-        int tmp2;
-        // homozygous
-        for (k = 0, tmp1 = tmp3 = 0.0, tmp2 = 0; k < m; ++k) {
-            if (k == j) continue;
-            tmp1 += aux.bsum[k]; tmp2 += aux.c[k]; tmp3 += aux.fsum[k];
-        }
-        if (tmp2) {
-            q[j*m+j] = tmp1;
-        }
-        // heterozygous
-        for (k = j + 1; k < m; ++k) {
-            int cjk = aux.c[j] + aux.c[k];
-            for (i = 0, tmp2 = 0, tmp1 = tmp3 = 0.0; i < m; ++i) {
-                if (i == j || i == k) continue;
-                tmp1 += aux.bsum[i]; tmp2 += aux.c[i]; tmp3 += aux.fsum[i];
-            }
-            if (tmp2) {
-                q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]] + tmp1;
-            } else q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]]; // all the bases are either j or k
-        }
-        /* clamp to greater than 0 */
-        for (k = 0; k < m; ++k) if (q[j*m+k] < 0.0) q[j*m+k] = 0.0;
-    }
-
-    return 0;
-}
diff --git a/samtools/faidx.c b/samtools/faidx.c
index 336bde5..c5c9ed6 100644
--- a/samtools/faidx.c
+++ b/samtools/faidx.c
@@ -1,6 +1,6 @@
 /*  faidx.c -- faidx subcommand.
 
-    Copyright (C) 2008, 2009, 2013 Genome Research Ltd.
+    Copyright (C) 2008, 2009, 2013, 2016 Genome Research Ltd.
     Portions copyright (C) 2011 Broad Institute.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
@@ -25,34 +25,19 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <config.h>
 
-#include <ctype.h>
-#include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
-#include <stdint.h>
 #include <unistd.h>
-#include <stdarg.h>
+
 #include <htslib/faidx.h>
+#include "samtools.h"
 
-static void error(const char *format, ...)
+static int usage(FILE *fp, int exit_status)
 {
-    if ( format )
-    {
-        va_list ap;
-        va_start(ap, format);
-        vfprintf(stderr, format, ap);
-        va_end(ap);
-    }
-    else
-    {
-        fprintf(stderr, "\n");
-        fprintf(stderr, "Usage:   samtools faidx <file.fa|file.fa.gz> [<reg> [...]]\n");
-        fprintf(stderr, "\n");
-    }
-    exit(-1);
+    fprintf(fp, "Usage: samtools faidx <file.fa|file.fa.gz> [<reg> [...]]\n");
+    return exit_status;
 }
 
-
 int faidx_main(int argc, char *argv[])
 {
     int c;
@@ -61,39 +46,60 @@ int faidx_main(int argc, char *argv[])
         switch(c)
         {
             case 'h':
+                return usage(stdout, EXIT_SUCCESS);
+
             default:
-                error(NULL);
+                return usage(stderr, EXIT_FAILURE);
         }
     }
     if ( argc==optind )
-        error(NULL);
+        return usage(stdout, EXIT_SUCCESS);
     if ( argc==2 )
     {
         if (fai_build(argv[optind]) != 0) {
-            error("Could not build fai index %s.fai\n", argv[optind]);
+            fprintf(stderr, "Could not build fai index %s.fai\n", argv[optind]);
+            return EXIT_FAILURE;
         }
         return 0;
     }
 
     faidx_t *fai = fai_load(argv[optind]);
-    if ( !fai ) error("Could not load fai index of %s\n", argv[optind]);
+    if ( !fai ) {
+        fprintf(stderr, "Could not load fai index of %s\n", argv[optind]);
+        return EXIT_FAILURE;
+    }
+
+    int exit_status = EXIT_SUCCESS;
 
-    while ( ++optind<argc )
+    while ( ++optind<argc && exit_status == EXIT_SUCCESS)
     {
         printf(">%s\n", argv[optind]);
-        int i, j, seq_len;
+        int seq_len;
         char *seq = fai_fetch(fai, argv[optind], &seq_len);
-        if ( seq_len < 0 ) error("Failed to fetch sequence in %s\n", argv[optind]);
-        for (i=0; i<seq_len; i+=60)
+        if ( seq_len < 0 ) {
+            fprintf(stderr, "Failed to fetch sequence in %s\n", argv[optind]);
+            exit_status = EXIT_FAILURE;
+            break;
+        }
+        size_t i, seq_sz = seq_len;
+        for (i=0; i<seq_sz; i+=60)
         {
-            for (j=0; j<60 && i+j<seq_len; j++)
-                putchar(seq[i+j]);
-            putchar('\n');
+            size_t len = i + 60 < seq_sz ? 60 : seq_sz - i;
+            if (fwrite(seq + i, 1, len, stdout) < len ||
+                putchar('\n') == EOF) {
+                print_error_errno("faidx", "failed to write output");
+                exit_status = EXIT_FAILURE;
+                break;
+            }
         }
         free(seq);
     }
     fai_destroy(fai);
 
-    return 0;
-}
+    if (fflush(stdout) == EOF) {
+        print_error_errno("faidx", "failed to flush output");
+        exit_status = EXIT_FAILURE;
+    }
 
+    return exit_status;
+}
diff --git a/samtools/faidx.c.pysam.c b/samtools/faidx.c.pysam.c
index ac06647..ec8c90f 100644
--- a/samtools/faidx.c.pysam.c
+++ b/samtools/faidx.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  faidx.c -- faidx subcommand.
 
-    Copyright (C) 2008, 2009, 2013 Genome Research Ltd.
+    Copyright (C) 2008, 2009, 2013, 2016 Genome Research Ltd.
     Portions copyright (C) 2011 Broad Institute.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
@@ -27,34 +27,19 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <config.h>
 
-#include <ctype.h>
-#include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
-#include <stdint.h>
 #include <unistd.h>
-#include <stdarg.h>
+
 #include <htslib/faidx.h>
+#include "samtools.h"
 
-static void error(const char *format, ...)
+static int usage(FILE *fp, int exit_status)
 {
-    if ( format )
-    {
-        va_list ap;
-        va_start(ap, format);
-        vfprintf(pysam_stderr, format, ap);
-        va_end(ap);
-    }
-    else
-    {
-        fprintf(pysam_stderr, "\n");
-        fprintf(pysam_stderr, "Usage:   samtools faidx <file.fa|file.fa.gz> [<reg> [...]]\n");
-        fprintf(pysam_stderr, "\n");
-    }
-    exit(-1);
+    fprintf(fp, "Usage: samtools faidx <file.fa|file.fa.gz> [<reg> [...]]\n");
+    return exit_status;
 }
 
-
 int faidx_main(int argc, char *argv[])
 {
     int c;
@@ -63,39 +48,60 @@ int faidx_main(int argc, char *argv[])
         switch(c)
         {
             case 'h':
+                return usage(pysam_stdout, EXIT_SUCCESS);
+
             default:
-                error(NULL);
+                return usage(pysam_stderr, EXIT_FAILURE);
         }
     }
     if ( argc==optind )
-        error(NULL);
+        return usage(pysam_stdout, EXIT_SUCCESS);
     if ( argc==2 )
     {
         if (fai_build(argv[optind]) != 0) {
-            error("Could not build fai index %s.fai\n", argv[optind]);
+            fprintf(pysam_stderr, "Could not build fai index %s.fai\n", argv[optind]);
+            return EXIT_FAILURE;
         }
         return 0;
     }
 
     faidx_t *fai = fai_load(argv[optind]);
-    if ( !fai ) error("Could not load fai index of %s\n", argv[optind]);
+    if ( !fai ) {
+        fprintf(pysam_stderr, "Could not load fai index of %s\n", argv[optind]);
+        return EXIT_FAILURE;
+    }
+
+    int exit_status = EXIT_SUCCESS;
 
-    while ( ++optind<argc )
+    while ( ++optind<argc && exit_status == EXIT_SUCCESS)
     {
         fprintf(pysam_stdout, ">%s\n", argv[optind]);
-        int i, j, seq_len;
+        int seq_len;
         char *seq = fai_fetch(fai, argv[optind], &seq_len);
-        if ( seq_len < 0 ) error("Failed to fetch sequence in %s\n", argv[optind]);
-        for (i=0; i<seq_len; i+=60)
+        if ( seq_len < 0 ) {
+            fprintf(pysam_stderr, "Failed to fetch sequence in %s\n", argv[optind]);
+            exit_status = EXIT_FAILURE;
+            break;
+        }
+        size_t i, seq_sz = seq_len;
+        for (i=0; i<seq_sz; i+=60)
         {
-            for (j=0; j<60 && i+j<seq_len; j++)
-                fputc(seq[i+j], pysam_stdout);
-            fputc('\n', pysam_stdout);
+            size_t len = i + 60 < seq_sz ? 60 : seq_sz - i;
+            if (fwrite(seq + i, 1, len, pysam_stdout) < len ||
+                fputc('\n', pysam_stdout) == EOF) {
+                print_error_errno("faidx", "failed to write output");
+                exit_status = EXIT_FAILURE;
+                break;
+            }
         }
         free(seq);
     }
     fai_destroy(fai);
 
-    return 0;
-}
+    if (fflush(pysam_stdout) == EOF) {
+        print_error_errno("faidx", "failed to flush output");
+        exit_status = EXIT_FAILURE;
+    }
 
+    return exit_status;
+}
diff --git a/samtools/kprobaln.c b/samtools/kprobaln.c
deleted file mode 100644
index e319708..0000000
--- a/samtools/kprobaln.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/* The MIT License
-
-   Copyright (C) 2003-2006, 2008-2010 by Heng Li <lh3lh3 at live.co.uk>
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-#include <config.h>
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-#include <math.h>
-#include "kprobaln.h"
-
-/*****************************************
- * Probabilistic banded glocal alignment *
- *****************************************/
-
-#define EI .25
-#define EM .33333333333
-
-static float g_qual2prob[256];
-
-#define set_u(u, b, i, k) { int x=(i)-(b); x=x>0?x:0; (u)=((k)-x+1)*3; }
-
-kpa_par_t kpa_par_def = { 0.001, 0.1, 10 };
-kpa_par_t kpa_par_alt = { 0.0001, 0.01, 10 };
-
-/*
-  The topology of the profile HMM:
-
-           /\             /\        /\             /\
-           I[1]           I[k-1]    I[k]           I[L]
-            ^   \      \    ^    \   ^   \      \   ^
-            |    \      \   |     \  |    \      \  |
-    M[0]   M[1] -> ... -> M[k-1] -> M[k] -> ... -> M[L]   M[L+1]
-                \      \/        \/      \/      /
-                 \     /\        /\      /\     /
-                       -> D[k-1] -> D[k] ->
-
-   M[0] points to every {M,I}[k] and every {M,I}[k] points M[L+1].
-
-   On input, _ref is the reference sequence and _query is the query
-   sequence. Both are sequences of 0/1/2/3/4 where 4 stands for an
-   ambiguous residue. iqual is the base quality. c sets the gap open
-   probability, gap extension probability and band width.
-
-   On output, state and q are arrays of length l_query. The higher 30
-   bits give the reference position the query base is matched to and the
-   lower two bits can be 0 (an alignment match) or 1 (an
-   insertion). q[i] gives the phred scaled posterior probability of
-   state[i] being wrong.
- */
-int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, const uint8_t *iqual,
-			   const kpa_par_t *c, int *state, uint8_t *q)
-{
-	double **f, **b = 0, *s, m[9], sI, sM, bI, bM, pb;
-	float *qual, *_qual;
-	const uint8_t *ref, *query;
-	int bw, bw2, i, k, is_diff = 0, is_backward = 1, Pr;
-
-    if ( l_ref<=0 || l_query<=0 ) return 0; // FIXME: this may not be an ideal fix, just prevents sefgault
-
-	/*** initialization ***/
-	is_backward = state && q? 1 : 0;
-	ref = _ref - 1; query = _query - 1; // change to 1-based coordinate
-	bw = l_ref > l_query? l_ref : l_query;
-	if (bw > c->bw) bw = c->bw;
-	if (bw < abs(l_ref - l_query)) bw = abs(l_ref - l_query);
-	bw2 = bw * 2 + 1;
-	// allocate the forward and backward matrices f[][] and b[][] and the scaling array s[]
-	f = calloc(l_query+1, sizeof(double*));
-	if (is_backward) b = calloc(l_query+1, sizeof(double*));
-	for (i = 0; i <= l_query; ++i) {    // FIXME: this will lead in segfault for l_query==0
-		f[i] = calloc(bw2 * 3 + 6, sizeof(double)); // FIXME: this is over-allocated for very short seqs
-		if (is_backward) b[i] = calloc(bw2 * 3 + 6, sizeof(double));
-	}
-	s = calloc(l_query+2, sizeof(double)); // s[] is the scaling factor to avoid underflow
-	// initialize qual
-	_qual = calloc(l_query, sizeof(float));
-	if (g_qual2prob[0] == 0)
-		for (i = 0; i < 256; ++i)
-			g_qual2prob[i] = pow(10, -i/10.);
-	for (i = 0; i < l_query; ++i) _qual[i] = g_qual2prob[iqual? iqual[i] : 30];
-	qual = _qual - 1;
-	// initialize transition probability
-	sM = sI = 1. / (2 * l_query + 2); // the value here seems not to affect results; FIXME: need proof
-	m[0*3+0] = (1 - c->d - c->d) * (1 - sM); m[0*3+1] = m[0*3+2] = c->d * (1 - sM);
-	m[1*3+0] = (1 - c->e) * (1 - sI); m[1*3+1] = c->e * (1 - sI); m[1*3+2] = 0.;
-	m[2*3+0] = 1 - c->e; m[2*3+1] = 0.; m[2*3+2] = c->e;
-	bM = (1 - c->d) / l_ref; bI = c->d / l_ref; // (bM+bI)*l_ref==1
-	/*** forward ***/
-	// f[0]
-	set_u(k, bw, 0, 0);
-	f[0][k] = s[0] = 1.;
-	{ // f[1]
-		double *fi = f[1], sum;
-		int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1, _beg, _end;
-		for (k = beg, sum = 0.; k <= end; ++k) {
-			int u;
-			double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] * EM;
-			set_u(u, bw, 1, k);
-			fi[u+0] = e * bM; fi[u+1] = EI * bI;
-			sum += fi[u] + fi[u+1];
-		}
-		// rescale
-		s[1] = sum;
-		set_u(_beg, bw, 1, beg); set_u(_end, bw, 1, end); _end += 2;
-		for (k = _beg; k <= _end; ++k) fi[k] /= sum;
-	}
-	// f[2..l_query]
-	for (i = 2; i <= l_query; ++i) {
-		double *fi = f[i], *fi1 = f[i-1], sum, qli = qual[i];
-		int beg = 1, end = l_ref, x, _beg, _end;
-		uint8_t qyi = query[i];
-		x = i - bw; beg = beg > x? beg : x; // band start
-		x = i + bw; end = end < x? end : x; // band end
-		for (k = beg, sum = 0.; k <= end; ++k) {
-			int u, v11, v01, v10;
-			double e;
-			e = (ref[k] > 3 || qyi > 3)? 1. : ref[k] == qyi? 1. - qli : qli * EM;
-			set_u(u, bw, i, k); set_u(v11, bw, i-1, k-1); set_u(v10, bw, i-1, k); set_u(v01, bw, i, k-1);
-			fi[u+0] = e * (m[0] * fi1[v11+0] + m[3] * fi1[v11+1] + m[6] * fi1[v11+2]);
-			fi[u+1] = EI * (m[1] * fi1[v10+0] + m[4] * fi1[v10+1]);
-			fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2];
-			sum += fi[u] + fi[u+1] + fi[u+2];
-//			fprintf(stderr, "F (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, fi[u], fi[u+1], fi[u+2]); // DEBUG
-		}
-		// rescale
-		s[i] = sum;
-		set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
-		for (k = _beg, sum = 1./sum; k <= _end; ++k) fi[k] *= sum;
-	}
-	{ // f[l_query+1]
-		double sum;
-		for (k = 1, sum = 0.; k <= l_ref; ++k) {
-			int u;
-			set_u(u, bw, l_query, k);
-			if (u < 3 || u >= bw2*3+3) continue;
-		    sum += f[l_query][u+0] * sM + f[l_query][u+1] * sI;
-		}
-		s[l_query+1] = sum; // the last scaling factor
-	}
-	{ // compute likelihood
-		double p = 1., Pr1 = 0.;
-		for (i = 0; i <= l_query + 1; ++i) {
-			p *= s[i];
-			if (p < 1e-100) Pr1 += -4.343 * log(p), p = 1.;
-		}
-		Pr1 += -4.343 * log(p * l_ref * l_query);
-		Pr = (int)(Pr1 + .499);
-		if (!is_backward) { // skip backward and MAP
-			for (i = 0; i <= l_query; ++i) free(f[i]);
-			free(f); free(s); free(_qual);
-			return Pr;
-		}
-	}
-	/*** backward ***/
-	// b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from)
-	for (k = 1; k <= l_ref; ++k) {
-		int u;
-		double *bi = b[l_query];
-		set_u(u, bw, l_query, k);
-		if (u < 3 || u >= bw2*3+3) continue;
-		bi[u+0] = sM / s[l_query] / s[l_query+1]; bi[u+1] = sI / s[l_query] / s[l_query+1];
-	}
-	// b[l_query-1..1]
-	for (i = l_query - 1; i >= 1; --i) {
-		int beg = 1, end = l_ref, x, _beg, _end;
-		double *bi = b[i], *bi1 = b[i+1], y = (i > 1), qli1 = qual[i+1];
-		uint8_t qyi1 = query[i+1];
-		x = i - bw; beg = beg > x? beg : x;
-		x = i + bw; end = end < x? end : x;
-		for (k = end; k >= beg; --k) {
-			int u, v11, v01, v10;
-			double e;
-			set_u(u, bw, i, k); set_u(v11, bw, i+1, k+1); set_u(v10, bw, i+1, k); set_u(v01, bw, i, k+1);
-			e = (k >= l_ref? 0 : (ref[k+1] > 3 || qyi1 > 3)? 1. : ref[k+1] == qyi1? 1. - qli1 : qli1 * EM) * bi1[v11];
-			bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e.
-			bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1];
-			bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y;
-//			fprintf(stderr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG
-		}
-		// rescale
-		set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
-		for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y;
-	}
-	{ // b[0]
-		int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1;
-		double sum = 0.;
-		for (k = end; k >= beg; --k) {
-			int u;
-			double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] * EM;
-			set_u(u, bw, 1, k);
-			if (u < 3 || u >= bw2*3+3) continue;
-		    sum += e * b[1][u+0] * bM + EI * b[1][u+1] * bI;
-		}
-		set_u(k, bw, 0, 0);
-		pb = b[0][k] = sum / s[0]; // if everything works as is expected, pb == 1.0
-	}
-	is_diff = fabs(pb - 1.) > 1e-7? 1 : 0;
-	/*** MAP ***/
-	for (i = 1; i <= l_query; ++i) {
-		double sum = 0., *fi = f[i], *bi = b[i], max = 0.;
-		int beg = 1, end = l_ref, x, max_k = -1;
-		x = i - bw; beg = beg > x? beg : x;
-		x = i + bw; end = end < x? end : x;
-		for (k = beg; k <= end; ++k) {
-			int u;
-			double z;
-			set_u(u, bw, i, k);
-			z = fi[u+0] * bi[u+0]; if (z > max) max = z, max_k = (k-1)<<2 | 0; sum += z;
-			z = fi[u+1] * bi[u+1]; if (z > max) max = z, max_k = (k-1)<<2 | 1; sum += z;
-		}
-		max /= sum; sum *= s[i]; // if everything works as is expected, sum == 1.0
-		if (state) state[i-1] = max_k;
-		if (q) k = (int)(-4.343 * log(1. - max) + .499), q[i-1] = k > 100? 99 : k;
-#ifdef _MAIN
-		fprintf(stderr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", pb, sum, i-1, max_k>>2,
-				"ACGT"[query[i]], "ACGT"[ref[(max_k>>2)+1]], max_k&3, max); // DEBUG
-#endif
-	}
-	/*** free ***/
-	for (i = 0; i <= l_query; ++i) {
-		free(f[i]); free(b[i]);
-	}
-	free(f); free(b); free(s); free(_qual);
-	return Pr;
-}
-
-#ifdef _MAIN
-#include <unistd.h>
-int main(int argc, char *argv[])
-{
-	uint8_t conv[256], *iqual, *ref, *query;
-	int c, l_ref, l_query, i, q = 30, b = 10, P;
-	while ((c = getopt(argc, argv, "b:q:")) >= 0) {
-		switch (c) {
-		case 'b': b = atoi(optarg); break;
-		case 'q': q = atoi(optarg); break;
-		}
-	}
-	if (optind + 2 > argc) {
-		fprintf(stderr, "Usage: %s [-q %d] [-b %d] <ref> <query>\n", argv[0], q, b); // example: acttc attc
-		return 1;
-	}
-	memset(conv, 4, 256);
-	conv['a'] = conv['A'] = 0; conv['c'] = conv['C'] = 1;
-	conv['g'] = conv['G'] = 2; conv['t'] = conv['T'] = 3;
-	ref = (uint8_t*)argv[optind]; query = (uint8_t*)argv[optind+1];
-	l_ref = strlen((char*)ref); l_query = strlen((char*)query);
-	for (i = 0; i < l_ref; ++i) ref[i] = conv[ref[i]];
-	for (i = 0; i < l_query; ++i) query[i] = conv[query[i]];
-	iqual = malloc(l_query);
-	memset(iqual, q, l_query);
-	kpa_par_def.bw = b;
-	P = kpa_glocal(ref, l_ref, query, l_query, iqual, &kpa_par_alt, 0, 0);
-	fprintf(stderr, "%d\n", P);
-	free(iqual);
-	return 0;
-}
-#endif
diff --git a/samtools/kprobaln.c.pysam.c b/samtools/kprobaln.c.pysam.c
deleted file mode 100644
index 630b730..0000000
--- a/samtools/kprobaln.c.pysam.c
+++ /dev/null
@@ -1,284 +0,0 @@
-#include "pysam.h"
-
-/* The MIT License
-
-   Copyright (C) 2003-2006, 2008-2010 by Heng Li <lh3lh3 at live.co.uk>
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-#include <config.h>
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-#include <math.h>
-#include "kprobaln.h"
-
-/*****************************************
- * Probabilistic banded glocal alignment *
- *****************************************/
-
-#define EI .25
-#define EM .33333333333
-
-static float g_qual2prob[256];
-
-#define set_u(u, b, i, k) { int x=(i)-(b); x=x>0?x:0; (u)=((k)-x+1)*3; }
-
-kpa_par_t kpa_par_def = { 0.001, 0.1, 10 };
-kpa_par_t kpa_par_alt = { 0.0001, 0.01, 10 };
-
-/*
-  The topology of the profile HMM:
-
-           /\             /\        /\             /\
-           I[1]           I[k-1]    I[k]           I[L]
-            ^   \      \    ^    \   ^   \      \   ^
-            |    \      \   |     \  |    \      \  |
-    M[0]   M[1] -> ... -> M[k-1] -> M[k] -> ... -> M[L]   M[L+1]
-                \      \/        \/      \/      /
-                 \     /\        /\      /\     /
-                       -> D[k-1] -> D[k] ->
-
-   M[0] points to every {M,I}[k] and every {M,I}[k] points M[L+1].
-
-   On input, _ref is the reference sequence and _query is the query
-   sequence. Both are sequences of 0/1/2/3/4 where 4 stands for an
-   ambiguous residue. iqual is the base quality. c sets the gap open
-   probability, gap extension probability and band width.
-
-   On output, state and q are arrays of length l_query. The higher 30
-   bits give the reference position the query base is matched to and the
-   lower two bits can be 0 (an alignment match) or 1 (an
-   insertion). q[i] gives the phred scaled posterior probability of
-   state[i] being wrong.
- */
-int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, const uint8_t *iqual,
-			   const kpa_par_t *c, int *state, uint8_t *q)
-{
-	double **f, **b = 0, *s, m[9], sI, sM, bI, bM, pb;
-	float *qual, *_qual;
-	const uint8_t *ref, *query;
-	int bw, bw2, i, k, is_diff = 0, is_backward = 1, Pr;
-
-    if ( l_ref<=0 || l_query<=0 ) return 0; // FIXME: this may not be an ideal fix, just prevents sefgault
-
-	/*** initialization ***/
-	is_backward = state && q? 1 : 0;
-	ref = _ref - 1; query = _query - 1; // change to 1-based coordinate
-	bw = l_ref > l_query? l_ref : l_query;
-	if (bw > c->bw) bw = c->bw;
-	if (bw < abs(l_ref - l_query)) bw = abs(l_ref - l_query);
-	bw2 = bw * 2 + 1;
-	// allocate the forward and backward matrices f[][] and b[][] and the scaling array s[]
-	f = calloc(l_query+1, sizeof(double*));
-	if (is_backward) b = calloc(l_query+1, sizeof(double*));
-	for (i = 0; i <= l_query; ++i) {    // FIXME: this will lead in segfault for l_query==0
-		f[i] = calloc(bw2 * 3 + 6, sizeof(double)); // FIXME: this is over-allocated for very short seqs
-		if (is_backward) b[i] = calloc(bw2 * 3 + 6, sizeof(double));
-	}
-	s = calloc(l_query+2, sizeof(double)); // s[] is the scaling factor to avoid underflow
-	// initialize qual
-	_qual = calloc(l_query, sizeof(float));
-	if (g_qual2prob[0] == 0)
-		for (i = 0; i < 256; ++i)
-			g_qual2prob[i] = pow(10, -i/10.);
-	for (i = 0; i < l_query; ++i) _qual[i] = g_qual2prob[iqual? iqual[i] : 30];
-	qual = _qual - 1;
-	// initialize transition probability
-	sM = sI = 1. / (2 * l_query + 2); // the value here seems not to affect results; FIXME: need proof
-	m[0*3+0] = (1 - c->d - c->d) * (1 - sM); m[0*3+1] = m[0*3+2] = c->d * (1 - sM);
-	m[1*3+0] = (1 - c->e) * (1 - sI); m[1*3+1] = c->e * (1 - sI); m[1*3+2] = 0.;
-	m[2*3+0] = 1 - c->e; m[2*3+1] = 0.; m[2*3+2] = c->e;
-	bM = (1 - c->d) / l_ref; bI = c->d / l_ref; // (bM+bI)*l_ref==1
-	/*** forward ***/
-	// f[0]
-	set_u(k, bw, 0, 0);
-	f[0][k] = s[0] = 1.;
-	{ // f[1]
-		double *fi = f[1], sum;
-		int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1, _beg, _end;
-		for (k = beg, sum = 0.; k <= end; ++k) {
-			int u;
-			double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] * EM;
-			set_u(u, bw, 1, k);
-			fi[u+0] = e * bM; fi[u+1] = EI * bI;
-			sum += fi[u] + fi[u+1];
-		}
-		// rescale
-		s[1] = sum;
-		set_u(_beg, bw, 1, beg); set_u(_end, bw, 1, end); _end += 2;
-		for (k = _beg; k <= _end; ++k) fi[k] /= sum;
-	}
-	// f[2..l_query]
-	for (i = 2; i <= l_query; ++i) {
-		double *fi = f[i], *fi1 = f[i-1], sum, qli = qual[i];
-		int beg = 1, end = l_ref, x, _beg, _end;
-		uint8_t qyi = query[i];
-		x = i - bw; beg = beg > x? beg : x; // band start
-		x = i + bw; end = end < x? end : x; // band end
-		for (k = beg, sum = 0.; k <= end; ++k) {
-			int u, v11, v01, v10;
-			double e;
-			e = (ref[k] > 3 || qyi > 3)? 1. : ref[k] == qyi? 1. - qli : qli * EM;
-			set_u(u, bw, i, k); set_u(v11, bw, i-1, k-1); set_u(v10, bw, i-1, k); set_u(v01, bw, i, k-1);
-			fi[u+0] = e * (m[0] * fi1[v11+0] + m[3] * fi1[v11+1] + m[6] * fi1[v11+2]);
-			fi[u+1] = EI * (m[1] * fi1[v10+0] + m[4] * fi1[v10+1]);
-			fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2];
-			sum += fi[u] + fi[u+1] + fi[u+2];
-//			fprintf(pysam_stderr, "F (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, fi[u], fi[u+1], fi[u+2]); // DEBUG
-		}
-		// rescale
-		s[i] = sum;
-		set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
-		for (k = _beg, sum = 1./sum; k <= _end; ++k) fi[k] *= sum;
-	}
-	{ // f[l_query+1]
-		double sum;
-		for (k = 1, sum = 0.; k <= l_ref; ++k) {
-			int u;
-			set_u(u, bw, l_query, k);
-			if (u < 3 || u >= bw2*3+3) continue;
-		    sum += f[l_query][u+0] * sM + f[l_query][u+1] * sI;
-		}
-		s[l_query+1] = sum; // the last scaling factor
-	}
-	{ // compute likelihood
-		double p = 1., Pr1 = 0.;
-		for (i = 0; i <= l_query + 1; ++i) {
-			p *= s[i];
-			if (p < 1e-100) Pr1 += -4.343 * log(p), p = 1.;
-		}
-		Pr1 += -4.343 * log(p * l_ref * l_query);
-		Pr = (int)(Pr1 + .499);
-		if (!is_backward) { // skip backward and MAP
-			for (i = 0; i <= l_query; ++i) free(f[i]);
-			free(f); free(s); free(_qual);
-			return Pr;
-		}
-	}
-	/*** backward ***/
-	// b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from)
-	for (k = 1; k <= l_ref; ++k) {
-		int u;
-		double *bi = b[l_query];
-		set_u(u, bw, l_query, k);
-		if (u < 3 || u >= bw2*3+3) continue;
-		bi[u+0] = sM / s[l_query] / s[l_query+1]; bi[u+1] = sI / s[l_query] / s[l_query+1];
-	}
-	// b[l_query-1..1]
-	for (i = l_query - 1; i >= 1; --i) {
-		int beg = 1, end = l_ref, x, _beg, _end;
-		double *bi = b[i], *bi1 = b[i+1], y = (i > 1), qli1 = qual[i+1];
-		uint8_t qyi1 = query[i+1];
-		x = i - bw; beg = beg > x? beg : x;
-		x = i + bw; end = end < x? end : x;
-		for (k = end; k >= beg; --k) {
-			int u, v11, v01, v10;
-			double e;
-			set_u(u, bw, i, k); set_u(v11, bw, i+1, k+1); set_u(v10, bw, i+1, k); set_u(v01, bw, i, k+1);
-			e = (k >= l_ref? 0 : (ref[k+1] > 3 || qyi1 > 3)? 1. : ref[k+1] == qyi1? 1. - qli1 : qli1 * EM) * bi1[v11];
-			bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e.
-			bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1];
-			bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y;
-//			fprintf(pysam_stderr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG
-		}
-		// rescale
-		set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
-		for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y;
-	}
-	{ // b[0]
-		int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1;
-		double sum = 0.;
-		for (k = end; k >= beg; --k) {
-			int u;
-			double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] * EM;
-			set_u(u, bw, 1, k);
-			if (u < 3 || u >= bw2*3+3) continue;
-		    sum += e * b[1][u+0] * bM + EI * b[1][u+1] * bI;
-		}
-		set_u(k, bw, 0, 0);
-		pb = b[0][k] = sum / s[0]; // if everything works as is expected, pb == 1.0
-	}
-	is_diff = fabs(pb - 1.) > 1e-7? 1 : 0;
-	/*** MAP ***/
-	for (i = 1; i <= l_query; ++i) {
-		double sum = 0., *fi = f[i], *bi = b[i], max = 0.;
-		int beg = 1, end = l_ref, x, max_k = -1;
-		x = i - bw; beg = beg > x? beg : x;
-		x = i + bw; end = end < x? end : x;
-		for (k = beg; k <= end; ++k) {
-			int u;
-			double z;
-			set_u(u, bw, i, k);
-			z = fi[u+0] * bi[u+0]; if (z > max) max = z, max_k = (k-1)<<2 | 0; sum += z;
-			z = fi[u+1] * bi[u+1]; if (z > max) max = z, max_k = (k-1)<<2 | 1; sum += z;
-		}
-		max /= sum; sum *= s[i]; // if everything works as is expected, sum == 1.0
-		if (state) state[i-1] = max_k;
-		if (q) k = (int)(-4.343 * log(1. - max) + .499), q[i-1] = k > 100? 99 : k;
-#ifdef _MAIN
-		fprintf(pysam_stderr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", pb, sum, i-1, max_k>>2,
-				"ACGT"[query[i]], "ACGT"[ref[(max_k>>2)+1]], max_k&3, max); // DEBUG
-#endif
-	}
-	/*** free ***/
-	for (i = 0; i <= l_query; ++i) {
-		free(f[i]); free(b[i]);
-	}
-	free(f); free(b); free(s); free(_qual);
-	return Pr;
-}
-
-#ifdef _MAIN
-#include <unistd.h>
-int samtools_kprobaln_main(int argc, char *argv[])
-{
-	uint8_t conv[256], *iqual, *ref, *query;
-	int c, l_ref, l_query, i, q = 30, b = 10, P;
-	while ((c = getopt(argc, argv, "b:q:")) >= 0) {
-		switch (c) {
-		case 'b': b = atoi(optarg); break;
-		case 'q': q = atoi(optarg); break;
-		}
-	}
-	if (optind + 2 > argc) {
-		fprintf(pysam_stderr, "Usage: %s [-q %d] [-b %d] <ref> <query>\n", argv[0], q, b); // example: acttc attc
-		return 1;
-	}
-	memset(conv, 4, 256);
-	conv['a'] = conv['A'] = 0; conv['c'] = conv['C'] = 1;
-	conv['g'] = conv['G'] = 2; conv['t'] = conv['T'] = 3;
-	ref = (uint8_t*)argv[optind]; query = (uint8_t*)argv[optind+1];
-	l_ref = strlen((char*)ref); l_query = strlen((char*)query);
-	for (i = 0; i < l_ref; ++i) ref[i] = conv[ref[i]];
-	for (i = 0; i < l_query; ++i) query[i] = conv[query[i]];
-	iqual = malloc(l_query);
-	memset(iqual, q, l_query);
-	kpa_par_def.bw = b;
-	P = kpa_glocal(ref, l_ref, query, l_query, iqual, &kpa_par_alt, 0, 0);
-	fprintf(pysam_stderr, "%d\n", P);
-	free(iqual);
-	return 0;
-}
-#endif
diff --git a/samtools/kprobaln.h b/samtools/kprobaln.h
deleted file mode 100644
index 50ae77b..0000000
--- a/samtools/kprobaln.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* The MIT License
-
-   Copyright (C) 2003-2006, 2008-2010 by Heng Li <lh3 at live.co.uk>
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-#ifndef LH3_KPROBALN_H_
-#define LH3_KPROBALN_H_
-
-#include <stdint.h>
-
-typedef struct {
-	float d, e;
-	int bw;
-} kpa_par_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-	int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, const uint8_t *iqual,
-				   const kpa_par_t *c, int *state, uint8_t *q);
-
-#ifdef __cplusplus
-}
-#endif
-
-extern kpa_par_t kpa_par_def, kpa_par_alt;
-
-#endif
diff --git a/samtools/misc/ace2sam.c b/samtools/misc/ace2sam.c
index 77b9993..19727eb 100644
--- a/samtools/misc/ace2sam.c
+++ b/samtools/misc/ace2sam.c
@@ -161,7 +161,10 @@ int main(int argc, char *argv[])
             }
             if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
             ks_getuntil(ks, '\n', &s, &dret); // skip the empty line
-            if (write_cns) puts(t[4].s); t[4].l = 0;
+            if (write_cns) {
+                if (t[4].l) puts(t[4].s);
+                t[4].l = 0;
+            }
         } else if (strcmp(s.s, "AF") == 0) { // padded read position
             int reversed, neg, pos;
             if (t[0].l == 0) fatal("come to 'AF' before reading 'CO'");
diff --git a/samtools/misc/ace2sam.c.pysam.c b/samtools/misc/ace2sam.c.pysam.c
index a663399..02d2f58 100644
--- a/samtools/misc/ace2sam.c.pysam.c
+++ b/samtools/misc/ace2sam.c.pysam.c
@@ -163,7 +163,10 @@ int samtools_ace2sam_main(int argc, char *argv[])
             }
             if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
             ks_getuntil(ks, '\n', &s, &dret); // skip the empty line
-            if (write_cns) fputs(t[4].s, pysam_stdout) & fputc('\n', pysam_stdout); t[4].l = 0;
+            if (write_cns) {
+                if (t[4].l) fputs(t[4].s, pysam_stdout) & fputc('\n', pysam_stdout);
+                t[4].l = 0;
+            }
         } else if (strcmp(s.s, "AF") == 0) { // padded read position
             int reversed, neg, pos;
             if (t[0].l == 0) fatal("come to 'AF' before reading 'CO'");
diff --git a/samtools/padding.c b/samtools/padding.c
index cea79cf..2f10e86 100644
--- a/samtools/padding.c
+++ b/samtools/padding.c
@@ -491,7 +491,7 @@ int main_pad2unpad(int argc, char *argv[])
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 'T'),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 'T', '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -603,7 +603,7 @@ static int usage(int is_long_help)
     fprintf(stderr, "               Padded reference sequence file [null]\n");
     fprintf(stderr, "  -o FILE      Output file name [stdout]\n");
     fprintf(stderr, "  -?           Longer help\n");
-    sam_global_opt_help(stderr, "-...-");
+    sam_global_opt_help(stderr, "-...--");
 
     if (is_long_help)
         fprintf(stderr,
diff --git a/samtools/padding.c.pysam.c b/samtools/padding.c.pysam.c
index 9f85c95..a3461e4 100644
--- a/samtools/padding.c.pysam.c
+++ b/samtools/padding.c.pysam.c
@@ -493,7 +493,7 @@ int main_pad2unpad(int argc, char *argv[])
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 'T'),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 'T', '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -605,7 +605,7 @@ static int usage(int is_long_help)
     fprintf(pysam_stderr, "               Padded reference sequence file [null]\n");
     fprintf(pysam_stderr, "  -o FILE      Output file name [pysam_stdout]\n");
     fprintf(pysam_stderr, "  -?           Longer help\n");
-    sam_global_opt_help(pysam_stderr, "-...-");
+    sam_global_opt_help(pysam_stderr, "-...--");
 
     if (is_long_help)
         fprintf(pysam_stderr,
diff --git a/samtools/phase.c b/samtools/phase.c
index 6909912..584334d 100644
--- a/samtools/phase.c
+++ b/samtools/phase.c
@@ -31,9 +31,9 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdint.h>
 #include <math.h>
 #include <zlib.h>
+#include "htslib/hts.h"
 #include "htslib/sam.h"
 #include "htslib/kstring.h"
-#include "errmod.h"
 #include "sam_opts.h"
 #include "samtools.h"
 
@@ -580,7 +580,7 @@ int main_phase(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0, '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -620,7 +620,7 @@ int main_phase(int argc, char *argv[])
 //      fprintf(stderr, "         -e        do not discover SNPs (effective with -l)\n");
         fprintf(stderr, "\n");
 
-        sam_global_opt_help(stderr, "-....");
+        sam_global_opt_help(stderr, "-....-");
 
         return 1;
     }
diff --git a/samtools/phase.c.pysam.c b/samtools/phase.c.pysam.c
index 3babd37..4226c03 100644
--- a/samtools/phase.c.pysam.c
+++ b/samtools/phase.c.pysam.c
@@ -33,9 +33,9 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdint.h>
 #include <math.h>
 #include <zlib.h>
+#include "htslib/hts.h"
 #include "htslib/sam.h"
 #include "htslib/kstring.h"
-#include "errmod.h"
 #include "sam_opts.h"
 #include "samtools.h"
 
@@ -582,7 +582,7 @@ int main_phase(int argc, char *argv[])
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0, '-'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -622,7 +622,7 @@ int main_phase(int argc, char *argv[])
 //      fprintf(pysam_stderr, "         -e        do not discover SNPs (effective with -l)\n");
         fprintf(pysam_stderr, "\n");
 
-        sam_global_opt_help(pysam_stderr, "-....");
+        sam_global_opt_help(pysam_stderr, "-....-");
 
         return 1;
     }
diff --git a/samtools/sam.h b/samtools/sam.h
index 5130105..6545e64 100644
--- a/samtools/sam.h
+++ b/samtools/sam.h
@@ -50,7 +50,7 @@ typedef struct {
     samFile *file;
     struct { BGZF *bam; } x;  // Hack so that fp->x.bam still works
     bam_hdr_t *header;
-    short is_write:1;
+    unsigned short is_write:1;
 } samfile_t;
 
 #ifdef __cplusplus
diff --git a/samtools/sam_opts.c b/samtools/sam_opts.c
index 9369145..9e7a8de 100644
--- a/samtools/sam_opts.c
+++ b/samtools/sam_opts.c
@@ -72,6 +72,9 @@ int parse_sam_global_opt(int c, const char *optarg, const struct option *lopt,
             r |= hts_opt_add((hts_opt **)&ga->out.specific, ref);
             free(ref);
             break;
+        } else if (strcmp(lopt->name, "threads") == 0) {
+            ga->nthreads = atoi(optarg);
+            break;
 //      } else if (strcmp(lopt->name, "verbose") == 0) {
 //          ga->verbosity++;
 //          break;
@@ -100,7 +103,7 @@ void sam_global_opt_help(FILE *fp, const char *shortopts) {
     int i = 0;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS(0,0,0,0,0),
+        SAM_OPT_GLOBAL_OPTIONS(0,0,0,0,0,0),
         { NULL, 0, NULL, 0 }
     };
 
@@ -130,6 +133,9 @@ void sam_global_opt_help(FILE *fp, const char *shortopts) {
         else if (strcmp(lopts[i].name, "reference") == 0)
             fprintf(fp,"reference FILE\n"
                     "               Reference sequence FASTA FILE [null]\n");
+        else if (strcmp(lopts[i].name, "threads") == 0)
+            fprintf(fp,"threads INT\n"
+                    "               Number of additional threads to use [0]\n");
 //      else if (strcmp(lopts[i].name, "verbose") == 0)
 //          fprintf(fp,"verbose\n"
 //                  "               Increment level of verbosity\n");
diff --git a/samtools/sam_opts.c.pysam.c b/samtools/sam_opts.c.pysam.c
index d0b56a3..aed4869 100644
--- a/samtools/sam_opts.c.pysam.c
+++ b/samtools/sam_opts.c.pysam.c
@@ -74,6 +74,9 @@ int parse_sam_global_opt(int c, const char *optarg, const struct option *lopt,
             r |= hts_opt_add((hts_opt **)&ga->out.specific, ref);
             free(ref);
             break;
+        } else if (strcmp(lopt->name, "threads") == 0) {
+            ga->nthreads = atoi(optarg);
+            break;
 //      } else if (strcmp(lopt->name, "verbose") == 0) {
 //          ga->verbosity++;
 //          break;
@@ -102,7 +105,7 @@ void sam_global_opt_help(FILE *fp, const char *shortopts) {
     int i = 0;
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS(0,0,0,0,0),
+        SAM_OPT_GLOBAL_OPTIONS(0,0,0,0,0,0),
         { NULL, 0, NULL, 0 }
     };
 
@@ -132,6 +135,9 @@ void sam_global_opt_help(FILE *fp, const char *shortopts) {
         else if (strcmp(lopts[i].name, "reference") == 0)
             fprintf(fp,"reference FILE\n"
                     "               Reference sequence FASTA FILE [null]\n");
+        else if (strcmp(lopts[i].name, "threads") == 0)
+            fprintf(fp,"threads INT\n"
+                    "               Number of additional threads to use [0]\n");
 //      else if (strcmp(lopts[i].name, "verbose") == 0)
 //          fprintf(fp,"verbose\n"
 //                  "               Increment level of verbosity\n");
diff --git a/samtools/sam_opts.h b/samtools/sam_opts.h
index 25e9279..6edbf64 100644
--- a/samtools/sam_opts.h
+++ b/samtools/sam_opts.h
@@ -34,6 +34,7 @@ typedef struct sam_global_args {
     htsFormat in;
     htsFormat out;
     char *reference;
+    int nthreads;
     //int verbosity;
 } sam_global_args;
 
@@ -45,6 +46,7 @@ enum {
     SAM_OPT_OUTPUT_FMT,
     SAM_OPT_OUTPUT_FMT_OPTION,
     SAM_OPT_REFERENCE,
+    SAM_OPT_NTHREADS,
     //SAM_OPT_VERBOSE
 };
 
@@ -56,12 +58,13 @@ enum {
 // 0      No short option has been assigned. Use --long-opt only.
 // '-'    Both long and short options are disabled.
 // <c>    Otherwise the equivalent short option is character <c>.
-#define SAM_OPT_GLOBAL_OPTIONS(o1, o2, o3, o4, o5) \
+#define SAM_OPT_GLOBAL_OPTIONS(o1, o2, o3, o4, o5, o6) \
     {"input-fmt",         required_argument, NULL, SAM_OPT_VAL(o1, SAM_OPT_INPUT_FMT)}, \
     {"input-fmt-option",  required_argument, NULL, SAM_OPT_VAL(o2, SAM_OPT_INPUT_FMT_OPTION)}, \
     {"output-fmt",        required_argument, NULL, SAM_OPT_VAL(o3, SAM_OPT_OUTPUT_FMT)}, \
     {"output-fmt-option", required_argument, NULL, SAM_OPT_VAL(o4, SAM_OPT_OUTPUT_FMT_OPTION)}, \
-    {"reference",         required_argument, NULL, SAM_OPT_VAL(o5, SAM_OPT_REFERENCE)}
+    {"reference",         required_argument, NULL, SAM_OPT_VAL(o5, SAM_OPT_REFERENCE)}, \
+    {"threads",           required_argument, NULL, SAM_OPT_VAL(o6, SAM_OPT_NTHREADS)}
     //{"verbose",           no_argument,       NULL, SAM_OPT_VERBOSE}
 
 /*
diff --git a/samtools/test/test.c b/samtools/sam_utils.c
similarity index 52%
copy from samtools/test/test.c
copy to samtools/sam_utils.c
index 7ab38af..4f8964a 100644
--- a/samtools/test/test.c
+++ b/samtools/sam_utils.c
@@ -1,8 +1,8 @@
-/*  test/test.c -- test harness utility routines.
+/*  sam_utils.c -- various utilities internal to samtools.
 
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014-2016 Genome Research Ltd.
 
-    Author: Martin O. Pollard <mp15 at sanger.ac.uk>
+    Author: John Marshall <jm18 at sanger.ac.uk>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -24,32 +24,37 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <config.h>
 
-#include <errno.h>
 #include <stdio.h>
-#include <stdlib.h>
+#include <stdarg.h>
 #include <string.h>
-#include <htslib/sam.h>
+#include <errno.h>
+
+#include "samtools.h"
 
-#include "test.h"
+static void vprint_error_core(const char *subcommand, const char *format, va_list args, const char *extra)
+{
+    fflush(stdout);
+    if (subcommand && *subcommand) fprintf(stderr, "samtools %s: ", subcommand);
+    else fprintf(stderr, "samtools: ");
+    vfprintf(stderr, format, args);
+    if (extra) fprintf(stderr, ": %s\n", extra);
+    else fprintf(stderr, "\n");
+    fflush(stderr);
+}
 
-void xfreopen(const char *path, const char *mode, FILE *stream)
+void print_error(const char *subcommand, const char *format, ...)
 {
-    if (freopen(path, mode, stream) == NULL) {
-        fprintf(stderr, __FILE__": error reopening %s: %s\n",
-                path, strerror(errno));
-        exit(2);
-    }
+    va_list args;
+    va_start(args, format);
+    vprint_error_core(subcommand, format, args, NULL);
+    va_end(args);
 }
 
-void dump_hdr(const bam_hdr_t* hdr)
+void print_error_errno(const char *subcommand, const char *format, ...)
 {
-    printf("n_targets: %d\n", hdr->n_targets);
-    printf("ignore_sam_err: %d\n", hdr->ignore_sam_err);
-    printf("l_text: %u\n", hdr->l_text);
-    printf("idx\ttarget_len\ttarget_name:\n");
-    int32_t target;
-    for (target = 0; target < hdr->n_targets; ++target) {
-        printf("%d\t%u\t\"%s\"\n", target, hdr->target_len[target], hdr->target_name[target]);
-    }
-    printf("text: \"%s\"\n", hdr->text);
+    int err = errno;
+    va_list args;
+    va_start(args, format);
+    vprint_error_core(subcommand, format, args, err? strerror(err) : NULL);
+    va_end(args);
 }
diff --git a/samtools/test/test.c.pysam.c b/samtools/sam_utils.c.pysam.c
similarity index 52%
copy from samtools/test/test.c.pysam.c
copy to samtools/sam_utils.c.pysam.c
index a8295b5..0a78619 100644
--- a/samtools/test/test.c.pysam.c
+++ b/samtools/sam_utils.c.pysam.c
@@ -1,10 +1,10 @@
 #include "pysam.h"
 
-/*  test/test.c -- test harness utility routines.
+/*  sam_utils.c -- various utilities internal to samtools.
 
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014-2016 Genome Research Ltd.
 
-    Author: Martin O. Pollard <mp15 at sanger.ac.uk>
+    Author: John Marshall <jm18 at sanger.ac.uk>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -26,32 +26,37 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <config.h>
 
-#include <errno.h>
 #include <stdio.h>
-#include <stdlib.h>
+#include <stdarg.h>
 #include <string.h>
-#include <htslib/sam.h>
+#include <errno.h>
+
+#include "samtools.h"
 
-#include "test.h"
+static void vprint_error_core(const char *subcommand, const char *format, va_list args, const char *extra)
+{
+    fflush(pysam_stdout);
+    if (subcommand && *subcommand) fprintf(pysam_stderr, "samtools %s: ", subcommand);
+    else fprintf(pysam_stderr, "samtools: ");
+    vfprintf(pysam_stderr, format, args);
+    if (extra) fprintf(pysam_stderr, ": %s\n", extra);
+    else fprintf(pysam_stderr, "\n");
+    fflush(pysam_stderr);
+}
 
-void xfreopen(const char *path, const char *mode, FILE *stream)
+void print_error(const char *subcommand, const char *format, ...)
 {
-    if (freopen(path, mode, stream) == NULL) {
-        fprintf(pysam_stderr, __FILE__": error reopening %s: %s\n",
-                path, strerror(errno));
-        exit(2);
-    }
+    va_list args;
+    va_start(args, format);
+    vprint_error_core(subcommand, format, args, NULL);
+    va_end(args);
 }
 
-void dump_hdr(const bam_hdr_t* hdr)
+void print_error_errno(const char *subcommand, const char *format, ...)
 {
-    fprintf(pysam_stdout, "n_targets: %d\n", hdr->n_targets);
-    fprintf(pysam_stdout, "ignore_sam_err: %d\n", hdr->ignore_sam_err);
-    fprintf(pysam_stdout, "l_text: %u\n", hdr->l_text);
-    fprintf(pysam_stdout, "idx\ttarget_len\ttarget_name:\n");
-    int32_t target;
-    for (target = 0; target < hdr->n_targets; ++target) {
-        fprintf(pysam_stdout, "%d\t%u\t\"%s\"\n", target, hdr->target_len[target], hdr->target_name[target]);
-    }
-    fprintf(pysam_stdout, "text: \"%s\"\n", hdr->text);
+    int err = errno;
+    va_list args;
+    va_start(args, format);
+    vprint_error_core(subcommand, format, args, err? strerror(err) : NULL);
+    va_end(args);
 }
diff --git a/samtools/sam_view.c b/samtools/sam_view.c
index 402e1d3..9c2d15b 100644
--- a/samtools/sam_view.c
+++ b/samtools/sam_view.c
@@ -1,6 +1,6 @@
 /*  sam_view.c -- SAM<->BAM<->CRAM conversion.
 
-    Copyright (C) 2009-2015 Genome Research Ltd.
+    Copyright (C) 2009-2017 Genome Research Ltd.
     Portions copyright (C) 2009, 2011, 2012 Broad Institute.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
@@ -27,6 +27,7 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <stdlib.h>
 #include <string.h>
+#include <strings.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <math.h>
@@ -34,12 +35,18 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdbool.h>
 #include <assert.h>
 #include <getopt.h>
+#include <ctype.h>
 #include "htslib/sam.h"
 #include "htslib/faidx.h"
 #include "htslib/kstring.h"
 #include "htslib/khash.h"
+#include "htslib/thread_pool.h"
 #include "samtools.h"
 #include "sam_opts.h"
+
+#define DEFAULT_BARCODE_TAG "BC"
+#define DEFAULT_QUALITY_TAG "QT"
+
 KHASH_SET_INIT_STR(rg)
 
 typedef khash_t(rg) *rghash_t;
@@ -50,6 +57,7 @@ typedef struct samview_settings {
     int min_mapQ;
     int flag_on;
     int flag_off;
+    int flag_alloff;
     int min_qlen;
     int remove_B;
     uint32_t subsam_seed;
@@ -83,6 +91,8 @@ static int process_aln(const bam_hdr_t *h, bam1_t *b, samview_settings_t* settin
     }
     if (b->core.qual < settings->min_mapQ || ((b->core.flag & settings->flag_on) != settings->flag_on) || (b->core.flag & settings->flag_off))
         return 1;
+    if (settings->flag_alloff && ((b->core.flag & settings->flag_alloff) == settings->flag_alloff))
+        return 1;
     if (settings->bed && (b->core.tid < 0 || !bed_overlap(settings->bed, h->target_name[b->core.tid], b->core.pos, bam_endpos(b))))
         return 1;
     if (settings->subsam_frac > 0.) {
@@ -231,19 +241,22 @@ static void check_sam_close(const char *subcmd, samFile *fp, const char *fname,
 int main_samview(int argc, char *argv[])
 {
     int c, is_header = 0, is_header_only = 0, ret = 0, compress_level = -1, is_count = 0;
-    int is_long_help = 0, n_threads = 0;
+    int is_long_help = 0;
     int64_t count = 0;
     samFile *in = 0, *out = 0, *un_out=0;
+    FILE *fp_out = NULL;
     bam_hdr_t *header = NULL;
     char out_mode[5], out_un_mode[5], *out_format = "";
     char *fn_in = 0, *fn_out = 0, *fn_list = 0, *q, *fn_un_out = 0;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
+    htsThreadPool p = {NULL, 0};
 
     samview_settings_t settings = {
         .rghash = NULL,
         .min_mapQ = 0,
         .flag_on = 0,
         .flag_off = 0,
+        .flag_alloff = 0,
         .min_qlen = 0,
         .remove_B = 0,
         .subsam_seed = 0,
@@ -253,8 +266,7 @@ int main_samview(int argc, char *argv[])
     };
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T'),
-        { "threads", required_argument, NULL, '@' },
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T', '@'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -262,11 +274,13 @@ int main_samview(int argc, char *argv[])
     strcpy(out_mode, "w");
     strcpy(out_un_mode, "w");
     while ((c = getopt_long(argc, argv,
-                            "SbBcCt:h1Ho:O:q:f:F:ul:r:?T:R:L:s:@:m:x:U:",
+                            "SbBcCt:h1Ho:O:q:f:F:G:ul:r:?T:R:L:s:@:m:x:U:",
                             lopts, NULL)) >= 0) {
         switch (c) {
         case 's':
             if ((settings.subsam_seed = strtol(optarg, &q, 10)) != 0) {
+                // Convert likely user input 0,1,2,... to pseudo-random
+                // values with more entropy and more bits set
                 srand(settings.subsam_seed);
                 settings.subsam_seed = rand();
             }
@@ -284,6 +298,7 @@ int main_samview(int argc, char *argv[])
         case 'U': fn_un_out = strdup(optarg); break;
         case 'f': settings.flag_on |= strtol(optarg, 0, 0); break;
         case 'F': settings.flag_off |= strtol(optarg, 0, 0); break;
+        case 'G': settings.flag_alloff |= strtol(optarg, 0, 0); break;
         case 'q': settings.min_mapQ = atoi(optarg); break;
         case 'u': compress_level = 0; break;
         case '1': compress_level = 1; break;
@@ -313,7 +328,6 @@ int main_samview(int argc, char *argv[])
                  */
         case '?': is_long_help = 1; break;
         case 'B': settings.remove_B = 1; break;
-        case '@': n_threads = strtol(optarg, 0, 0); break;
         case 'x':
             {
                 if (strlen(optarg) != 2) {
@@ -425,8 +439,26 @@ int main_samview(int argc, char *argv[])
             }
         }
     }
+    else {
+        if (fn_out) {
+            fp_out = fopen(fn_out, "w");
+            if (fp_out == NULL) {
+                print_error_errno("view", "can't create \"%s\"", fn_out);
+                ret = EXIT_FAILURE;
+                goto view_end;
+            }
+        }
+    }
 
-    if (n_threads > 1) { if (out) hts_set_threads(out, n_threads); }
+    if (ga.nthreads > 1) {
+        if (!(p.pool = hts_tpool_init(ga.nthreads))) {
+            fprintf(stderr, "Error creating thread pool\n");
+            ret = 1;
+            goto view_end;
+        }
+        hts_set_opt(in,  HTS_OPT_THREAD_POOL, &p);
+        if (out) hts_set_opt(out, HTS_OPT_THREAD_POOL, &p);
+    }
     if (is_header_only) goto view_end; // no need to print alignments
 
     if (optind + 1 >= argc) { // convert/print the entire file
@@ -487,13 +519,19 @@ int main_samview(int argc, char *argv[])
     }
 
 view_end:
-    if (is_count && ret == 0)
-        printf("%" PRId64 "\n", count);
+    if (is_count && ret == 0) {
+        if (fprintf(fn_out? fp_out : stdout, "%" PRId64 "\n", count) < 0) {
+            if (fn_out) print_error_errno("view", "writing to \"%s\" failed", fn_out);
+            else print_error_errno("view", "writing to standard output failed");
+            ret = EXIT_FAILURE;
+        }
+    }
 
     // close files, free and return
     if (in) check_sam_close("view", in, fn_in, "standard input", &ret);
     if (out) check_sam_close("view", out, fn_out, "standard output", &ret);
     if (un_out) check_sam_close("view", un_out, fn_un_out, "file", &ret);
+    if (fp_out) fclose(fp_out);
 
     free(fn_list); free(fn_out); free(settings.library);  free(fn_un_out);
     sam_global_args_free(&ga);
@@ -508,6 +546,10 @@ view_end:
     if (settings.remove_aux_len) {
         free(settings.remove_aux);
     }
+
+    if (p.pool)
+        hts_tpool_destroy(p.pool);
+
     return ret;
 }
 
@@ -538,20 +580,19 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
 "  -l STR   only include reads in library STR [null]\n"
 "  -m INT   only include reads with number of CIGAR operations consuming\n"
 "           query sequence >= INT [0]\n"
-"  -f INT   only include reads with all bits set in INT set in FLAG [0]\n"
-"  -F INT   only include reads with none of the bits set in INT set in FLAG [0]\n"
+"  -f INT   only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
+"  -F INT   only include reads with none of the FLAGS in INT present [0]\n"       //   F&x == 0
+"  -G INT   only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
+"  -s FLOAT subsample reads (given INT.FRAC option value, 0.FRAC is the\n"
+"           fraction of templates/read pairs to keep; INT part sets seed)\n"
 // read processing
 "  -x STR   read tag to strip (repeatable) [null]\n"
 "  -B       collapse the backward CIGAR operation\n"
-"  -s FLOAT integer part sets seed of random number generator [0];\n"
-"           rest sets fraction of templates to subsample [no subsampling]\n"
 // general options
-"  -@, --threads INT\n"
-"           number of BAM/CRAM compression threads [0]\n"
 "  -?       print long help, including note about region specification\n"
 "  -S       ignored (input format is auto-detected)\n");
 
-    sam_global_opt_help(fp, "-.O.T");
+    sam_global_opt_help(fp, "-.O.T@");
     fprintf(fp, "\n");
 
     if (is_long_help)
@@ -620,21 +661,37 @@ static void bam2fq_usage(FILE *to, const char *command)
 "Usage: samtools %s [options...] <in.bam>\n", command);
     fprintf(to,
 "Options:\n"
-"  -0 FILE   write paired reads flagged both or neither READ1 and READ2 to FILE\n"
-"  -1 FILE   write paired reads flagged READ1 to FILE\n"
-"  -2 FILE   write paired reads flagged READ2 to FILE\n"
-"  -f INT    only include reads with all bits set in INT set in FLAG [0]\n"
-"  -F INT    only include reads with none of the bits set in INT set in FLAG [0]\n"
-"  -n        don't append /1 and /2 to the read name\n");
+"  -0 FILE              write paired reads flagged both or neither READ1 and READ2 to FILE\n"
+"  -1 FILE              write paired reads flagged READ1 to FILE\n"
+"  -2 FILE              write paired reads flagged READ2 to FILE\n"
+"  -f INT               only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
+"  -F INT               only include reads with none of the FLAGS in INT present [0]\n"       //   F&x == 0
+"  -G INT               only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
+"  -n                   don't append /1 and /2 to the read name\n"
+"  -N                   always append /1 and /2 to the read name\n");
     if (fq) fprintf(to,
-"  -O        output quality in the OQ tag if present\n");
+"  -O                   output quality in the OQ tag if present\n");
     fprintf(to,
-"  -s FILE   write singleton reads to FILE [assume single-end]\n"
-"  -t        copy RG, BC and QT tags to the %s header line\n",
+"  -s FILE              write singleton reads to FILE [assume single-end]\n"
+"  -t                   copy RG, BC and QT tags to the %s header line\n",
     fq ? "FASTQ" : "FASTA");
     if (fq) fprintf(to,
-"  -v INT    default quality score if not given in file [1]\n");
-    sam_global_opt_help(to, "-.--.");
+"  -v INT               default quality score if not given in file [1]\n"
+"  --i1 FILE            write first index reads to FILE\n"
+"  --i2 FILE            write second index reads to FILE\n"
+"  --barcode-tag TAG    Barcode tag [default: " DEFAULT_BARCODE_TAG "]\n"
+"  --quality-tag TAG    Quality tag [default: " DEFAULT_QUALITY_TAG "]\n"
+"  --index-format STR   How to parse barcode and quality tags\n\n");
+    sam_global_opt_help(to, "-.--.@");
+    fprintf(to,
+"   \n"
+"   The index-format string describes how to parse the barcode and quality tags, for example:\n"
+"   i14i8       the first 14 characters are index 1, the next 8 characters are index 2\n"
+"   n8i14       ignore the first 8 characters, and use the next 14 characters for index 1\n"
+"   If the tag contains a separator, then the numeric part can be replaced with '*' to mean\n"
+"   'read until the separator or end of tag', for example:\n"
+"   n*i*        ignore the left part of the tag until the separator, then use the second part\n"
+"               of the tag as index 1\n");
 }
 
 typedef enum { READ_UNKNOWN = 0, READ_1 = 1, READ_2 = 2 } readpart;
@@ -643,24 +700,97 @@ typedef struct bam2fq_opts {
     char *fnse;
     char *fnr[3];
     char *fn_input; // pointer to input filename in argv do not free
-    bool has12, use_oq, copy_tags;
-    int flag_on, flag_off;
+    bool has12, has12always, use_oq, copy_tags;
+    int flag_on, flag_off, flag_alloff;
     sam_global_args ga;
     fastfile filetype;
     int def_qual;
+    char *barcode_tag;
+    char *quality_tag;
+    char *index_file[2];
+    char *index_format;
 } bam2fq_opts_t;
 
 typedef struct bam2fq_state {
     samFile *fp;
     FILE *fpse;
     FILE *fpr[3];
+    FILE *fpi[2];
     bam_hdr_t *h;
     bool has12, use_oq, copy_tags;
-    int flag_on, flag_off;
+    int flag_on, flag_off, flag_alloff;
     fastfile filetype;
     int def_qual;
 } bam2fq_state_t;
 
+/*
+ * Get and decode the read from a BAM record.
+ *
+ * TODO: htslib really needs an interface for this.  Consider this or perhaps
+ * bam_get_seq_str (current vs original orientation) and bam_get_qual_str
+ * functions as string formatted equivalents to bam_get_{seq,qual}?
+ */
+
+/*
+ * Reverse a string in place.
+ * From http://stackoverflow.com/questions/8534274/is-the-strrev-function-not-available-in-linux.
+ * Author Sumit-naik: http://stackoverflow.com/users/4590926/sumit-naik
+ */
+static char *reverse(char *str)
+{
+    int i = strlen(str)-1,j=0;
+    char ch;
+    while (i>j) {
+        ch = str[i];
+        str[i]= str[j];
+        str[j] = ch;
+        i--;
+        j++;
+    }
+    return str;
+}
+
+/* return the read, reverse complemented if necessary */
+static char *get_read(const bam1_t *rec)
+{
+    int len = rec->core.l_qseq + 1;
+    char *read = calloc(1, len);
+    char *seq = (char *)bam_get_seq(rec);
+    int n;
+
+    if (!read) return NULL;
+
+    for (n=0; n < rec->core.l_qseq; n++) {
+        if (rec->core.flag & BAM_FREVERSE) read[n] = seq_nt16_str[seq_comp_table[bam_seqi(seq,n)]];
+        else                               read[n] = seq_nt16_str[bam_seqi(seq,n)];
+    }
+    if (rec->core.flag & BAM_FREVERSE) reverse(read);
+    return read;
+}
+
+/*
+ * get and decode the quality from a BAM record
+ */
+static char *get_quality(const bam1_t *rec)
+{
+    char *quality = calloc(1, rec->core.l_qseq + 1);
+    char *q = (char *)bam_get_qual(rec);
+    int n;
+
+    if (*q == '\xff') { free(quality); return NULL; }
+
+    for (n=0; n < rec->core.l_qseq; n++) {
+        quality[n] = q[n]+33;
+    }
+    if (rec->core.flag & BAM_FREVERSE) reverse(quality);
+    return quality;
+}
+
+//
+// End of htslib complaints
+//
+
+
 static readpart which_readpart(const bam1_t *b)
 {
     if ((b->core.flag & BAM_FREAD1) && !(b->core.flag & BAM_FREAD2)) {
@@ -672,85 +802,60 @@ static readpart which_readpart(const bam1_t *b)
     }
 }
 
-// Transform a bam1_t record into a string with the FASTQ representation of it
-// @returns false for error, true for success
-static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t *state)
+/*
+ * parse the length part from the index-format string
+ */
+static int getLength(char **s)
 {
-    int i;
-    int32_t qlen = b->core.l_qseq;
-    assert(qlen >= 0);
-    uint8_t *seq;
-    uint8_t *qual = bam_get_qual(b);
-    const uint8_t *oq = NULL;
-    if (state->use_oq) {
-        oq = bam_aux_get(b, "OQ");
-        if (oq) oq++; // skip tag type
+    int n = 0;
+    while (**s) {
+        if (**s == '*') { n=-1; (*s)++; break; }
+        if ( !isdigit(**s)) break;
+        n = n*10 + ((**s)-'0');
+        (*s)++;
     }
-    bool has_qual = (qual[0] != 0xff || (state->use_oq && oq)); // test if there is quality
+    return n;
+}
+
+static bool make_fq_line(const bam1_t *rec, char *seq, char *qual, kstring_t *linebuf, const bam2fq_state_t *state)
+{
+    int i;
 
     linebuf->l = 0;
     // Write read name
-    readpart readpart = which_readpart(b);
     kputc(state->filetype == FASTA? '>' : '@', linebuf);
-    kputs(bam_get_qname(b), linebuf);
+    kputs(bam_get_qname(rec), linebuf);
     // Add the /1 /2 if requested
     if (state->has12) {
+        readpart readpart = which_readpart(rec);
         if (readpart == READ_1) kputs("/1", linebuf);
         else if (readpart == READ_2) kputs("/2", linebuf);
     }
     if (state->copy_tags) {
         for (i = 0; copied_tags[i]; ++i) {
             uint8_t *s;
-            if ((s = bam_aux_get(b, copied_tags[i])) != 0) {
-                kputc('\t', linebuf);
-                kputsn(copied_tags[i], 2, linebuf);
-                kputsn(":Z:", 3, linebuf);
-                kputs(bam_aux2Z(s), linebuf);
+            if ((s = bam_aux_get(rec, copied_tags[i])) != 0) {
+                if (*s == 'Z') {
+                    kputc('\t', linebuf);
+                    kputsn(copied_tags[i], 2, linebuf);
+                    kputsn(":Z:", 3, linebuf);
+                    kputs(bam_aux2Z(s), linebuf);
+                }
             }
         }
     }
     kputc('\n', linebuf);
-
-    seq = bam_get_seq(b);
-
-    if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
-        for (i = qlen-1; i > -1; --i) {
-            char c = seq_nt16_str[seq_comp_table[bam_seqi(seq,i)]];
-            kputc(c, linebuf);
-        }
-    } else {
-        for (i = 0; i < qlen; ++i) {
-            char c = seq_nt16_str[bam_seqi(seq,i)];
-            kputc(c, linebuf);
-        }
-    }
+    kputs(seq, linebuf);
     kputc('\n', linebuf);
 
     if (state->filetype == FASTQ) {
         // Write quality
         kputs("+\n", linebuf);
-        if (has_qual) {
-            if (state->use_oq && oq) {
-                if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
-                    for (i = qlen-1; i > -1; --i) {
-                        kputc(oq[i], linebuf);
-                    }
-                } else {
-                    kputs((char*)oq, linebuf);
-                }
-            } else {
-                if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
-                    for (i = qlen-1; i > -1; --i) {
-                        kputc(33 + qual[i], linebuf);
-                    }
-                } else {
-                    for (i = 0; i < qlen; ++i) {
-                        kputc(33 + qual[i], linebuf);
-                    }
-                }
-            }
+        if (qual && *qual) {
+            kputs(qual, linebuf);
         } else {
-            for (i = 0; i < qlen; ++i) {
+            int len = strlen(seq);
+            for (i = 0; i < len; ++i) {
                 kputc(33 + state->def_qual, linebuf);
             }
         }
@@ -759,49 +864,214 @@ static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t
     return true;
 }
 
+/*
+ * Create FASTQ lines from the barcode tag using the index-format 
+ */
+static bool tags2fq(bam1_t *rec, const bam2fq_state_t *state, const bam2fq_opts_t* opts)
+{
+    uint8_t *p;
+    char *ifmt = opts->index_format;
+    char *tag = NULL;
+    char *qual = NULL;
+    int file_number = 0;
+    kstring_t linebuf = { 0, 0, NULL }; // Buffer
+
+    // read barcode tag
+    p = bam_aux_get(rec,opts->barcode_tag);
+    if (p) tag = bam_aux2Z(p);
+
+    if (!tag) return true; // there is no tag
+
+    // read quality tag
+    p = bam_aux_get(rec, opts->quality_tag);
+    if (p) qual = bam_aux2Z(p);
+
+    // Parse the index-format string
+    while (*ifmt) {
+        if (file_number > 1) break;     // shouldn't happen if we've validated paramaters correctly
+        char action = *ifmt;        // should be 'i' or 'n'
+        ifmt++; // skip over action
+        int index_len = getLength(&ifmt);
+
+        char *sub_tag = calloc(1, strlen(tag)+1);
+        char *sub_qual = calloc(1, strlen(tag)+1);
+        int n = 0;
+
+        if (index_len < 0) {
+            // read until separator
+            while (isalpha(*tag)) {
+                sub_tag[n] = *tag++;
+                if (qual) sub_qual[n] = *qual++;
+                n++;
+            }
+            if (*tag) { // skip separator
+                tag++;
+                if (qual) qual++;
+            }
+        } else {
+            // read index_len characters
+            while (index_len-- && *tag) {
+                sub_tag[n] = *tag++;
+                if (qual) sub_qual[n] = *qual++;
+                n++;
+            }
+        }
+
+        if (action=='i' && *sub_tag && state->fpi[file_number]) {
+            make_fq_line(rec, sub_tag, sub_qual, &linebuf, state);
+            fputs(linebuf.s, state->fpi[file_number++]);
+        }
+        free(sub_qual); free(sub_tag);
+
+    }
+
+    free(linebuf.s);
+    return true;
+}
+
+// Transform a bam1_t record into a string with the FASTQ representation of it
+// @returns false for error, true for success
+static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t *state)
+{
+    int32_t qlen = b->core.l_qseq;
+    assert(qlen >= 0);
+    const uint8_t *oq = NULL;
+    char *qual = NULL;
+
+    char *seq = get_read(b);
+
+    if (state->use_oq) {
+        oq = bam_aux_get(b, "OQ");
+        if (oq) {
+            oq++; 
+            qual = strdup(bam_aux2Z(oq));
+            if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
+                reverse(qual);
+            }
+        }
+    } else {
+        qual = get_quality(b);
+    }
+
+    make_fq_line(b, seq, qual, linebuf, state);
+
+    free(qual);
+    free(seq);
+    return true;
+}
+
+static void free_opts(bam2fq_opts_t *opts)
+{
+    free(opts->barcode_tag);
+    free(opts->quality_tag);
+    free(opts->index_format);
+    free(opts);
+}
+
 // return true if valid
 static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
 {
     // Parse args
     bam2fq_opts_t* opts = calloc(1, sizeof(bam2fq_opts_t));
     opts->has12 = true;
+    opts->has12always = false;
     opts->filetype = FASTQ;
     opts->def_qual = 1;
+    opts->barcode_tag = NULL;
+    opts->quality_tag = NULL;
+    opts->index_format = NULL;
+    opts->index_file[0] = NULL;
+    opts->index_file[1] = NULL;
 
     int c;
     sam_global_args_init(&opts->ga);
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '@'),
+        {"i1", required_argument, NULL, 1},
+        {"I1", required_argument, NULL, 1},
+        {"i2", required_argument, NULL, 2},
+        {"I2", required_argument, NULL, 2},
+        {"if", required_argument, NULL, 3},
+        {"IF", required_argument, NULL, 3},
+        {"index-format", required_argument, NULL, 3},
+        {"barcode-tag", required_argument, NULL, 'b'},
+        {"quality-tag", required_argument, NULL, 'q'},
         { NULL, 0, NULL, 0 }
     };
-    while ((c = getopt_long(argc, argv, "0:1:2:f:F:nOs:tv:", lopts, NULL)) > 0) {
+    while ((c = getopt_long(argc, argv, "0:1:2:f:F:G:nNOs:tv:@:", lopts, NULL)) > 0) {
         switch (c) {
+            case 'b': opts->barcode_tag = strdup(optarg); break;
+            case 'q': opts->quality_tag = strdup(optarg); break;
+            case  1 : opts->index_file[0] = optarg; break;
+            case  2 : opts->index_file[1] = optarg; break;
+            case  3 : opts->index_format = strdup(optarg); break;
             case '0': opts->fnr[0] = optarg; break;
             case '1': opts->fnr[1] = optarg; break;
             case '2': opts->fnr[2] = optarg; break;
             case 'f': opts->flag_on |= strtol(optarg, 0, 0); break;
             case 'F': opts->flag_off |= strtol(optarg, 0, 0); break;
+            case 'G': opts->flag_alloff |= strtol(optarg, 0, 0); break;
             case 'n': opts->has12 = false; break;
+            case 'N': opts->has12always = true; break;
             case 'O': opts->use_oq = true; break;
             case 's': opts->fnse = optarg; break;
             case 't': opts->copy_tags = true; break;
             case 'v': opts->def_qual = atoi(optarg); break;
-            case '?': bam2fq_usage(stderr, argv[0]); free(opts); return false;
+            case '?': bam2fq_usage(stderr, argv[0]); free_opts(opts); return false;
             default:
                 if (parse_sam_global_opt(c, optarg, lopts, &opts->ga) != 0) {
-                    bam2fq_usage(stderr, argv[0]); free(opts); return false;
+                    bam2fq_usage(stderr, argv[0]); free_opts(opts); return false;
                 }
                 break;
         }
     }
 
     if (opts->fnr[1] || opts->fnr[2]) opts->has12 = false;
+    if (opts->has12always) opts->has12 = true;
+
+    if (!opts->barcode_tag) opts->barcode_tag = strdup(DEFAULT_BARCODE_TAG);
+    if (!opts->quality_tag) opts->quality_tag = strdup(DEFAULT_QUALITY_TAG);
+
+    int nIndex = 0;
+    if (opts->index_format) {
+        char *s;
+        for (s = opts->index_format; *s; s++) {
+            if (*s == 'i') nIndex++;
+        }
+    }
+    if (nIndex>2) {
+        fprintf(stderr,"Invalid index format: more than 2 indexes\n");
+        bam2fq_usage(stderr, argv[0]);
+        free_opts(opts);
+        return false;
+    }
+
+    if (opts->index_file[1] && !opts->index_file[0]) {
+        fprintf(stderr, "Index one specified, but index two not given\n");
+        bam2fq_usage(stderr, argv[0]);
+        free_opts(opts);
+        return false;
+    }
+
+    if (nIndex==2 && !opts->index_file[1]) {
+        fprintf(stderr, "index_format specifies two indexes, but only one index file given\n");
+        bam2fq_usage(stderr, argv[0]);
+        free_opts(opts);
+        return false;
+    }
+
+    if (nIndex==1 && !opts->index_file[0]) {
+        fprintf(stderr, "index_format specifies an index, but no index file given\n");
+        bam2fq_usage(stderr, argv[0]);
+        free_opts(opts);
+        return false;
+    }
 
     if (opts->def_qual < 0 || 93 < opts->def_qual) {
         fprintf(stderr, "Invalid -v default quality %i, allowed range 0 to 93\n", opts->def_qual);
         bam2fq_usage(stderr, argv[0]);
-        free(opts);
-        return true;
+        free_opts(opts);
+        return false;
     }
 
     const char* type_str = argv[0];
@@ -812,20 +1082,21 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
     } else {
         print_error("bam2fq", "Unrecognised type call \"%s\", this should be impossible... but you managed it!", type_str);
         bam2fq_usage(stderr, argv[0]);
-        free(opts);
+        free_opts(opts);
         return false;
     }
 
     if ((argc - (optind)) == 0) {
+        fprintf(stderr, "No input file specified.\n");
         bam2fq_usage(stdout, argv[0]);
-        free(opts);
+        free_opts(opts);
         return false;
     }
 
     if ((argc - (optind)) != 1) {
         fprintf(stderr, "Too many arguments.\n");
         bam2fq_usage(stderr, argv[0]);
-        free(opts);
+        free_opts(opts);
         return false;
     }
     opts->fn_input = argv[optind];
@@ -838,6 +1109,7 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
     bam2fq_state_t* state = calloc(1, sizeof(bam2fq_state_t));
     state->flag_on = opts->flag_on;
     state->flag_off = opts->flag_off;
+    state->flag_alloff = opts->flag_alloff;
     state->has12 = opts->has12;
     state->use_oq = opts->use_oq;
     state->copy_tags = opts->copy_tags;
@@ -850,6 +1122,8 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
         free(state);
         return false;
     }
+    if (opts->ga.nthreads > 0)
+        hts_set_threads(state->fp, opts->ga.nthreads);
     uint32_t rf = SAM_QNAME | SAM_FLAG | SAM_SEQ | SAM_QUAL;
     if (opts->use_oq) rf |= SAM_AUX;
     if (hts_set_opt(state->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
@@ -884,6 +1158,17 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
             state->fpr[i] = stdout;
         }
     }
+    for (i = 0; i < 2; i++) {
+        state->fpi[i] = NULL;
+        if (opts->index_file[i]) {
+            state->fpi[i] = fopen(opts->index_file[i], "w");
+            if (state->fpi[i] == NULL) {
+                print_error_errno("bam2fq", "Cannot write to i%d file \"%s\"", i+1, opts->index_file[i]);
+                free(state);
+                return false;
+            }
+        }
+    }
 
     state->h = sam_hdr_read(state->fp);
     if (state->h == NULL) {
@@ -906,6 +1191,12 @@ static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int*
     for (i = 0; i < 3; ++i) {
         if (state->fpr[i] != stdout && fclose(state->fpr[i])) { print_error_errno("bam2fq", "Error closing r%d file \"%s\"", i, opts->fnr[i]); valid = false; }
     }
+    for (i = 0; i < 2; i++) {
+        if (state->fpi[i] && fclose(state->fpi[i])) { 
+            print_error_errno("bam2fq", "Error closing i%d file \"%s\"", i+1, opts->index_file[i]);
+            valid = false;
+        }
+    }
     free(state);
     return valid;
 }
@@ -914,11 +1205,12 @@ static inline bool filter_it_out(const bam1_t *b, const bam2fq_state_t *state)
 {
     return (b->core.flag&(BAM_FSECONDARY|BAM_FSUPPLEMENTARY) // skip secondary and supplementary alignments
         || (b->core.flag&(state->flag_on)) != state->flag_on // or reads indicated by filter flags
-        || (b->core.flag&(state->flag_off)) != 0);
+        || (b->core.flag&(state->flag_off)) != 0
+        || (b->core.flag&(state->flag_alloff) && (b->core.flag&(state->flag_alloff)) == state->flag_alloff));
 
 }
 
-static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
+static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state, bam2fq_opts_t* opts)
 {
     bam1_t* b = bam_init1();
     char *current_qname = NULL;
@@ -974,6 +1266,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
                 return false;
             }
             score[which_readpart(b)] = b_score;
+            if (state->fpi[0]) tags2fq(b, state, opts);
         }
     }
     if (!valid)
@@ -991,7 +1284,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
     return valid;
 }
 
-static bool bam2fq_mainloop(bam2fq_state_t *state)
+static bool bam2fq_mainloop(bam2fq_state_t *state, bam2fq_opts_t* opts)
 {
     // process a name collated BAM into fastq
     bam1_t* b = bam_init1();
@@ -1002,13 +1295,12 @@ static bool bam2fq_mainloop(bam2fq_state_t *state)
     int64_t n_reads = 0; // Statistics
     kstring_t linebuf = { 0, 0, NULL }; // Buffer
     while (sam_read1(state->fp, state->h, b) >= 0) {
-        if (b->core.flag&(BAM_FSECONDARY|BAM_FSUPPLEMENTARY) // skip secondary and supplementary alignments
-            || (b->core.flag&(state->flag_on)) != state->flag_on             // or reads indicated by filter flags
-            || (b->core.flag&(state->flag_off)) != 0) continue;
+        if (filter_it_out(b, state)) continue;
         ++n_reads;
 
         if (!bam1_to_fq(b, &linebuf, state)) return false;
         fputs(linebuf.s, state->fpr[which_readpart(b)]);
+        if (state->fpi[0]) tags2fq(b, state, opts);
     }
     free(linebuf.s);
     bam_destroy1(b);
@@ -1029,14 +1321,14 @@ int main_bam2fq(int argc, char *argv[])
     if (!init_state(opts, &state)) return EXIT_FAILURE;
 
     if (state->fpse) {
-        if (!bam2fq_mainloop_singletontrack(state)) status = EXIT_FAILURE;
+        if (!bam2fq_mainloop_singletontrack(state,opts)) status = EXIT_FAILURE;
     } else {
-        if (!bam2fq_mainloop(state)) status = EXIT_FAILURE;
+        if (!bam2fq_mainloop(state,opts)) status = EXIT_FAILURE;
     }
 
     if (!destroy_state(opts, state, &status)) return EXIT_FAILURE;
     sam_global_args_free(&opts->ga);
-    free(opts);
+    free_opts(opts);
 
     return status;
 }
diff --git a/samtools/sam_view.c.pysam.c b/samtools/sam_view.c.pysam.c
index 8c883b0..6df47c9 100644
--- a/samtools/sam_view.c.pysam.c
+++ b/samtools/sam_view.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  sam_view.c -- SAM<->BAM<->CRAM conversion.
 
-    Copyright (C) 2009-2015 Genome Research Ltd.
+    Copyright (C) 2009-2017 Genome Research Ltd.
     Portions copyright (C) 2009, 2011, 2012 Broad Institute.
 
     Author: Heng Li <lh3 at sanger.ac.uk>
@@ -29,6 +29,7 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <stdlib.h>
 #include <string.h>
+#include <strings.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <math.h>
@@ -36,12 +37,18 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdbool.h>
 #include <assert.h>
 #include <getopt.h>
+#include <ctype.h>
 #include "htslib/sam.h"
 #include "htslib/faidx.h"
 #include "htslib/kstring.h"
 #include "htslib/khash.h"
+#include "htslib/thread_pool.h"
 #include "samtools.h"
 #include "sam_opts.h"
+
+#define DEFAULT_BARCODE_TAG "BC"
+#define DEFAULT_QUALITY_TAG "QT"
+
 KHASH_SET_INIT_STR(rg)
 
 typedef khash_t(rg) *rghash_t;
@@ -52,6 +59,7 @@ typedef struct samview_settings {
     int min_mapQ;
     int flag_on;
     int flag_off;
+    int flag_alloff;
     int min_qlen;
     int remove_B;
     uint32_t subsam_seed;
@@ -85,6 +93,8 @@ static int process_aln(const bam_hdr_t *h, bam1_t *b, samview_settings_t* settin
     }
     if (b->core.qual < settings->min_mapQ || ((b->core.flag & settings->flag_on) != settings->flag_on) || (b->core.flag & settings->flag_off))
         return 1;
+    if (settings->flag_alloff && ((b->core.flag & settings->flag_alloff) == settings->flag_alloff))
+        return 1;
     if (settings->bed && (b->core.tid < 0 || !bed_overlap(settings->bed, h->target_name[b->core.tid], b->core.pos, bam_endpos(b))))
         return 1;
     if (settings->subsam_frac > 0.) {
@@ -233,19 +243,22 @@ static void check_sam_close(const char *subcmd, samFile *fp, const char *fname,
 int main_samview(int argc, char *argv[])
 {
     int c, is_header = 0, is_header_only = 0, ret = 0, compress_level = -1, is_count = 0;
-    int is_long_help = 0, n_threads = 0;
+    int is_long_help = 0;
     int64_t count = 0;
     samFile *in = 0, *out = 0, *un_out=0;
+    FILE *fp_out = NULL;
     bam_hdr_t *header = NULL;
     char out_mode[5], out_un_mode[5], *out_format = "";
     char *fn_in = 0, *fn_out = 0, *fn_list = 0, *q, *fn_un_out = 0;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
+    htsThreadPool p = {NULL, 0};
 
     samview_settings_t settings = {
         .rghash = NULL,
         .min_mapQ = 0,
         .flag_on = 0,
         .flag_off = 0,
+        .flag_alloff = 0,
         .min_qlen = 0,
         .remove_B = 0,
         .subsam_seed = 0,
@@ -255,8 +268,7 @@ int main_samview(int argc, char *argv[])
     };
 
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T'),
-        { "threads", required_argument, NULL, '@' },
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T', '@'),
         { NULL, 0, NULL, 0 }
     };
 
@@ -264,11 +276,13 @@ int main_samview(int argc, char *argv[])
     strcpy(out_mode, "w");
     strcpy(out_un_mode, "w");
     while ((c = getopt_long(argc, argv,
-                            "SbBcCt:h1Ho:O:q:f:F:ul:r:?T:R:L:s:@:m:x:U:",
+                            "SbBcCt:h1Ho:O:q:f:F:G:ul:r:?T:R:L:s:@:m:x:U:",
                             lopts, NULL)) >= 0) {
         switch (c) {
         case 's':
             if ((settings.subsam_seed = strtol(optarg, &q, 10)) != 0) {
+                // Convert likely user input 0,1,2,... to pseudo-random
+                // values with more entropy and more bits set
                 srand(settings.subsam_seed);
                 settings.subsam_seed = rand();
             }
@@ -286,6 +300,7 @@ int main_samview(int argc, char *argv[])
         case 'U': fn_un_out = strdup(optarg); break;
         case 'f': settings.flag_on |= strtol(optarg, 0, 0); break;
         case 'F': settings.flag_off |= strtol(optarg, 0, 0); break;
+        case 'G': settings.flag_alloff |= strtol(optarg, 0, 0); break;
         case 'q': settings.min_mapQ = atoi(optarg); break;
         case 'u': compress_level = 0; break;
         case '1': compress_level = 1; break;
@@ -315,7 +330,6 @@ int main_samview(int argc, char *argv[])
                  */
         case '?': is_long_help = 1; break;
         case 'B': settings.remove_B = 1; break;
-        case '@': n_threads = strtol(optarg, 0, 0); break;
         case 'x':
             {
                 if (strlen(optarg) != 2) {
@@ -427,8 +441,26 @@ int main_samview(int argc, char *argv[])
             }
         }
     }
+    else {
+        if (fn_out) {
+            fp_out = fopen(fn_out, "w");
+            if (fp_out == NULL) {
+                print_error_errno("view", "can't create \"%s\"", fn_out);
+                ret = EXIT_FAILURE;
+                goto view_end;
+            }
+        }
+    }
 
-    if (n_threads > 1) { if (out) hts_set_threads(out, n_threads); }
+    if (ga.nthreads > 1) {
+        if (!(p.pool = hts_tpool_init(ga.nthreads))) {
+            fprintf(pysam_stderr, "Error creating thread pool\n");
+            ret = 1;
+            goto view_end;
+        }
+        hts_set_opt(in,  HTS_OPT_THREAD_POOL, &p);
+        if (out) hts_set_opt(out, HTS_OPT_THREAD_POOL, &p);
+    }
     if (is_header_only) goto view_end; // no need to print alignments
 
     if (optind + 1 >= argc) { // convert/print the entire file
@@ -489,13 +521,19 @@ int main_samview(int argc, char *argv[])
     }
 
 view_end:
-    if (is_count && ret == 0) 
-        fprintf(pysam_stdout, "%" PRId64 "\n", count);
-    
+    if (is_count && ret == 0) {
+        if (fprintf(fn_out? fp_out : pysam_stdout, "%" PRId64 "\n", count) < 0) {
+            if (fn_out) print_error_errno("view", "writing to \"%s\" failed", fn_out);
+            else print_error_errno("view", "writing to standard output failed");
+            ret = EXIT_FAILURE;
+        }
+    }
+
     // close files, free and return
     if (in) check_sam_close("view", in, fn_in, "standard input", &ret);
     if (out) check_sam_close("view", out, fn_out, "standard output", &ret);
     if (un_out) check_sam_close("view", un_out, fn_un_out, "file", &ret);
+    if (fp_out) fclose(fp_out);
 
     free(fn_list); free(fn_out); free(settings.library);  free(fn_un_out);
     sam_global_args_free(&ga);
@@ -510,6 +548,10 @@ view_end:
     if (settings.remove_aux_len) {
         free(settings.remove_aux);
     }
+
+    if (p.pool)
+        hts_tpool_destroy(p.pool);
+
     return ret;
 }
 
@@ -540,20 +582,19 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
 "  -l STR   only include reads in library STR [null]\n"
 "  -m INT   only include reads with number of CIGAR operations consuming\n"
 "           query sequence >= INT [0]\n"
-"  -f INT   only include reads with all bits set in INT set in FLAG [0]\n"
-"  -F INT   only include reads with none of the bits set in INT set in FLAG [0]\n"
+"  -f INT   only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
+"  -F INT   only include reads with none of the FLAGS in INT present [0]\n"       //   F&x == 0
+"  -G INT   only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
+"  -s FLOAT subsample reads (given INT.FRAC option value, 0.FRAC is the\n"
+"           fraction of templates/read pairs to keep; INT part sets seed)\n"
 // read processing
 "  -x STR   read tag to strip (repeatable) [null]\n"
 "  -B       collapse the backward CIGAR operation\n"
-"  -s FLOAT integer part sets seed of random number generator [0];\n"
-"           rest sets fraction of templates to subsample [no subsampling]\n"
 // general options
-"  -@, --threads INT\n"
-"           number of BAM/CRAM compression threads [0]\n"
 "  -?       print long help, including note about region specification\n"
 "  -S       ignored (input format is auto-detected)\n");
 
-    sam_global_opt_help(fp, "-.O.T");
+    sam_global_opt_help(fp, "-.O.T@");
     fprintf(fp, "\n");
 
     if (is_long_help)
@@ -622,21 +663,37 @@ static void bam2fq_usage(FILE *to, const char *command)
 "Usage: samtools %s [options...] <in.bam>\n", command);
     fprintf(to,
 "Options:\n"
-"  -0 FILE   write paired reads flagged both or neither READ1 and READ2 to FILE\n"
-"  -1 FILE   write paired reads flagged READ1 to FILE\n"
-"  -2 FILE   write paired reads flagged READ2 to FILE\n"
-"  -f INT    only include reads with all bits set in INT set in FLAG [0]\n"
-"  -F INT    only include reads with none of the bits set in INT set in FLAG [0]\n"
-"  -n        don't append /1 and /2 to the read name\n");
+"  -0 FILE              write paired reads flagged both or neither READ1 and READ2 to FILE\n"
+"  -1 FILE              write paired reads flagged READ1 to FILE\n"
+"  -2 FILE              write paired reads flagged READ2 to FILE\n"
+"  -f INT               only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
+"  -F INT               only include reads with none of the FLAGS in INT present [0]\n"       //   F&x == 0
+"  -G INT               only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
+"  -n                   don't append /1 and /2 to the read name\n"
+"  -N                   always append /1 and /2 to the read name\n");
     if (fq) fprintf(to,
-"  -O        output quality in the OQ tag if present\n");
+"  -O                   output quality in the OQ tag if present\n");
     fprintf(to,
-"  -s FILE   write singleton reads to FILE [assume single-end]\n"
-"  -t        copy RG, BC and QT tags to the %s header line\n",
+"  -s FILE              write singleton reads to FILE [assume single-end]\n"
+"  -t                   copy RG, BC and QT tags to the %s header line\n",
     fq ? "FASTQ" : "FASTA");
     if (fq) fprintf(to,
-"  -v INT    default quality score if not given in file [1]\n");
-    sam_global_opt_help(to, "-.--.");
+"  -v INT               default quality score if not given in file [1]\n"
+"  --i1 FILE            write first index reads to FILE\n"
+"  --i2 FILE            write second index reads to FILE\n"
+"  --barcode-tag TAG    Barcode tag [default: " DEFAULT_BARCODE_TAG "]\n"
+"  --quality-tag TAG    Quality tag [default: " DEFAULT_QUALITY_TAG "]\n"
+"  --index-format STR   How to parse barcode and quality tags\n\n");
+    sam_global_opt_help(to, "-.--.@");
+    fprintf(to,
+"   \n"
+"   The index-format string describes how to parse the barcode and quality tags, for example:\n"
+"   i14i8       the first 14 characters are index 1, the next 8 characters are index 2\n"
+"   n8i14       ignore the first 8 characters, and use the next 14 characters for index 1\n"
+"   If the tag contains a separator, then the numeric part can be replaced with '*' to mean\n"
+"   'read until the separator or end of tag', for example:\n"
+"   n*i*        ignore the left part of the tag until the separator, then use the second part\n"
+"               of the tag as index 1\n");
 }
 
 typedef enum { READ_UNKNOWN = 0, READ_1 = 1, READ_2 = 2 } readpart;
@@ -645,24 +702,97 @@ typedef struct bam2fq_opts {
     char *fnse;
     char *fnr[3];
     char *fn_input; // pointer to input filename in argv do not free
-    bool has12, use_oq, copy_tags;
-    int flag_on, flag_off;
+    bool has12, has12always, use_oq, copy_tags;
+    int flag_on, flag_off, flag_alloff;
     sam_global_args ga;
     fastfile filetype;
     int def_qual;
+    char *barcode_tag;
+    char *quality_tag;
+    char *index_file[2];
+    char *index_format;
 } bam2fq_opts_t;
 
 typedef struct bam2fq_state {
     samFile *fp;
     FILE *fpse;
     FILE *fpr[3];
+    FILE *fpi[2];
     bam_hdr_t *h;
     bool has12, use_oq, copy_tags;
-    int flag_on, flag_off;
+    int flag_on, flag_off, flag_alloff;
     fastfile filetype;
     int def_qual;
 } bam2fq_state_t;
 
+/*
+ * Get and decode the read from a BAM record.
+ *
+ * TODO: htslib really needs an interface for this.  Consider this or perhaps
+ * bam_get_seq_str (current vs original orientation) and bam_get_qual_str
+ * functions as string formatted equivalents to bam_get_{seq,qual}?
+ */
+
+/*
+ * Reverse a string in place.
+ * From http://stackoverflow.com/questions/8534274/is-the-strrev-function-not-available-in-linux.
+ * Author Sumit-naik: http://stackoverflow.com/users/4590926/sumit-naik
+ */
+static char *reverse(char *str)
+{
+    int i = strlen(str)-1,j=0;
+    char ch;
+    while (i>j) {
+        ch = str[i];
+        str[i]= str[j];
+        str[j] = ch;
+        i--;
+        j++;
+    }
+    return str;
+}
+
+/* return the read, reverse complemented if necessary */
+static char *get_read(const bam1_t *rec)
+{
+    int len = rec->core.l_qseq + 1;
+    char *read = calloc(1, len);
+    char *seq = (char *)bam_get_seq(rec);
+    int n;
+
+    if (!read) return NULL;
+
+    for (n=0; n < rec->core.l_qseq; n++) {
+        if (rec->core.flag & BAM_FREVERSE) read[n] = seq_nt16_str[seq_comp_table[bam_seqi(seq,n)]];
+        else                               read[n] = seq_nt16_str[bam_seqi(seq,n)];
+    }
+    if (rec->core.flag & BAM_FREVERSE) reverse(read);
+    return read;
+}
+
+/*
+ * get and decode the quality from a BAM record
+ */
+static char *get_quality(const bam1_t *rec)
+{
+    char *quality = calloc(1, rec->core.l_qseq + 1);
+    char *q = (char *)bam_get_qual(rec);
+    int n;
+
+    if (*q == '\xff') { free(quality); return NULL; }
+
+    for (n=0; n < rec->core.l_qseq; n++) {
+        quality[n] = q[n]+33;
+    }
+    if (rec->core.flag & BAM_FREVERSE) reverse(quality);
+    return quality;
+}
+
+//
+// End of htslib complaints
+//
+
+
 static readpart which_readpart(const bam1_t *b)
 {
     if ((b->core.flag & BAM_FREAD1) && !(b->core.flag & BAM_FREAD2)) {
@@ -674,85 +804,60 @@ static readpart which_readpart(const bam1_t *b)
     }
 }
 
-// Transform a bam1_t record into a string with the FASTQ representation of it
-// @returns false for error, true for success
-static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t *state)
+/*
+ * parse the length part from the index-format string
+ */
+static int getLength(char **s)
 {
-    int i;
-    int32_t qlen = b->core.l_qseq;
-    assert(qlen >= 0);
-    uint8_t *seq;
-    uint8_t *qual = bam_get_qual(b);
-    const uint8_t *oq = NULL;
-    if (state->use_oq) {
-        oq = bam_aux_get(b, "OQ");
-        if (oq) oq++; // skip tag type
+    int n = 0;
+    while (**s) {
+        if (**s == '*') { n=-1; (*s)++; break; }
+        if ( !isdigit(**s)) break;
+        n = n*10 + ((**s)-'0');
+        (*s)++;
     }
-    bool has_qual = (qual[0] != 0xff || (state->use_oq && oq)); // test if there is quality
+    return n;
+}
+
+static bool make_fq_line(const bam1_t *rec, char *seq, char *qual, kstring_t *linebuf, const bam2fq_state_t *state)
+{
+    int i;
 
     linebuf->l = 0;
     // Write read name
-    readpart readpart = which_readpart(b);
     kputc(state->filetype == FASTA? '>' : '@', linebuf);
-    kputs(bam_get_qname(b), linebuf);
+    kputs(bam_get_qname(rec), linebuf);
     // Add the /1 /2 if requested
     if (state->has12) {
+        readpart readpart = which_readpart(rec);
         if (readpart == READ_1) kputs("/1", linebuf);
         else if (readpart == READ_2) kputs("/2", linebuf);
     }
     if (state->copy_tags) {
         for (i = 0; copied_tags[i]; ++i) {
             uint8_t *s;
-            if ((s = bam_aux_get(b, copied_tags[i])) != 0) {
-                kputc('\t', linebuf);
-                kputsn(copied_tags[i], 2, linebuf);
-                kputsn(":Z:", 3, linebuf);
-                kputs(bam_aux2Z(s), linebuf);
+            if ((s = bam_aux_get(rec, copied_tags[i])) != 0) {
+                if (*s == 'Z') {
+                    kputc('\t', linebuf);
+                    kputsn(copied_tags[i], 2, linebuf);
+                    kputsn(":Z:", 3, linebuf);
+                    kputs(bam_aux2Z(s), linebuf);
+                }
             }
         }
     }
     kputc('\n', linebuf);
-
-    seq = bam_get_seq(b);
-
-    if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
-        for (i = qlen-1; i > -1; --i) {
-            char c = seq_nt16_str[seq_comp_table[bam_seqi(seq,i)]];
-            kputc(c, linebuf);
-        }
-    } else {
-        for (i = 0; i < qlen; ++i) {
-            char c = seq_nt16_str[bam_seqi(seq,i)];
-            kputc(c, linebuf);
-        }
-    }
+    kputs(seq, linebuf);
     kputc('\n', linebuf);
 
     if (state->filetype == FASTQ) {
         // Write quality
         kputs("+\n", linebuf);
-        if (has_qual) {
-            if (state->use_oq && oq) {
-                if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
-                    for (i = qlen-1; i > -1; --i) {
-                        kputc(oq[i], linebuf);
-                    }
-                } else {
-                    kputs((char*)oq, linebuf);
-                }
-            } else {
-                if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
-                    for (i = qlen-1; i > -1; --i) {
-                        kputc(33 + qual[i], linebuf);
-                    }
-                } else {
-                    for (i = 0; i < qlen; ++i) {
-                        kputc(33 + qual[i], linebuf);
-                    }
-                }
-            }
+        if (qual && *qual) {
+            kputs(qual, linebuf);
         } else {
-            for (i = 0; i < qlen; ++i) {
+            int len = strlen(seq);
+            for (i = 0; i < len; ++i) {
                 kputc(33 + state->def_qual, linebuf);
             }
         }
@@ -761,49 +866,214 @@ static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t
     return true;
 }
 
+/*
+ * Create FASTQ lines from the barcode tag using the index-format 
+ */
+static bool tags2fq(bam1_t *rec, const bam2fq_state_t *state, const bam2fq_opts_t* opts)
+{
+    uint8_t *p;
+    char *ifmt = opts->index_format;
+    char *tag = NULL;
+    char *qual = NULL;
+    int file_number = 0;
+    kstring_t linebuf = { 0, 0, NULL }; // Buffer
+
+    // read barcode tag
+    p = bam_aux_get(rec,opts->barcode_tag);
+    if (p) tag = bam_aux2Z(p);
+
+    if (!tag) return true; // there is no tag
+
+    // read quality tag
+    p = bam_aux_get(rec, opts->quality_tag);
+    if (p) qual = bam_aux2Z(p);
+
+    // Parse the index-format string
+    while (*ifmt) {
+        if (file_number > 1) break;     // shouldn't happen if we've validated paramaters correctly
+        char action = *ifmt;        // should be 'i' or 'n'
+        ifmt++; // skip over action
+        int index_len = getLength(&ifmt);
+
+        char *sub_tag = calloc(1, strlen(tag)+1);
+        char *sub_qual = calloc(1, strlen(tag)+1);
+        int n = 0;
+
+        if (index_len < 0) {
+            // read until separator
+            while (isalpha(*tag)) {
+                sub_tag[n] = *tag++;
+                if (qual) sub_qual[n] = *qual++;
+                n++;
+            }
+            if (*tag) { // skip separator
+                tag++;
+                if (qual) qual++;
+            }
+        } else {
+            // read index_len characters
+            while (index_len-- && *tag) {
+                sub_tag[n] = *tag++;
+                if (qual) sub_qual[n] = *qual++;
+                n++;
+            }
+        }
+
+        if (action=='i' && *sub_tag && state->fpi[file_number]) {
+            make_fq_line(rec, sub_tag, sub_qual, &linebuf, state);
+            fputs(linebuf.s, state->fpi[file_number++]);
+        }
+        free(sub_qual); free(sub_tag);
+
+    }
+
+    free(linebuf.s);
+    return true;
+}
+
+// Transform a bam1_t record into a string with the FASTQ representation of it
+// @returns false for error, true for success
+static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t *state)
+{
+    int32_t qlen = b->core.l_qseq;
+    assert(qlen >= 0);
+    const uint8_t *oq = NULL;
+    char *qual = NULL;
+
+    char *seq = get_read(b);
+
+    if (state->use_oq) {
+        oq = bam_aux_get(b, "OQ");
+        if (oq) {
+            oq++; 
+            qual = strdup(bam_aux2Z(oq));
+            if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
+                reverse(qual);
+            }
+        }
+    } else {
+        qual = get_quality(b);
+    }
+
+    make_fq_line(b, seq, qual, linebuf, state);
+
+    free(qual);
+    free(seq);
+    return true;
+}
+
+static void free_opts(bam2fq_opts_t *opts)
+{
+    free(opts->barcode_tag);
+    free(opts->quality_tag);
+    free(opts->index_format);
+    free(opts);
+}
+
 // return true if valid
 static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
 {
     // Parse args
     bam2fq_opts_t* opts = calloc(1, sizeof(bam2fq_opts_t));
     opts->has12 = true;
+    opts->has12always = false;
     opts->filetype = FASTQ;
     opts->def_qual = 1;
+    opts->barcode_tag = NULL;
+    opts->quality_tag = NULL;
+    opts->index_format = NULL;
+    opts->index_file[0] = NULL;
+    opts->index_file[1] = NULL;
 
     int c;
     sam_global_args_init(&opts->ga);
     static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '@'),
+        {"i1", required_argument, NULL, 1},
+        {"I1", required_argument, NULL, 1},
+        {"i2", required_argument, NULL, 2},
+        {"I2", required_argument, NULL, 2},
+        {"if", required_argument, NULL, 3},
+        {"IF", required_argument, NULL, 3},
+        {"index-format", required_argument, NULL, 3},
+        {"barcode-tag", required_argument, NULL, 'b'},
+        {"quality-tag", required_argument, NULL, 'q'},
         { NULL, 0, NULL, 0 }
     };
-    while ((c = getopt_long(argc, argv, "0:1:2:f:F:nOs:tv:", lopts, NULL)) > 0) {
+    while ((c = getopt_long(argc, argv, "0:1:2:f:F:G:nNOs:tv:@:", lopts, NULL)) > 0) {
         switch (c) {
+            case 'b': opts->barcode_tag = strdup(optarg); break;
+            case 'q': opts->quality_tag = strdup(optarg); break;
+            case  1 : opts->index_file[0] = optarg; break;
+            case  2 : opts->index_file[1] = optarg; break;
+            case  3 : opts->index_format = strdup(optarg); break;
             case '0': opts->fnr[0] = optarg; break;
             case '1': opts->fnr[1] = optarg; break;
             case '2': opts->fnr[2] = optarg; break;
             case 'f': opts->flag_on |= strtol(optarg, 0, 0); break;
             case 'F': opts->flag_off |= strtol(optarg, 0, 0); break;
+            case 'G': opts->flag_alloff |= strtol(optarg, 0, 0); break;
             case 'n': opts->has12 = false; break;
+            case 'N': opts->has12always = true; break;
             case 'O': opts->use_oq = true; break;
             case 's': opts->fnse = optarg; break;
             case 't': opts->copy_tags = true; break;
             case 'v': opts->def_qual = atoi(optarg); break;
-            case '?': bam2fq_usage(pysam_stderr, argv[0]); free(opts); return false;
+            case '?': bam2fq_usage(pysam_stderr, argv[0]); free_opts(opts); return false;
             default:
                 if (parse_sam_global_opt(c, optarg, lopts, &opts->ga) != 0) {
-                    bam2fq_usage(pysam_stderr, argv[0]); free(opts); return false;
+                    bam2fq_usage(pysam_stderr, argv[0]); free_opts(opts); return false;
                 }
                 break;
         }
     }
 
     if (opts->fnr[1] || opts->fnr[2]) opts->has12 = false;
+    if (opts->has12always) opts->has12 = true;
+
+    if (!opts->barcode_tag) opts->barcode_tag = strdup(DEFAULT_BARCODE_TAG);
+    if (!opts->quality_tag) opts->quality_tag = strdup(DEFAULT_QUALITY_TAG);
+
+    int nIndex = 0;
+    if (opts->index_format) {
+        char *s;
+        for (s = opts->index_format; *s; s++) {
+            if (*s == 'i') nIndex++;
+        }
+    }
+    if (nIndex>2) {
+        fprintf(pysam_stderr,"Invalid index format: more than 2 indexes\n");
+        bam2fq_usage(pysam_stderr, argv[0]);
+        free_opts(opts);
+        return false;
+    }
+
+    if (opts->index_file[1] && !opts->index_file[0]) {
+        fprintf(pysam_stderr, "Index one specified, but index two not given\n");
+        bam2fq_usage(pysam_stderr, argv[0]);
+        free_opts(opts);
+        return false;
+    }
+
+    if (nIndex==2 && !opts->index_file[1]) {
+        fprintf(pysam_stderr, "index_format specifies two indexes, but only one index file given\n");
+        bam2fq_usage(pysam_stderr, argv[0]);
+        free_opts(opts);
+        return false;
+    }
+
+    if (nIndex==1 && !opts->index_file[0]) {
+        fprintf(pysam_stderr, "index_format specifies an index, but no index file given\n");
+        bam2fq_usage(pysam_stderr, argv[0]);
+        free_opts(opts);
+        return false;
+    }
 
     if (opts->def_qual < 0 || 93 < opts->def_qual) {
         fprintf(pysam_stderr, "Invalid -v default quality %i, allowed range 0 to 93\n", opts->def_qual);
         bam2fq_usage(pysam_stderr, argv[0]);
-        free(opts);
-        return true;
+        free_opts(opts);
+        return false;
     }
 
     const char* type_str = argv[0];
@@ -814,20 +1084,21 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
     } else {
         print_error("bam2fq", "Unrecognised type call \"%s\", this should be impossible... but you managed it!", type_str);
         bam2fq_usage(pysam_stderr, argv[0]);
-        free(opts);
+        free_opts(opts);
         return false;
     }
 
     if ((argc - (optind)) == 0) {
+        fprintf(pysam_stderr, "No input file specified.\n");
         bam2fq_usage(pysam_stdout, argv[0]);
-        free(opts);
+        free_opts(opts);
         return false;
     }
 
     if ((argc - (optind)) != 1) {
         fprintf(pysam_stderr, "Too many arguments.\n");
         bam2fq_usage(pysam_stderr, argv[0]);
-        free(opts);
+        free_opts(opts);
         return false;
     }
     opts->fn_input = argv[optind];
@@ -840,6 +1111,7 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
     bam2fq_state_t* state = calloc(1, sizeof(bam2fq_state_t));
     state->flag_on = opts->flag_on;
     state->flag_off = opts->flag_off;
+    state->flag_alloff = opts->flag_alloff;
     state->has12 = opts->has12;
     state->use_oq = opts->use_oq;
     state->copy_tags = opts->copy_tags;
@@ -852,6 +1124,8 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
         free(state);
         return false;
     }
+    if (opts->ga.nthreads > 0)
+        hts_set_threads(state->fp, opts->ga.nthreads);
     uint32_t rf = SAM_QNAME | SAM_FLAG | SAM_SEQ | SAM_QUAL;
     if (opts->use_oq) rf |= SAM_AUX;
     if (hts_set_opt(state->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
@@ -886,6 +1160,17 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
             state->fpr[i] = pysam_stdout;
         }
     }
+    for (i = 0; i < 2; i++) {
+        state->fpi[i] = NULL;
+        if (opts->index_file[i]) {
+            state->fpi[i] = fopen(opts->index_file[i], "w");
+            if (state->fpi[i] == NULL) {
+                print_error_errno("bam2fq", "Cannot write to i%d file \"%s\"", i+1, opts->index_file[i]);
+                free(state);
+                return false;
+            }
+        }
+    }
 
     state->h = sam_hdr_read(state->fp);
     if (state->h == NULL) {
@@ -908,6 +1193,12 @@ static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int*
     for (i = 0; i < 3; ++i) {
         if (state->fpr[i] != pysam_stdout && fclose(state->fpr[i])) { print_error_errno("bam2fq", "Error closing r%d file \"%s\"", i, opts->fnr[i]); valid = false; }
     }
+    for (i = 0; i < 2; i++) {
+        if (state->fpi[i] && fclose(state->fpi[i])) { 
+            print_error_errno("bam2fq", "Error closing i%d file \"%s\"", i+1, opts->index_file[i]);
+            valid = false;
+        }
+    }
     free(state);
     return valid;
 }
@@ -916,11 +1207,12 @@ static inline bool filter_it_out(const bam1_t *b, const bam2fq_state_t *state)
 {
     return (b->core.flag&(BAM_FSECONDARY|BAM_FSUPPLEMENTARY) // skip secondary and supplementary alignments
         || (b->core.flag&(state->flag_on)) != state->flag_on // or reads indicated by filter flags
-        || (b->core.flag&(state->flag_off)) != 0);
+        || (b->core.flag&(state->flag_off)) != 0
+        || (b->core.flag&(state->flag_alloff) && (b->core.flag&(state->flag_alloff)) == state->flag_alloff));
 
 }
 
-static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
+static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state, bam2fq_opts_t* opts)
 {
     bam1_t* b = bam_init1();
     char *current_qname = NULL;
@@ -976,6 +1268,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
                 return false;
             }
             score[which_readpart(b)] = b_score;
+            if (state->fpi[0]) tags2fq(b, state, opts);
         }
     }
     if (!valid)
@@ -993,7 +1286,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
     return valid;
 }
 
-static bool bam2fq_mainloop(bam2fq_state_t *state)
+static bool bam2fq_mainloop(bam2fq_state_t *state, bam2fq_opts_t* opts)
 {
     // process a name collated BAM into fastq
     bam1_t* b = bam_init1();
@@ -1004,13 +1297,12 @@ static bool bam2fq_mainloop(bam2fq_state_t *state)
     int64_t n_reads = 0; // Statistics
     kstring_t linebuf = { 0, 0, NULL }; // Buffer
     while (sam_read1(state->fp, state->h, b) >= 0) {
-        if (b->core.flag&(BAM_FSECONDARY|BAM_FSUPPLEMENTARY) // skip secondary and supplementary alignments
-            || (b->core.flag&(state->flag_on)) != state->flag_on             // or reads indicated by filter flags
-            || (b->core.flag&(state->flag_off)) != 0) continue;
+        if (filter_it_out(b, state)) continue;
         ++n_reads;
 
         if (!bam1_to_fq(b, &linebuf, state)) return false;
         fputs(linebuf.s, state->fpr[which_readpart(b)]);
+        if (state->fpi[0]) tags2fq(b, state, opts);
     }
     free(linebuf.s);
     bam_destroy1(b);
@@ -1031,14 +1323,14 @@ int main_bam2fq(int argc, char *argv[])
     if (!init_state(opts, &state)) return EXIT_FAILURE;
 
     if (state->fpse) {
-        if (!bam2fq_mainloop_singletontrack(state)) status = EXIT_FAILURE;
+        if (!bam2fq_mainloop_singletontrack(state,opts)) status = EXIT_FAILURE;
     } else {
-        if (!bam2fq_mainloop(state)) status = EXIT_FAILURE;
+        if (!bam2fq_mainloop(state,opts)) status = EXIT_FAILURE;
     }
 
     if (!destroy_state(opts, state, &status)) return EXIT_FAILURE;
     sam_global_args_free(&opts->ga);
-    free(opts);
+    free_opts(opts);
 
     return status;
 }
diff --git a/samtools/stats.c b/samtools/stats.c
index eb6bb52..35574ed 100644
--- a/samtools/stats.c
+++ b/samtools/stats.c
@@ -828,8 +828,8 @@ void collect_stats(bam1_t *bam_line, stats_t *stats)
         // reads. Mates mapped to different chromosomes have isize==0.
         int32_t isize = bam_line->core.isize;
         if ( isize<0 ) isize = -isize;
-        if ( stats->info->nisize > 0 && isize >= stats->info->nisize )
-            isize = stats->info->nisize-1;
+        if ( stats->info->nisize > 0 && isize > stats->info->nisize )
+            isize = stats->info->nisize;
         if ( isize>0 || bam_line->core.tid==bam_line->core.mtid )
         {
             int pos_fst = bam_line->core.mpos - bam_line->core.pos;
@@ -1263,7 +1263,7 @@ void init_regions(stats_t *stats, const char *file)
             stats->regions[tid].pos = realloc(stats->regions[tid].pos,sizeof(pos_t)*stats->regions[tid].mpos);
         }
 
-        if ( (sscanf(&line.s[i+1],"%d %d",&stats->regions[tid].pos[npos].from,&stats->regions[tid].pos[npos].to))!=2 ) error("Could not parse the region [%s]\n", &line.s[i+1]);
+        if ( (sscanf(&line.s[i+1],"%u %u",&stats->regions[tid].pos[npos].from,&stats->regions[tid].pos[npos].to))!=2 ) error("Could not parse the region [%s]\n", &line.s[i+1]);
         if ( prev_tid==-1 || prev_tid!=tid )
         {
             prev_tid = tid;
@@ -1375,7 +1375,7 @@ static void error(const char *format, ...)
         printf("    -S, --split <tag>                   Also write statistics to separate files split by tagged field.\n");
         printf("    -t, --target-regions <file>         Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n");
         printf("    -x, --sparse                        Suppress outputting IS rows where there are no insertions.\n");
-        sam_global_opt_help(stdout, "-.--.");
+        sam_global_opt_help(stdout, "-.--.@");
         printf("\n");
     }
     else
@@ -1481,13 +1481,13 @@ int init_stat_info_fname(stats_info_t* info, const char* bam_fname, const htsFor
     // .. bam
     samFile* sam;
     if ((sam = sam_open_format(bam_fname, "r", in_fmt)) == 0) {
-        error("Failed to open: %s\n", bam_fname);
+        print_error_errno("stats", "failed to open \"%s\"", bam_fname);
         return 1;
     }
     info->sam = sam;
     info->sam_header = sam_hdr_read(sam);
     if (info->sam_header == NULL) {
-        error("Failed to read header for '%s'\n", bam_fname);
+        print_error("stats", "failed to read header for \"%s\"", bam_fname);
         return 1;
     }
     return 0;
@@ -1537,7 +1537,7 @@ static void init_stat_structs(stats_t* stats, stats_info_t* info, const char* gr
     stats->quals_2nd      = calloc(stats->nquals*stats->nbases,sizeof(uint64_t));
     stats->gc_1st         = calloc(stats->ngc,sizeof(uint64_t));
     stats->gc_2nd         = calloc(stats->ngc,sizeof(uint64_t));
-    stats->isize          = init_isize_t(info->nisize);
+    stats->isize          = init_isize_t(info->nisize ?info->nisize+1 :0);
     stats->gcd            = calloc(stats->ngcd,sizeof(gc_depth_t));
     stats->mpc_buf        = info->fai ? calloc(stats->nquals*stats->nbases,sizeof(uint64_t)) : NULL;
     stats->acgtno_cycles  = calloc(stats->nbases,sizeof(acgtno_count_t));
@@ -1596,7 +1596,7 @@ int main_stats(int argc, char *argv[])
 
     static const struct option loptions[] =
     {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '@'),
         {"help", no_argument, NULL, 'h'},
         {"remove-dups", no_argument, NULL, 'd'},
         {"sam", no_argument, NULL, 's'},
@@ -1618,7 +1618,7 @@ int main_stats(int argc, char *argv[])
     };
     int opt;
 
-    while ( (opt=getopt_long(argc,argv,"?hdsxr:c:l:i:t:m:q:f:F:I:1:S:P:",loptions,NULL))>0 )
+    while ( (opt=getopt_long(argc,argv,"?hdsxr:c:l:i:t:m:q:f:F:I:1:S:P:@:",loptions,NULL))>0 )
     {
         switch (opt)
         {
@@ -1662,6 +1662,8 @@ int main_stats(int argc, char *argv[])
     }
 
     if (init_stat_info_fname(info, bam_fname, &ga.in)) return 1;
+    if (ga.nthreads > 0)
+        hts_set_threads(info->sam, ga.nthreads);
 
     stats_t *all_stats = stats_init();
     stats_t *curr_stats = NULL;
diff --git a/samtools/stats.c.pysam.c b/samtools/stats.c.pysam.c
index da187ac..8ebb52a 100644
--- a/samtools/stats.c.pysam.c
+++ b/samtools/stats.c.pysam.c
@@ -220,7 +220,7 @@ typedef struct
 stats_t;
 KHASH_MAP_INIT_STR(c2stats, stats_t*)
 
-static void error(const char *format, ...);
+static int error(const char *format, ...);
 int is_in_regions(bam1_t *bam_line, stats_t *stats);
 void realloc_buffers(stats_t *stats, int seq_len);
 
@@ -830,8 +830,8 @@ void collect_stats(bam1_t *bam_line, stats_t *stats)
         // reads. Mates mapped to different chromosomes have isize==0.
         int32_t isize = bam_line->core.isize;
         if ( isize<0 ) isize = -isize;
-        if ( stats->info->nisize > 0 && isize >= stats->info->nisize )
-            isize = stats->info->nisize-1;
+        if ( stats->info->nisize > 0 && isize > stats->info->nisize )
+            isize = stats->info->nisize;
         if ( isize>0 || bam_line->core.tid==bam_line->core.mtid )
         {
             int pos_fst = bam_line->core.mpos - bam_line->core.pos;
@@ -1265,7 +1265,7 @@ void init_regions(stats_t *stats, const char *file)
             stats->regions[tid].pos = realloc(stats->regions[tid].pos,sizeof(pos_t)*stats->regions[tid].mpos);
         }
 
-        if ( (sscanf(&line.s[i+1],"%d %d",&stats->regions[tid].pos[npos].from,&stats->regions[tid].pos[npos].to))!=2 ) error("Could not parse the region [%s]\n", &line.s[i+1]);
+        if ( (sscanf(&line.s[i+1],"%u %u",&stats->regions[tid].pos[npos].from,&stats->regions[tid].pos[npos].to))!=2 ) error("Could not parse the region [%s]\n", &line.s[i+1]);
         if ( prev_tid==-1 || prev_tid!=tid )
         {
             prev_tid = tid;
@@ -1352,7 +1352,7 @@ void init_group_id(stats_t *stats, const char *id)
 }
 
 
-static void error(const char *format, ...)
+static int error(const char *format, ...)
 {
     if ( !format )
     {
@@ -1377,8 +1377,9 @@ static void error(const char *format, ...)
         fprintf(pysam_stdout, "    -S, --split <tag>                   Also write statistics to separate files split by tagged field.\n");
         fprintf(pysam_stdout, "    -t, --target-regions <file>         Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n");
         fprintf(pysam_stdout, "    -x, --sparse                        Suppress outputting IS rows where there are no insertions.\n");
-        sam_global_opt_help(pysam_stdout, "-.--.");
+        sam_global_opt_help(pysam_stdout, "-.--.@");
         fprintf(pysam_stdout, "\n");
+	return(0);
     }
     else
     {
@@ -1483,13 +1484,13 @@ int init_stat_info_fname(stats_info_t* info, const char* bam_fname, const htsFor
     // .. bam
     samFile* sam;
     if ((sam = sam_open_format(bam_fname, "r", in_fmt)) == 0) {
-        error("Failed to open: %s\n", bam_fname);
+        print_error_errno("stats", "failed to open \"%s\"", bam_fname);
         return 1;
     }
     info->sam = sam;
     info->sam_header = sam_hdr_read(sam);
     if (info->sam_header == NULL) {
-        error("Failed to read header for '%s'\n", bam_fname);
+        print_error("stats", "failed to read header for \"%s\"", bam_fname);
         return 1;
     }
     return 0;
@@ -1539,7 +1540,7 @@ static void init_stat_structs(stats_t* stats, stats_info_t* info, const char* gr
     stats->quals_2nd      = calloc(stats->nquals*stats->nbases,sizeof(uint64_t));
     stats->gc_1st         = calloc(stats->ngc,sizeof(uint64_t));
     stats->gc_2nd         = calloc(stats->ngc,sizeof(uint64_t));
-    stats->isize          = init_isize_t(info->nisize);
+    stats->isize          = init_isize_t(info->nisize ?info->nisize+1 :0);
     stats->gcd            = calloc(stats->ngcd,sizeof(gc_depth_t));
     stats->mpc_buf        = info->fai ? calloc(stats->nquals*stats->nbases,sizeof(uint64_t)) : NULL;
     stats->acgtno_cycles  = calloc(stats->nbases,sizeof(acgtno_count_t));
@@ -1598,7 +1599,7 @@ int main_stats(int argc, char *argv[])
 
     static const struct option loptions[] =
     {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '@'),
         {"help", no_argument, NULL, 'h'},
         {"remove-dups", no_argument, NULL, 'd'},
         {"sam", no_argument, NULL, 's'},
@@ -1620,7 +1621,7 @@ int main_stats(int argc, char *argv[])
     };
     int opt;
 
-    while ( (opt=getopt_long(argc,argv,"?hdsxr:c:l:i:t:m:q:f:F:I:1:S:P:",loptions,NULL))>0 )
+    while ( (opt=getopt_long(argc,argv,"?hdsxr:c:l:i:t:m:q:f:F:I:1:S:P:@:",loptions,NULL))>0 )
     {
         switch (opt)
         {
@@ -1646,7 +1647,7 @@ int main_stats(int argc, char *argv[])
             case 'S': info->split_tag = optarg; break;
             case 'P': info->split_prefix = optarg; break;
             case '?':
-            case 'h': error(NULL);
+	    case 'h': return(error(NULL));
             default:
                 if (parse_sam_global_opt(opt, optarg, loptions, &ga) != 0)
                     error("Unknown argument: %s\n", optarg);
@@ -1659,11 +1660,13 @@ int main_stats(int argc, char *argv[])
     if ( !bam_fname )
     {
         if ( isatty(STDIN_FILENO) )
-            error(NULL);
+	  return(error(NULL));
         bam_fname = "-";
     }
 
     if (init_stat_info_fname(info, bam_fname, &ga.in)) return 1;
+    if (ga.nthreads > 0)
+        hts_set_threads(info->sam, ga.nthreads);
 
     stats_t *all_stats = stats_init();
     stats_t *curr_stats = NULL;
diff --git a/samtools/test/split/test_filter_header_rg.c b/samtools/test/split/test_filter_header_rg.c
index d9505d6..cccf0e9 100644
--- a/samtools/test/split/test_filter_header_rg.c
+++ b/samtools/test/split/test_filter_header_rg.c
@@ -42,7 +42,8 @@ void setup_test_1(bam_hdr_t** hdr_in)
 bool check_test_1(const bam_hdr_t* hdr) {
     const char *test1_res =
     "@HD\tVN:1.4\n"
-    "@SQ\tSN:blah\n";
+    "@SQ\tSN:blah\n"
+    "@PG\tID:samtools\tPN:samtools\tVN:x.y.test\tCL:test_filter_header_rg foo bar baz\n";
 
     if (strcmp(hdr->text, test1_res)) {
         return false;
@@ -65,7 +66,8 @@ bool check_test_2(const bam_hdr_t* hdr) {
     const char *test2_res =
     "@HD\tVN:1.4\n"
     "@SQ\tSN:blah\n"
-    "@RG\tID:fish\n";
+    "@RG\tID:fish\n"
+    "@PG\tID:samtools\tPN:samtools\tVN:x.y.test\tCL:test_filter_header_rg foo bar baz\n";
 
     if (strcmp(hdr->text, test2_res)) {
         return false;
@@ -73,7 +75,7 @@ bool check_test_2(const bam_hdr_t* hdr) {
     return true;
 }
 
-int main(int argc, char**argv)
+int main(int argc, char *argv[])
 {
     // test state
     const int NUM_TESTS = 2;
@@ -82,6 +84,8 @@ int main(int argc, char**argv)
     int failure = 0;
 
     int getopt_char;
+    char *test_argv[] = { "test_filter_header_rg", "foo\tbar", "baz" };
+    char *arg_list = stringify_argv(3, test_argv);
     while ((getopt_char = getopt(argc, argv, "v")) != -1) {
         switch (getopt_char) {
             case 'v':
@@ -116,7 +120,7 @@ int main(int argc, char**argv)
 
     // test
     xfreopen(tempfname, "w", stderr); // Redirect stderr to pipe
-    bool result_1 = filter_header_rg(hdr1, id_to_keep_1);
+    bool result_1 = filter_header_rg(hdr1, id_to_keep_1, arg_list);
     fclose(stderr);
 
     if (verbose) printf("END RUN test 1\n");
@@ -155,7 +159,7 @@ int main(int argc, char**argv)
 
     // test
     xfreopen(tempfname, "w", stderr); // Redirect stderr to pipe
-    bool result_2 = filter_header_rg(hdr2, id_to_keep_2);
+    bool result_2 = filter_header_rg(hdr2, id_to_keep_2, arg_list);
     fclose(stderr);
 
     if (verbose) printf("END RUN test 2\n");
@@ -185,6 +189,7 @@ int main(int argc, char**argv)
 
     // Cleanup
     free(res.s);
+    free(arg_list);
     remove(tempfname);
     if (failure > 0)
         fprintf(orig_stderr, "%d failures %d successes\n", failure, success);
diff --git a/samtools/test/split/test_filter_header_rg.c.pysam.c b/samtools/test/split/test_filter_header_rg.c.pysam.c
index 97b3573..c9284f6 100644
--- a/samtools/test/split/test_filter_header_rg.c.pysam.c
+++ b/samtools/test/split/test_filter_header_rg.c.pysam.c
@@ -44,7 +44,8 @@ void setup_test_1(bam_hdr_t** hdr_in)
 bool check_test_1(const bam_hdr_t* hdr) {
     const char *test1_res =
     "@HD\tVN:1.4\n"
-    "@SQ\tSN:blah\n";
+    "@SQ\tSN:blah\n"
+    "@PG\tID:samtools\tPN:samtools\tVN:x.y.test\tCL:test_filter_header_rg foo bar baz\n";
 
     if (strcmp(hdr->text, test1_res)) {
         return false;
@@ -67,7 +68,8 @@ bool check_test_2(const bam_hdr_t* hdr) {
     const char *test2_res =
     "@HD\tVN:1.4\n"
     "@SQ\tSN:blah\n"
-    "@RG\tID:fish\n";
+    "@RG\tID:fish\n"
+    "@PG\tID:samtools\tPN:samtools\tVN:x.y.test\tCL:test_filter_header_rg foo bar baz\n";
 
     if (strcmp(hdr->text, test2_res)) {
         return false;
@@ -75,7 +77,7 @@ bool check_test_2(const bam_hdr_t* hdr) {
     return true;
 }
 
-int samtools_test_filter_header_rg_main(int argc, char**argv)
+int samtools_test_filter_header_rg_main(int argc, char *argv[])
 {
     // test state
     const int NUM_TESTS = 2;
@@ -84,6 +86,8 @@ int samtools_test_filter_header_rg_main(int argc, char**argv)
     int failure = 0;
 
     int getopt_char;
+    char *test_argv[] = { "test_filter_header_rg", "foo\tbar", "baz" };
+    char *arg_list = stringify_argv(3, test_argv);
     while ((getopt_char = getopt(argc, argv, "v")) != -1) {
         switch (getopt_char) {
             case 'v':
@@ -118,7 +122,7 @@ int samtools_test_filter_header_rg_main(int argc, char**argv)
 
     // test
     xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
-    bool result_1 = filter_header_rg(hdr1, id_to_keep_1);
+    bool result_1 = filter_header_rg(hdr1, id_to_keep_1, arg_list);
     fclose(pysam_stderr);
 
     if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
@@ -157,7 +161,7 @@ int samtools_test_filter_header_rg_main(int argc, char**argv)
 
     // test
     xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
-    bool result_2 = filter_header_rg(hdr2, id_to_keep_2);
+    bool result_2 = filter_header_rg(hdr2, id_to_keep_2, arg_list);
     fclose(pysam_stderr);
 
     if (verbose) fprintf(pysam_stdout, "END RUN test 2\n");
@@ -187,6 +191,7 @@ int samtools_test_filter_header_rg_main(int argc, char**argv)
 
     // Cleanup
     free(res.s);
+    free(arg_list);
     remove(tempfname);
     if (failure > 0)
         fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
diff --git a/samtools/test/test.c b/samtools/test/test.c
index 7ab38af..fb0b549 100644
--- a/samtools/test/test.c
+++ b/samtools/test/test.c
@@ -1,6 +1,6 @@
 /*  test/test.c -- test harness utility routines.
 
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014, 2016 Genome Research Ltd.
 
     Author: Martin O. Pollard <mp15 at sanger.ac.uk>
 
@@ -53,3 +53,9 @@ void dump_hdr(const bam_hdr_t* hdr)
     }
     printf("text: \"%s\"\n", hdr->text);
 }
+
+// For tests, just return a constant that can be embedded in expected output.
+const char *samtools_version(void)
+{
+    return "x.y.test";
+}
diff --git a/samtools/test/test.c.pysam.c b/samtools/test/test.c.pysam.c
index a8295b5..bf460e8 100644
--- a/samtools/test/test.c.pysam.c
+++ b/samtools/test/test.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  test/test.c -- test harness utility routines.
 
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014, 2016 Genome Research Ltd.
 
     Author: Martin O. Pollard <mp15 at sanger.ac.uk>
 
@@ -55,3 +55,9 @@ void dump_hdr(const bam_hdr_t* hdr)
     }
     fprintf(pysam_stdout, "text: \"%s\"\n", hdr->text);
 }
+
+// For tests, just return a constant that can be embedded in expected output.
+const char *samtools_version(void)
+{
+    return "x.y.test";
+}
diff --git a/samtools/version.h b/samtools/version.h
index ec46e67..004d7ed 100644
--- a/samtools/version.h
+++ b/samtools/version.h
@@ -1 +1 @@
-#define SAMTOOLS_VERSION "1.3.1"
+#define SAMTOOLS_VERSION "1.4.1"
diff --git a/setup.py b/setup.py
index 6d52617..5b23d20 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ This module provides a low-level wrapper around the htslib C-API as
 using cython and a high-level API for convenient access to the data
 within standard genomic file formats.
 
-The current version wraps htslib-1.3.1, samtools-1.3.1 and bcftools-1.3.1.
+The current version wraps htslib-1.4.1, samtools-1.4.1 and bcftools-1.4.1.
 
 See:
 http://www.htslib.org
@@ -78,6 +78,11 @@ def configure_library(library_dir, env_options=None, options=[]):
 
     configure_script = os.path.join(library_dir, "configure")
 
+    on_rtd = os.environ.get("READTHEDOCS") == "True"
+    # RTD has no bzip2 development libraries installed:
+    if on_rtd:
+        env_options = "--disable-bz2"
+
     if not os.path.exists(configure_script):
         raise ValueError(
             "configure script {} does not exist".format(configure_script))
@@ -246,8 +251,8 @@ elif HTSLIB_MODE == 'shared':
     # htslib built from sources included in the pysam
     # package.
     htslib_library_dirs = [
-        'pysam',
-        ".",
+        "pysam",  # when using setup.py develop?
+        ".",  # when using setup.py develop?
         os.path.join("build", distutils_dir_name("lib"), "pysam")]
 
     htslib_include_dirs = ['htslib']
@@ -255,7 +260,15 @@ elif HTSLIB_MODE == 'shared':
 else:
     raise ValueError("unknown HTSLIB value '%s'" % HTSLIB_MODE)
 
-internal_htslib_libraries = [os.path.splitext("chtslib{}".format(sysconfig.get_config_var('SO')))[0]]
+suffix = sysconfig.get_config_var('EXT_SUFFIX')
+if not suffix:
+    suffix = sysconfig.get_config_var('SO')
+internal_htslib_libraries = [os.path.splitext("chtslib{}".format(suffix))[0]]
+
+internal_tools_libraries = [
+    os.path.splitext("csamtools{}".format(suffix))[0],
+    os.path.splitext("cbcftools{}".format(suffix))[0],
+    ]
 
 # build config.py
 with open(os.path.join("pysam", "config.py"), "w") as outf:
@@ -268,7 +281,7 @@ with open(os.path.join("pysam", "config.py"), "w") as outf:
                 if line.startswith("#define"):
                     key, value = re.match(
                         "#define (\S+)\s+(\S+)", line).groups()
-                    config_values[key] = int(value)
+                    config_values[key] = value
             for key in ["ENABLE_PLUGINS",
                         "HAVE_COMMONCRYPTO",
                         "HAVE_GMTIME_R",
@@ -353,7 +366,6 @@ chtslib = Extension(
     shared_htslib_sources +
     os_c_files,
     library_dirs=htslib_library_dirs,
-    runtime_library_dirs=htslib_library_dirs,
     include_dirs=["pysam", "."] + include_os + htslib_include_dirs,
     libraries=external_htslib_libraries,
     language="c",
@@ -369,8 +381,7 @@ csamfile = Extension(
     "pysam.libcsamfile",
     [source_pattern % "samfile",
      "pysam/htslib_util.c",
-     "pysam/samfile_util.c",
-     "samtools/kprobaln.c"] +
+     "pysam/samfile_util.c"] +
     htslib_sources +
     os_c_files,
     library_dirs=htslib_library_dirs,
@@ -389,8 +400,7 @@ calignmentfile = Extension(
     "pysam.libcalignmentfile",
     [source_pattern % "alignmentfile",
      "pysam/htslib_util.c",
-     "pysam/samfile_util.c",
-     "samtools/kprobaln.c"] +
+     "pysam/samfile_util.c"] +
     htslib_sources +
     os_c_files,
     library_dirs=htslib_library_dirs,
@@ -409,8 +419,7 @@ calignedsegment = Extension(
     "pysam.libcalignedsegment",
     [source_pattern % "alignedsegment",
      "pysam/htslib_util.c",
-     "pysam/samfile_util.c",
-     "samtools/kprobaln.c"] +
+     "pysam/samfile_util.c"] +
     htslib_sources +
     os_c_files,
     library_dirs=htslib_library_dirs,
@@ -435,17 +444,45 @@ ctabix = Extension(
     define_macros=define_macros
 )
 
+
+
 cutils = Extension(
     "pysam.libcutils",
     [source_pattern % "utils", "pysam/pysam_util.c"] +
+    htslib_sources +
+    os_c_files,
+    library_dirs=["pysam"] + htslib_library_dirs,
+    include_dirs=["pysam", "."] +
+    include_os + htslib_include_dirs,
+    libraries=external_htslib_libraries + internal_htslib_libraries + internal_tools_libraries,
+    language="c",
+    extra_compile_args=extra_compile_args,
+    define_macros=define_macros
+)
+
+csamtools = Extension(
+    "pysam.libcsamtools",
+    [source_pattern % "samtools"] +
     glob.glob(os.path.join("samtools", "*.pysam.c")) +
-    # glob.glob(os.path.join("samtools", "*", "*.pysam.c")) +
+    htslib_sources +
+    os_c_files,
+    library_dirs=["pysam"] + htslib_library_dirs,
+    include_dirs=["samtools", "pysam", "."] +
+    include_os + htslib_include_dirs,
+    libraries=external_htslib_libraries + internal_htslib_libraries,
+    language="c",
+    extra_compile_args=extra_compile_args,
+    define_macros=define_macros
+)
+
+cbcftools = Extension(
+    "pysam.libcbcftools",
+    [source_pattern % "bcftools"] +
     glob.glob(os.path.join("bcftools", "*.pysam.c")) +
-    # glob.glob(os.path.join("bcftools", "*", "*.pysam.c")) +
     htslib_sources +
     os_c_files,
     library_dirs=["pysam"] + htslib_library_dirs,
-    include_dirs=["samtools", "bcftools", "pysam", "."] +
+    include_dirs=["bcftools", "pysam", "."] +
     include_os + htslib_include_dirs,
     libraries=external_htslib_libraries + internal_htslib_libraries,
     language="c",
@@ -538,6 +575,8 @@ metadata = {
                     cbcf,
                     cbgzf,
                     cfaidx,
+                    csamtools,
+                    cbcftools,
                     cutils],
     'cmdclass': cmdclass,
     'package_dir': package_dirs,
diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py
index b0a3466..6d9101c 100644
--- a/tests/AlignedSegment_test.py
+++ b/tests/AlignedSegment_test.py
@@ -234,20 +234,46 @@ class TestAlignedSegment(ReadTest):
     def test_infer_query_length(self):
         '''Test infer_query_length on M|=|X|I|D|H|S cigar ops'''
         a = self.buildRead()
-        a.cigarstring = '15M'
-        self.assertEqual(a.infer_query_length(), 15)
-        a.cigarstring = '15='
-        self.assertEqual(a.infer_query_length(), 15)
-        a.cigarstring = '15X'
-        self.assertEqual(a.infer_query_length(), 15)
-        a.cigarstring = '5M5I5M'
-        self.assertEqual(a.infer_query_length(), 15)
-        a.cigarstring = '5M5D5M'
-        self.assertEqual(a.infer_query_length(), 10)
-        a.cigarstring = '5H10M'
-        self.assertEqual(a.infer_query_length(), 15)
-        a.cigarstring = '5S10M'
-        self.assertEqual(a.infer_query_length(), 15)
+        a.cigarstring = '40M'
+        self.assertEqual(a.infer_query_length(), 40)
+        a.cigarstring = '40='
+        self.assertEqual(a.infer_query_length(), 40)
+        a.cigarstring = '40X'
+        self.assertEqual(a.infer_query_length(), 40)
+        a.cigarstring = '20M5I20M'
+        self.assertEqual(a.infer_query_length(), 45)
+        a.cigarstring = '20M5D20M'
+        self.assertEqual(a.infer_query_length(), 40)
+        a.cigarstring = '5H35M'
+        self.assertEqual(a.infer_query_length(), 35)
+        a.cigarstring = '5S35M'
+        self.assertEqual(a.infer_query_length(), 40)
+        a.cigarstring = '35M5H'
+        self.assertEqual(a.infer_query_length(), 35)
+        a.cigarstring = '35M5S'
+        self.assertEqual(a.infer_query_length(), 40)
+
+    def test_infer_read_length(self):
+        '''Test infer_read_length on M|=|X|I|D|H|S cigar ops'''
+        a = self.buildRead()
+        a.cigarstring = '40M'
+        self.assertEqual(a.infer_read_length(), 40)
+        a.cigarstring = '40='
+        self.assertEqual(a.infer_read_length(), 40)
+        a.cigarstring = '40X'
+        self.assertEqual(a.infer_read_length(), 40)
+        a.cigarstring = '20M5I20M'
+        self.assertEqual(a.infer_read_length(), 45)
+        a.cigarstring = '20M5D20M'
+        self.assertEqual(a.infer_read_length(), 40)
+        a.cigarstring = '5H35M'
+        self.assertEqual(a.infer_read_length(), 40)
+        a.cigarstring = '5S35M'
+        self.assertEqual(a.infer_read_length(), 40)
+        a.cigarstring = '35M5H'
+        self.assertEqual(a.infer_read_length(), 40)
+        a.cigarstring = '35M5S'
+        self.assertEqual(a.infer_read_length(), 40)
 
     def test_get_aligned_pairs_soft_clipping(self):
         a = self.buildRead()
@@ -388,22 +414,28 @@ class TestAlignedSegment(ReadTest):
         self.assertEqual(a.query_alignment_length, 20)
         a.cigarstring = "20M1S"
         self.assertEqual(a.query_alignment_length, 20)
+        a.cigarstring = "20M1H"
+        self.assertEqual(a.query_alignment_length, 20)
         a.cigarstring = "1S20M"
         self.assertEqual(a.query_alignment_length, 20)
+        a.cigarstring = "1H20M"
+        self.assertEqual(a.query_alignment_length, 20)
         a.cigarstring = "1S20M1S"
         self.assertEqual(a.query_alignment_length, 20)
+        a.cigarstring = "1H20M1H"
+        self.assertEqual(a.query_alignment_length, 20)
 
     def test_query_length_is_limited(self):
         
         a = self.buildRead()
         a.query_name = "A" * 1
-        a.query_name = "A" * 254
+        a.query_name = "A" * 251
         self.assertRaises(
             ValueError,
             setattr,
             a,
             "query_name",
-            "A" * 255)
+            "A" * 252)
 
 
 class TestCigarStats(ReadTest):
@@ -785,5 +817,34 @@ class TestAsString(unittest.TestCase):
                 self.assertEqual(s, p.tostring(pysamf))
 
 
+class TestEnums(unittest.TestCase):
+    
+    def test_cigar_enums_are_defined(self):
+        self.assertEqual(pysam.CMATCH, 0)
+        self.assertEqual(pysam.CINS, 1)
+        self.assertEqual(pysam.CDEL, 2)
+        self.assertEqual(pysam.CREF_SKIP, 3)
+        self.assertEqual(pysam.CSOFT_CLIP, 4)
+        self.assertEqual(pysam.CHARD_CLIP, 5)
+        self.assertEqual(pysam.CPAD, 6)
+        self.assertEqual(pysam.CEQUAL, 7)
+        self.assertEqual(pysam.CDIFF, 8)
+        self.assertEqual(pysam.CBACK, 9)
+
+    def test_sam_flags_are_defined(self):
+        self.assertEqual(pysam.FPAIRED, 1)
+        self.assertEqual(pysam.FPROPER_PAIR, 2)
+        self.assertEqual(pysam.FUNMAP, 4)
+        self.assertEqual(pysam.FMUNMAP, 8)
+        self.assertEqual(pysam.FREVERSE, 16)
+        self.assertEqual(pysam.FMREVERSE, 32)
+        self.assertEqual(pysam.FREAD1, 64)
+        self.assertEqual(pysam.FREAD2, 128)
+        self.assertEqual(pysam.FSECONDARY, 256)
+        self.assertEqual(pysam.FQCFAIL, 512)
+        self.assertEqual(pysam.FDUP, 1024)
+        self.assertEqual(pysam.FSUPPLEMENTARY, 2048)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/AlignmentFile_test.py b/tests/AlignmentFile_test.py
index 18fb05b..a866881 100644
--- a/tests/AlignmentFile_test.py
+++ b/tests/AlignmentFile_test.py
@@ -439,10 +439,12 @@ class TestIO(unittest.TestCase):
                   input_filename,
                   reference_filename,
                   output_filename,
-                  input_mode, output_mode,
+                  input_mode,
+                  output_mode,
                   sequence_filename=None,
                   use_template=True,
-                  checkf=checkBinaryEqual):
+                  checkf=checkBinaryEqual,
+                  **kwargs):
         '''iterate through *input_filename* writing to
         *output_filename* and comparing the output to
         *reference_filename*.
@@ -477,7 +479,7 @@ class TestIO(unittest.TestCase):
                     output_filename,
                     output_mode,
                     reference_filename=sequence_filename,
-                    template=infile)
+                    template=infile, **kwargs)
             else:
                 outfile = pysam.AlignmentFile(
                     output_filename,
@@ -485,7 +487,8 @@ class TestIO(unittest.TestCase):
                     reference_names=infile.references,
                     reference_lengths=infile.lengths,
                     reference_filename=sequence_filename,
-                    add_sq_text=False)
+                    add_sq_text=False,
+                    **kwargs)
 
             iter = infile.fetch()
 
@@ -509,6 +512,13 @@ class TestIO(unittest.TestCase):
                        "tmp_ex2.sam",
                        "r", "wh")
 
+    def testSAM2SAMWithoutHeader(self):
+        self.checkEcho("ex2.sam",
+                       "ex1.sam",
+                       "tmp_ex2.sam",
+                       "r", "w",
+                       add_sam_header=False)
+
     def testBAM2BAM(self):
         self.checkEcho("ex2.bam",
                        "ex2.bam",
@@ -588,14 +598,6 @@ class TestIO(unittest.TestCase):
     #     self.checkEcho(input_filename, reference_filename, output_filename,
     #                    "rb", "wb", use_template=False)
 
-    # Release 0.8.0
-    # no samfiles without header
-    def testSAM2SAMWithoutHeader(self):
-        self.checkEcho("ex2.sam",
-                       "ex1.sam",
-                       "tmp_ex2.sam",
-                       "r", "w")
-
     def testReadSamWithoutTargetNames(self):
         '''see issue 104.'''
         input_filename = os.path.join(
@@ -614,14 +616,12 @@ class TestIO(unittest.TestCase):
                           input_filename, "r",
                           check_header=True)
 
-        infile = pysam.AlignmentFile(
+        with pysam.AlignmentFile(
             input_filename,
             check_header=False,
-            check_sq=False)
-
-        # TODO
-        # result = list(infile.fetch(until_eof=True))
-        # self.assertEqual(2, len(result))
+            check_sq=False) as infile:
+            result = list(infile.fetch(until_eof=True))
+            self.assertEqual(2, len(result))
 
     def testReadBamWithoutTargetNames(self):
         '''see issue 104.'''
@@ -641,52 +641,43 @@ class TestIO(unittest.TestCase):
                           "r",
                           check_header=True)
 
-        infile = pysam.AlignmentFile(
-            input_filename, check_header=False, check_sq=False)
-        result = list(infile.fetch(until_eof=True))
+        with pysam.AlignmentFile(
+            input_filename, check_sq=False) as infile:
+            result = list(infile.fetch(until_eof=True))
 
-    # TODO
-    def testReadSamWithoutHeader(self):
+    def test_fail_read_sam_without_header(self):
         input_filename = os.path.join(DATADIR, "ex1.sam")
 
-        # reading from a samfile without header is not
-        # implemented
         self.assertRaises(ValueError,
                           pysam.AlignmentFile,
                           input_filename,
                           "r")
 
-        # TODO
-        # without check_header header is no read
-        # leading to segfault
-        # self.assertRaises(ValueError,
-        #                   pysam.AlignmentFile,
-        #                   input_filename,
-        #                   "r",
-        #                   check_header=False)
+    def test_pass_read_sam_without_header_with_refs(self):
+        with pysam.AlignmentFile(os.path.join(DATADIR, "ex1.sam"),
+                                 "r",
+                                 reference_names=["chr1", "chr2"],
+                                 reference_lengths=[1575, 1584]) as samfile:
+            self.assertEqual(len(list(samfile.fetch(until_eof=True))), 3270)
 
-    # TODO
-    # def testReadUnformattedFile(self):
-    #     '''test reading from a file that is not bam/sam formatted'''
-    #     input_filename = os.path.join(DATADIR, 'Makefile')
-
-    #     # bam - file raise error
-    #     self.assertRaises(ValueError,
-    #                       pysam.AlignmentFile,
-    #                       input_filename,
-    #                       "rb")
-
-    #     # sam - file error, but can't fetch
-    #     self.assertRaises(ValueError,
-    #                       pysam.AlignmentFile,
-    #                       input_filename,
-    #                       "r")
-
-    #     self.assertRaises(ValueError,
-    #                       pysam.AlignmentFile,
-    #                       input_filename,
-    #                       "r",
-    #                       check_header=False)
+    def test_pass_read_sam_with_header_without_header_check(self):
+        with pysam.AlignmentFile(os.path.join(DATADIR, "ex2.sam"),
+                                 "r", check_header=False) as samfile:
+            self.assertEqual(len(list(samfile.fetch(until_eof=True))), 3270)
+
+    def test_fail_when_reading_unformatted_files(self):
+        '''test reading from a file that is not bam/sam formatted'''
+        input_filename = os.path.join(DATADIR, 'Makefile')
+
+        self.assertRaises(ValueError,
+                          pysam.AlignmentFile,
+                          input_filename,
+                          "rb")
+
+        self.assertRaises(ValueError,
+                          pysam.AlignmentFile,
+                          input_filename,
+                          "r")
 
     def testBAMWithoutAlignedSegments(self):
         '''see issue 117'''
@@ -854,7 +845,23 @@ class TestIO(unittest.TestCase):
                                       check_sq=False)
         samfile.fetch('chr2')
 
-
+    def test_fetch_by_tid(self):
+        with pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"), "rb") as samfile:
+            self.assertEqual(len(list(samfile.fetch('chr1'))),
+                             len(list(samfile.fetch(tid=0))))
+            self.assertEqual(len(list(samfile.fetch('chr2'))),
+                             len(list(samfile.fetch(tid=1))))
+            self.assertRaises(
+                IndexError,
+                samfile.fetch,
+                tid=2)
+            self.assertRaises(
+                IndexError,
+                samfile.fetch,
+                tid=-1)
+            self.assertEqual(len(list(samfile.fetch('chr1',start=1000, end=2000))),
+                             len(list(samfile.fetch(tid=0, start=1000, end=2000))))
+            
 
 class TestAutoDetect(unittest.TestCase):
 
@@ -1761,7 +1768,7 @@ class TestDeNovoConstruction(unittest.TestCase):
 
     #     os.unlink(tmpfilename)
 
-    def testBAMPerRead(self):
+    def test_pass_if_reads_binary_equal(self):
         '''check if individual reads are binary equal.'''
         infile = pysam.AlignmentFile(self.bamfile, "rb")
 
@@ -1846,25 +1853,17 @@ class TestTruncatedBAM(unittest.TestCase):
 
     '''see pull request 50.'''
 
-    def testTruncatedBam(self):
+    def testTruncatedBam2(self):
+        self.assertRaises(IOError,
+                          pysam.AlignmentFile,
+                          os.path.join(DATADIR, 'ex2_truncated.bam'))
 
-        s = pysam.AlignmentFile(
-            os.path.join(DATADIR, 'ex2_truncated.bam'))
+    def testTruncatedBam2(self):
+        s = pysam.AlignmentFile(os.path.join(DATADIR, 'ex2_truncated.bam'),
+                                ignore_truncation=True)
         iterall = lambda x: len([a for a in x])
         self.assertRaises(IOError, iterall, s)
 
-    def testTruncatedBamFetch(self):
-        '''See comments for pull request at 
-        https://github.com/pysam-developers/pysam/pull/50#issuecomment-64928625
-        '''
-        # Currently there is no way to detect truncated
-        # files through hts_iter_fetch, so this test is
-        # disabled
-        return
-        s = pysam.AlignmentFile(
-            os.path.join(DATADIR, 'ex2_truncated.bam'))
-        iterall = lambda x: len([a for a in x])
-        self.assertRaises(IOError, iterall, s.fetch())
 
 COMPARE_BTAG = [100, 1, 91, 0, 7, 101, 0, 201, 96, 204,
                 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78,
diff --git a/tests/SamFile_test.py b/tests/SamFile_test.py
deleted file mode 100644
index ff13045..0000000
--- a/tests/SamFile_test.py
+++ /dev/null
@@ -1,1990 +0,0 @@
-#!/usr/bin/env python
-'''unit testing code for pysam.
-
-Execute in the :file:`tests` directory as it requires the Makefile
-and data files located there.
-'''
-
-import pysam
-import pysam.samtools
-import unittest
-import os
-import shutil
-import sys
-import collections
-import subprocess
-import logging
-import array
-from TestUtils import checkBinaryEqual, checkURL, force_str
-
-DATADIR = "pysam_data"
-
-
-class BasicTestBAMFetch(unittest.TestCase):
-
-    '''basic first test - detailed testing
-    if information in file is consistent
-    with information in AlignedRead object.'''
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(
-            os.path.join(DATADIR, "ex3.bam"),
-            "rb")
-        self.reads = list(self.samfile.fetch())
-
-    def testARqname(self):
-        self.assertEqual(
-            self.reads[0].qname,
-            "read_28833_29006_6945",
-            "read name mismatch in read 1: %s != %s" % (
-                self.reads[0].qname, "read_28833_29006_6945"))
-        self.assertEqual(
-            self.reads[1].qname,
-            "read_28701_28881_323b",
-            "read name mismatch in read 2: %s != %s" % (
-                self.reads[1].qname, "read_28701_28881_323b"))
-
-    def testARflag(self):
-        self.assertEqual(
-            self.reads[0].flag, 99,
-            "flag mismatch in read 1: %s != %s" % (
-                self.reads[0].flag, 99))
-        self.assertEqual(
-            self.reads[1].flag, 147,
-            "flag mismatch in read 2: %s != %s" % (
-                self.reads[1].flag, 147))
-
-    def testARrname(self):
-        self.assertEqual(
-            self.reads[0].rname, 0,
-            "chromosome/target id mismatch in read 1: %s != %s" %
-            (self.reads[0].rname, 0))
-        self.assertEqual(
-            self.reads[1].rname, 1,
-            "chromosome/target id mismatch in read 2: %s != %s" %
-            (self.reads[1].rname, 1))
-
-    def testARpos(self):
-        self.assertEqual(
-            self.reads[0].pos, 33 - 1,
-            "mapping position mismatch in read 1: %s != %s" %
-            (self.reads[0].pos, 33 - 1))
-        self.assertEqual(
-            self.reads[1].pos, 88 - 1,
-            "mapping position mismatch in read 2: %s != %s" %
-            (self.reads[1].pos, 88 - 1))
-
-    def testARmapq(self):
-        self.assertEqual(
-            self.reads[0].mapq, 20,
-            "mapping quality mismatch in read 1: %s != %s" %
-            (self.reads[0].mapq, 20))
-        self.assertEqual(
-            self.reads[1].mapq, 30,
-            "mapping quality mismatch in read 2: %s != %s" % (
-                self.reads[1].mapq, 30))
-
-    def testARcigar(self):
-        self.assertEqual(
-            self.reads[0].cigar,
-            [(0, 10), (2, 1), (0, 25)],
-            "read name length mismatch in read 1: %s != %s" %
-            (self.reads[0].cigar, [(0, 10), (2, 1), (0, 25)]))
-        self.assertEqual(
-            self.reads[1].cigar, [(0, 35)],
-            "read name length mismatch in read 2: %s != %s" %
-            (self.reads[1].cigar, [(0, 35)]))
-
-    def testARcigarstring(self):
-        self.assertEqual(self.reads[0].cigarstring, '10M1D25M')
-        self.assertEqual(self.reads[1].cigarstring, '35M')
-
-    def testARmrnm(self):
-        self.assertEqual(
-            self.reads[0].mrnm, 0,
-            "mate reference sequence name mismatch in read 1: %s != %s" %
-            (self.reads[0].mrnm, 0))
-        self.assertEqual(
-            self.reads[1].mrnm, 1,
-            "mate reference sequence name mismatch in read 2: %s != %s" %
-            (self.reads[1].mrnm, 1))
-        self.assertEqual(
-            self.reads[0].rnext, 0,
-            "mate reference sequence name mismatch in read 1: %s != %s" %
-            (self.reads[0].rnext, 0))
-        self.assertEqual(
-            self.reads[1].rnext, 1,
-            "mate reference sequence name mismatch in read 2: %s != %s" %
-            (self.reads[1].rnext, 1))
-
-    def testARmpos(self):
-        self.assertEqual(self.reads[
-                         0].mpos, 200 - 1, "mate mapping position mismatch in read 1: %s != %s" % (self.reads[0].mpos, 200 - 1))
-        self.assertEqual(self.reads[
-                         1].mpos, 500 - 1, "mate mapping position mismatch in read 2: %s != %s" % (self.reads[1].mpos, 500 - 1))
-        self.assertEqual(self.reads[
-                         0].pnext, 200 - 1, "mate mapping position mismatch in read 1: %s != %s" % (self.reads[0].pnext, 200 - 1))
-        self.assertEqual(self.reads[
-                         1].pnext, 500 - 1, "mate mapping position mismatch in read 2: %s != %s" % (self.reads[1].pnext, 500 - 1))
-
-    def testARisize(self):
-        self.assertEqual(self.reads[0].isize, 167, "insert size mismatch in read 1: %s != %s" % (
-            self.reads[0].isize, 167))
-        self.assertEqual(self.reads[1].isize, 412, "insert size mismatch in read 2: %s != %s" % (
-            self.reads[1].isize, 412))
-        self.assertEqual(self.reads[0].tlen, 167, "insert size mismatch in read 1: %s != %s" % (
-            self.reads[0].tlen, 167))
-        self.assertEqual(self.reads[1].tlen, 412, "insert size mismatch in read 2: %s != %s" % (
-            self.reads[1].tlen, 412))
-
-    def testARseq(self):
-        self.assertEqual(self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 1: %s != %s" % (
-            self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG"))
-        self.assertEqual(self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "sequence size mismatch in read 2: %s != %s" % (
-            self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA"))
-        self.assertEqual(self.reads[3].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 4: %s != %s" % (
-            self.reads[3].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG"))
-
-    def testARqual(self):
-        self.assertEqual(self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<",
-                         "quality string mismatch in read 1: %s != %s" % (self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<"))
-        self.assertEqual(self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (
-            self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<"))
-        self.assertEqual(self.reads[3].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<",
-                         "quality string mismatch in read 3: %s != %s" % (self.reads[3].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<"))
-
-    def testARquery(self):
-        self.assertEqual(self.reads[0].query, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "query mismatch in read 1: %s != %s" % (
-            self.reads[0].query, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG"))
-        self.assertEqual(self.reads[1].query, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "query size mismatch in read 2: %s != %s" % (
-            self.reads[1].query, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA"))
-        self.assertEqual(self.reads[3].query, "TAGCTAGCTACCTATATCTTGGTCTT", "query mismatch in read 4: %s != %s" % (
-            self.reads[3].query, "TAGCTAGCTACCTATATCTTGGTCTT"))
-
-    def testARqqual(self):
-        self.assertEqual(
-            self.reads[0].qqual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<",
-            "qquality string mismatch in read 1: %s != %s" %
-            (self.reads[0].qqual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<"))
-        self.assertEqual(
-            self.reads[1].qqual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<",
-            "qquality string mismatch in read 2: %s != %s" %
-            (self.reads[1].qqual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<"))
-        self.assertEqual(
-            self.reads[3].qqual, "<<<<<<<<<<<<<<<<<:<9/,&,22",
-            "qquality string mismatch in read 3: %s != %s" %
-            (self.reads[3].qqual, "<<<<<<<<<<<<<<<<<:<9/,&,22"))
-
-    def testPresentOptionalFields(self):
-        self.assertEqual(
-            self.reads[0].opt('NM'), 1,
-            "optional field mismatch in read 1, NM: %s != %s" %
-            (self.reads[0].opt('NM'), 1))
-        self.assertEqual(
-            self.reads[0].opt('RG'), 'L1',
-            "optional field mismatch in read 1, RG: %s != %s" %
-            (self.reads[0].opt('RG'), 'L1'))
-        self.assertEqual(
-            self.reads[1].opt('RG'), 'L2',
-            "optional field mismatch in read 2, RG: %s != %s" %
-            (self.reads[1].opt('RG'), 'L2'))
-        self.assertEqual(
-            self.reads[1].opt('MF'), 18,
-            "optional field mismatch in read 2, MF: %s != %s" %
-            (self.reads[1].opt('MF'), 18))
-
-    def testPairedBools(self):
-        self.assertEqual(self.reads[0].is_paired, True,
-                         "is paired mismatch in read 1: %s != %s" % (
-            self.reads[0].is_paired, True))
-        self.assertEqual(self.reads[1].is_paired, True,
-                         "is paired mismatch in read 2: %s != %s" % (
-            self.reads[1].is_paired, True))
-        self.assertEqual(self.reads[0].is_proper_pair, True,
-                         "is proper pair mismatch in read 1: %s != %s" % (
-            self.reads[0].is_proper_pair, True))
-        self.assertEqual(self.reads[1].is_proper_pair, True,
-                         "is proper pair mismatch in read 2: %s != %s" % (
-            self.reads[1].is_proper_pair, True))
-
-    def testTags(self):
-        self.assertEqual(self.reads[0].tags,
-                         [('NM', 1), ('RG', 'L1'),
-                          ('PG', 'P1'), ('XT', 'U')])
-        self.assertEqual(self.reads[1].tags,
-                         [('MF', 18), ('RG', 'L2'),
-                          ('PG', 'P2'), ('XT', 'R')])
-
-    def testAddTags(self):
-        self.assertEqual(sorted(self.reads[0].tags),
-                         sorted([('NM', 1), ('RG', 'L1'),
-                                 ('PG', 'P1'), ('XT', 'U')]))
-
-        self.reads[0].setTag('X1', 'C')
-        self.assertEqual(sorted(self.reads[0].tags),
-                         sorted([('X1', 'C'), ('NM', 1), ('RG', 'L1'),
-                                 ('PG', 'P1'), ('XT', 'U'), ]))
-        self.reads[0].setTag('X2', 5)
-        self.assertEqual(sorted(self.reads[0].tags),
-                         sorted([('X2', 5), ('X1', 'C'),
-                                 ('NM', 1), ('RG', 'L1'),
-                                 ('PG', 'P1'), ('XT', 'U'), ]))
-        # add with replacement
-        self.reads[0].setTag('X2', 10)
-        self.assertEqual(sorted(self.reads[0].tags),
-                         sorted([('X2', 10), ('X1', 'C'),
-                                 ('NM', 1), ('RG', 'L1'),
-                                 ('PG', 'P1'), ('XT', 'U'), ]))
-
-        # add without replacement
-        self.reads[0].setTag('X2', 5, replace=False)
-        self.assertEqual(sorted(self.reads[0].tags),
-                         sorted([('X2', 10), ('X1', 'C'),
-                                 ('X2', 5),
-                                 ('NM', 1), ('RG', 'L1'),
-                                 ('PG', 'P1'), ('XT', 'U'), ]))
-
-    def testAddTagsType(self):
-        self.reads[0].tags = None
-        self.assertEqual(self.reads[0].tags, [])
-
-        self.reads[0].setTag('X1', 5.0)
-        self.reads[0].setTag('X2', "5.0")
-        self.reads[0].setTag('X3', 5)
-
-        self.assertEqual(sorted(self.reads[0].tags),
-                         sorted([('X1', 5.0),
-                                 ('X2', "5.0"),
-                                 ('X3', 5)]))
-
-        # test setting float for int value
-        self.reads[0].setTag('X4', 5, value_type='d')
-        self.assertEqual(sorted(self.reads[0].tags),
-                         sorted([('X1', 5.0),
-                                 ('X2', "5.0"),
-                                 ('X3', 5),
-                                 ('X4', 5.0)]))
-
-        # test setting int for float value - the
-        # value will be rounded.
-        self.reads[0].setTag('X5', 5.2, value_type='i')
-        self.assertEqual(sorted(self.reads[0].tags),
-                         sorted([('X1', 5.0),
-                                 ('X2', "5.0"),
-                                 ('X3', 5),
-                                 ('X4', 5.0),
-                                 ('X5', 5)]))
-
-        # test setting invalid type code
-        self.assertRaises(ValueError, self.reads[0].setTag, 'X6', 5.2, 'g')
-
-    def testTagsUpdatingFloat(self):
-        self.assertEqual(self.reads[0].tags,
-                         [('NM', 1), ('RG', 'L1'),
-                          ('PG', 'P1'), ('XT', 'U')])
-        self.reads[0].tags += [('XC', 5.0)]
-        self.assertEqual(self.reads[0].tags,
-                         [('NM', 1), ('RG', 'L1'),
-                          ('PG', 'P1'), ('XT', 'U'), ('XC', 5.0)])
-
-    def testOpt(self):
-        self.assertEqual(self.reads[0].opt("XT"), "U")
-        self.assertEqual(self.reads[1].opt("XT"), "R")
-
-    def testMissingOpt(self):
-        self.assertRaises(KeyError, self.reads[0].opt, "XP")
-
-    def testEmptyOpt(self):
-        self.assertRaises(KeyError, self.reads[2].opt, "XT")
-
-    def tearDown(self):
-        self.samfile.close()
-
-
-class BasicTestBAMFile(BasicTestBAMFetch):
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(
-            os.path.join(DATADIR, "ex3.sam"),
-            "r")
-        self.reads = [r for r in self.samfile]
-
-
-class BasicTestSAMFile(BasicTestBAMFetch):
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(
-            os.path.join(DATADIR, "ex3.sam"),
-            "r")
-        self.reads = [r for r in self.samfile]
-
-
-class BasicTestSAMFetch(BasicTestBAMFetch):
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(
-            os.path.join(DATADIR, "ex3.sam"),
-            "r")
-        self.reads = list(self.samfile.fetch())
-
-
-# needs to be implemented
-# class TestAlignedReadFromSamWithoutHeader(TestAlignedReadFromBam):
-#
-#     def setUp(self):
-#         self.samfile=pysam.Samfile( "ex7.sam","r" )
-#         self.reads=list(self.samfile.fetch())
-
-
-class TestIO(unittest.TestCase):
-
-    '''check if reading samfile and writing a samfile are consistent.'''
-
-    def checkEcho(self,
-                  input_filename,
-                  reference_filename,
-                  output_filename,
-                  input_mode, output_mode,
-                  use_template=True):
-        '''iterate through *input_filename* writing to *output_filename* and
-        comparing the output to *reference_filename*.
-
-        The files are opened according to the *input_mode* and *output_mode*.
-
-        If *use_template* is set, the header is copied from infile
-        using the template mechanism, otherwise target names and
-        lengths are passed explicitly.
-
-        '''
-
-        infile = pysam.Samfile(os.path.join(DATADIR, input_filename),
-                               input_mode)
-        if use_template:
-            outfile = pysam.Samfile(output_filename,
-                                    output_mode,
-                                    template=infile)
-        else:
-            outfile = pysam.Samfile(output_filename,
-                                    output_mode,
-                                    referencenames=infile.references,
-                                    referencelengths=infile.lengths,
-                                    add_sq_text=False)
-
-        iter = infile.fetch()
-
-        for x in iter:
-            outfile.write(x)
-        infile.close()
-        outfile.close()
-
-        self.assertTrue(
-            checkBinaryEqual(os.path.join(DATADIR, reference_filename),
-                             output_filename),
-            "files %s and %s are not the same" % (reference_filename,
-                                                  output_filename))
-
-    def testReadWriteBam(self):
-
-        input_filename = "ex1.bam"
-        output_filename = "pysam_ex1.bam"
-        reference_filename = "ex1.bam"
-
-        self.checkEcho(input_filename, reference_filename, output_filename,
-                       "rb", "wb", use_template=True)
-
-    # Disabled - should work, files are not binary equal, but are
-    # non-binary equal:
-    # diff <(samtools view pysam_ex1.bam) <(samtools view pysam_data/ex1.bam)
-    # def testReadWriteBamWithTargetNames(self):
-    #     input_filename = "ex1.bam"
-    #     output_filename = "pysam_ex1.bam"
-    #     reference_filename = "ex1.bam"
-
-    #     self.checkEcho(input_filename, reference_filename, output_filename,
-    #                    "rb", "wb", use_template=False)
-
-    def testReadWriteSamWithHeader(self):
-
-        input_filename = "ex2.sam"
-        output_filename = "pysam_ex2.sam"
-        reference_filename = "ex2.sam"
-
-        self.checkEcho(input_filename,
-                       reference_filename,
-                       output_filename,
-                       "r", "wh")
-
-    # Release 0.8.0
-    # no samfiles without header
-    def testReadWriteSamWithoutHeader(self):
-
-        input_filename = "ex2.sam"
-        output_filename = "pysam_ex2.sam"
-        reference_filename = "ex1.sam"
-
-        self.checkEcho(input_filename,
-                       reference_filename,
-                       output_filename,
-                       "r", "w")
-
-    def testReadSamWithoutTargetNames(self):
-        '''see issue 104.'''
-        input_filename = os.path.join(DATADIR,
-                                      "example_unmapped_reads_no_sq.sam")
-
-        # raise exception in default mode
-        self.assertRaises(ValueError, pysam.Samfile, input_filename, "r")
-
-        # raise exception if no SQ files
-        self.assertRaises(ValueError, pysam.Samfile,
-                          input_filename, "r",
-                          check_header=True)
-
-        infile = pysam.Samfile(
-            input_filename,
-            check_header=False,
-            check_sq=False)
-
-        # TODO
-        # result = list(infile.fetch(until_eof=True))
-        # self.assertEqual(2, len(result))
-
-    def testReadBamWithoutTargetNames(self):
-        '''see issue 104.'''
-        input_filename = os.path.join(
-            DATADIR, "example_unmapped_reads_no_sq.bam")
-
-        # raise exception in default mode
-        self.assertRaises(ValueError, pysam.Samfile, input_filename, "r")
-
-        # raise exception if no SQ files
-        self.assertRaises(ValueError, pysam.Samfile, input_filename, "r",
-                          check_header=True)
-
-        infile = pysam.Samfile(
-            input_filename, check_header=False, check_sq=False)
-        result = list(infile.fetch(until_eof=True))
-
-    # TODO
-    def testReadSamWithoutHeader(self):
-        input_filename = os.path.join(DATADIR, "ex1.sam")
-
-        # reading from a samfile without header is not
-        # implemented
-        self.assertRaises(ValueError,
-                          pysam.Samfile,
-                          input_filename,
-                          "r")
-
-        # TODO
-        # without check_header header is no read
-        # leading to segfault
-        # self.assertRaises(ValueError,
-        #                   pysam.Samfile,
-        #                   input_filename,
-        #                   "r",
-        #                   check_header=False)
-
-    # TODO
-    # def testReadUnformattedFile(self):
-    #     '''test reading from a file that is not bam/sam formatted'''
-    #     input_filename = os.path.join(DATADIR, 'Makefile')
-
-    #     # bam - file raise error
-    #     self.assertRaises(ValueError,
-    #                       pysam.Samfile,
-    #                       input_filename,
-    #                       "rb")
-
-    #     # sam - file error, but can't fetch
-    #     self.assertRaises(ValueError,
-    #                       pysam.Samfile,
-    #                       input_filename,
-    #                       "r")
-
-    #     self.assertRaises(ValueError,
-    #                       pysam.Samfile,
-    #                       input_filename,
-    #                       "r",
-    #                       check_header=False)
-
-    def testBAMWithoutAlignedReads(self):
-        '''see issue 117'''
-        input_filename = os.path.join(DATADIR, "test_unaligned.bam")
-        samfile = pysam.Samfile(input_filename, "rb", check_sq=False)
-        samfile.fetch(until_eof=True)
-
-    def testBAMWithShortBAI(self):
-        '''see issue 116'''
-        input_filename = os.path.join(DATADIR, "example_bai.bam")
-        samfile = pysam.Samfile(input_filename, "rb", check_sq=False)
-        samfile.fetch('chr2')
-
-    def testFetchFromClosedFile(self):
-
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                "rb")
-        samfile.close()
-        self.assertRaises(ValueError, samfile.fetch, 'chr1', 100, 120)
-
-    def testClosedFile(self):
-        '''test that access to a closed samfile raises ValueError.'''
-
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                "rb")
-        samfile.close()
-        self.assertRaises(ValueError, samfile.fetch, 'chr1', 100, 120)
-        self.assertRaises(ValueError, samfile.pileup, 'chr1', 100, 120)
-        self.assertRaises(ValueError, samfile.getrname, 0)
-        # TODO
-        self.assertRaises(ValueError, samfile.tell)
-        self.assertRaises(ValueError, samfile.seek, 0)
-        self.assertRaises(ValueError, getattr, samfile, "nreferences")
-        self.assertRaises(ValueError, getattr, samfile, "references")
-        self.assertRaises(ValueError, getattr, samfile, "lengths")
-        self.assertRaises(ValueError, getattr, samfile, "text")
-        self.assertRaises(ValueError, getattr, samfile, "header")
-
-        # write on closed file
-        self.assertEqual(0, samfile.write(None))
-
-    def testAutoDetection(self):
-        '''test if autodetection works.'''
-
-        # TODO
-        # samfile = pysam.Samfile(os.path.join(DATADIR, "ex3.sam"))
-        # self.assertRaises(ValueError, samfile.fetch, 'chr1')
-        # samfile.close()
-
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex3.bam"))
-        samfile.fetch('chr1')
-        samfile.close()
-
-    # TOOD
-    # def testReadingFromSamFileWithoutHeader(self):
-    #     '''read from samfile without header.
-    #     '''
-    #     samfile = pysam.Samfile(os.path.join(DATADIR, "ex7.sam"),
-    #                             check_header=False,
-    #                             check_sq=False)
-    #     self.assertRaises(NotImplementedError, samfile.__iter__)
-
-    def testReadingFromFileWithoutIndex(self):
-        '''read from bam file without index.'''
-
-        shutil.copyfile(os.path.join(DATADIR, "ex2.bam"), 'tmp_ex2.bam')
-        samfile = pysam.Samfile('tmp_ex2.bam',
-                                "rb")
-        self.assertRaises(ValueError, samfile.fetch)
-        self.assertEqual(len(list(samfile.fetch(until_eof=True))),
-                         3270)
-        os.unlink('tmp_ex2.bam')
-
-    # def testReadingUniversalFileMode(self):
-    #     '''read from samfile without header.
-    #     '''
-
-    #     input_filename = "ex2.sam"
-    #     output_filename = "pysam_ex2.sam"
-    #     reference_filename = "ex1.sam"
-
-    #     self.checkEcho(input_filename,
-    #                    reference_filename,
-    #                    output_filename,
-    #                    "rU", "w")
-
-    def testHead(self):
-        '''test IteratorRowHead'''
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                "rb")
-        l10 = list(samfile.head(10))
-        l100 = list(samfile.head(100))
-        self.assertEqual(len(l10), 10)
-        self.assertEqual(len(l100), 100)
-        self.assertEqual(list(map(str, l10)),
-                         list(map(str, l100[:10])))
-
-
-class TestFloatTagBug(unittest.TestCase):
-
-    '''see issue 71'''
-
-    def testFloatTagBug(self):
-        '''a float tag before another exposed a parsing bug in bam_aux_get.
-
-        Fixed in 0.1.19
-        '''
-        samfile = pysam.Samfile(os.path.join(DATADIR, "tag_bug.bam"))
-        read = next(samfile.fetch(until_eof=True))
-        self.assertTrue(('XC', 1) in read.tags)
-        self.assertEqual(read.opt('XC'), 1)
-
-
-class TestLargeFieldBug(unittest.TestCase):
-
-    '''see issue 100'''
-
-    def testLargeFileBug(self):
-        '''when creating a read with a large entry in the tag field
-        causes an errror:
-            NotImplementedError: tags field too large
-        '''
-        samfile = pysam.Samfile(os.path.join(DATADIR, "issue100.bam"))
-        read = next(samfile.fetch(until_eof=True))
-        new_read = pysam.AlignedRead()
-        new_read.tags = read.tags
-        self.assertEqual(new_read.tags, read.tags)
-
-
-class TestTagParsing(unittest.TestCase):
-
-    '''tests checking the accuracy of tag setting and retrieval.'''
-
-    def makeRead(self):
-        a = pysam.AlignedRead()
-        a.qname = "read_12345"
-        a.tid = 0
-        a.seq = "ACGT" * 3
-        a.flag = 0
-        a.rname = 0
-        a.pos = 1
-        a.mapq = 20
-        a.cigar = ((0, 10), (2, 1), (0, 25))
-        a.mrnm = 0
-        a.mpos = 200
-        a.isize = 0
-        a.qual = "1234" * 3
-        # todo: create tags
-        return a
-
-    def testNegativeIntegers(self):
-        x = -2
-        aligned_read = self.makeRead()
-        aligned_read.tags = [("XD", int(x))]
-        # print (aligned_read.tags)
-
-    def testNegativeIntegers2(self):
-        x = -2
-        r = self.makeRead()
-        r.tags = [("XD", int(x))]
-        outfile = pysam.Samfile("test.bam",
-                                "wb",
-                                referencenames=("chr1",),
-                                referencelengths = (1000,))
-        outfile.write(r)
-        outfile.close()
-
-    def testCigarString(self):
-        r = self.makeRead()
-        self.assertEqual(r.cigarstring, "10M1D25M")
-        r.cigarstring = "20M10D20M"
-        self.assertEqual(r.cigar, [(0, 20), (2, 10), (0, 20)])
-        # unsetting cigar string
-        r.cigarstring = None
-        self.assertEqual(r.cigarstring, None)
-
-    def testCigar(self):
-        r = self.makeRead()
-        self.assertEqual(r.cigar, [(0, 10), (2, 1), (0, 25)])
-        # unsetting cigar string
-        r.cigar = None
-        self.assertEqual(r.cigar, [])
-
-    def testLongTags(self):
-        '''see issue 115'''
-
-        r = self.makeRead()
-        rg = 'HS2000-899_199.L3'
-        tags = [('XC', 85), ('XT', 'M'), ('NM', 5),
-                ('SM', 29), ('AM', 29), ('XM', 1),
-                ('XO', 1), ('XG', 4), ('MD', '37^ACCC29T18'),
-                ('XA', '5,+11707,36M1I48M,2;21,-48119779,46M1I38M,2;hs37d5,-10060835,40M1D45M,3;5,+11508,36M1I48M,3;hs37d5,+6743812,36M1I48M,3;19,-59118894,46M1I38M,3;4,-191044002,6M1I78M,3;')]
-
-        r.tags = tags
-        r.tags += [("RG", rg)] * 100
-        tags += [("RG", rg)] * 100
-
-        self.assertEqual(tags, r.tags)
-
-
-class TestClipping(unittest.TestCase):
-
-    def testClipping(self):
-
-        self.samfile = pysam.Samfile(os.path.join(DATADIR, "softclip.bam"),
-                                     "rb")
-        for read in self.samfile:
-
-            if read.qname == "r001":
-                self.assertEqual(read.seq, 'AAAAGATAAGGATA')
-                self.assertEqual(read.query, 'AGATAAGGATA')
-                self.assertEqual(read.qual, None)
-                self.assertEqual(read.qqual, None)
-
-            elif read.qname == "r002":
-
-                self.assertEqual(read.seq, 'GCCTAAGCTAA')
-                self.assertEqual(read.query, 'AGCTAA')
-                self.assertEqual(read.qual, '01234567890')
-                self.assertEqual(read.qqual, '567890')
-
-            elif read.qname == "r003":
-
-                self.assertEqual(read.seq, 'GCCTAAGCTAA')
-                self.assertEqual(read.query, 'GCCTAA')
-                self.assertEqual(read.qual, '01234567890')
-                self.assertEqual(read.qqual, '012345')
-
-            elif read.qname == "r004":
-
-                self.assertEqual(read.seq, 'TAGGC')
-                self.assertEqual(read.query, 'TAGGC')
-                self.assertEqual(read.qual, '01234')
-                self.assertEqual(read.qqual, '01234')
-
-
-class TestIteratorRow(unittest.TestCase):
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                     "rb")
-
-    def checkRange(self, rnge):
-        '''compare results from iterator with those from samtools.'''
-        ps = list(self.samfile.fetch(region=rnge))
-        sa = force_str(
-            pysam.samtools.view(
-                os.path.join(DATADIR, "ex1.bam"),
-                rnge,
-                raw=True)).splitlines(True)
-        self.assertEqual(
-            len(ps), len(sa),
-            "unequal number of results for range %s: %i != %i" %
-            (rnge, len(ps), len(sa)))
-        # check if the same reads are returned and in the same order
-        for line, (a, b) in enumerate(list(zip(ps, sa))):
-            d = b.split("\t")
-            self.assertEqual(
-                a.qname, d[0],
-                "line %i: read id mismatch: %s != %s" %
-                (line, a.rname, d[0]))
-            self.assertEqual(
-                a.pos, int(d[3]) - 1,
-                "line %i: read position mismatch: %s != %s, "
-                "\n%s\n%s\n" %
-                (line, a.pos, int(d[3]) - 1,
-                 str(a), str(d)))
-            qual = d[10]
-            self.assertEqual(
-                a.qual, qual,
-                "line %i: quality mismatch: %s != %s, \n%s\n%s\n" %
-                (line, a.qual, qual,
-                 str(a), str(d)))
-
-    def testIteratePerContig(self):
-        '''check random access per contig'''
-        for contig in self.samfile.references:
-            self.checkRange(contig)
-
-    def testIterateRanges(self):
-        '''check random access per range'''
-        for contig, length in zip(self.samfile.references, self.samfile.lengths):
-            for start in range(1, length, 90):
-                # this includes empty ranges
-                self.checkRange("%s:%i-%i" % (contig, start, start + 90))
-
-    def tearDown(self):
-        self.samfile.close()
-
-
-class TestIteratorRowAll(unittest.TestCase):
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                     "rb")
-
-    def testIterate(self):
-        '''compare results from iterator with those from samtools.'''
-        ps = list(self.samfile.fetch())
-        sa = force_str(
-            pysam.samtools.view(
-                os.path.join(DATADIR, "ex1.bam"),
-                raw=True)).splitlines(True)
-
-        self.assertEqual(
-            len(ps), len(sa), "unequal number of results: %i != %i" % (len(ps), len(sa)))
-        # check if the same reads are returned
-        for line, pair in enumerate(list(zip(ps, sa))):
-            data = pair[1].split("\t")
-            self.assertEqual(pair[0].qname, data[
-                             0], "read id mismatch in line %i: %s != %s" % (line, pair[0].rname, data[0]))
-
-    def tearDown(self):
-        self.samfile.close()
-
-
-class TestIteratorColumn(unittest.TestCase):
-
-    '''test iterator column against contents of ex4.bam.'''
-
-    # note that samfile contains 1-based coordinates
-    # 1D means deletion with respect to reference sequence
-    #
-    mCoverages = {'chr1': [0] * 20 + [1] * 36 + [0] * (100 - 20 - 35),
-                  'chr2': [0] * 20 + [1] * 35 + [0] * (100 - 20 - 35),
-                  }
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(os.path.join(DATADIR, "ex4.bam"),
-                                     "rb")
-
-    def checkRange(self, contig, start=None, end=None, truncate=False):
-        '''compare results from iterator with those from samtools.'''
-        # check if the same reads are returned and in the same order
-        for column in self.samfile.pileup(contig, start, end,
-                                          truncate=truncate):
-            if truncate:
-                self.assertGreaterEqual(column.pos, start)
-                self.assertLess(column.pos, end)
-            thiscov = len(column.pileups)
-            refcov = self.mCoverages[
-                self.samfile.getrname(column.tid)][column.pos]
-            self.assertEqual(
-                thiscov, refcov, "wrong coverage at pos %s:%i %i should be %i" % (
-                    self.samfile.getrname(column.tid), column.pos, thiscov, refcov))
-
-    def testIterateAll(self):
-        '''check random access per contig'''
-        self.checkRange(None)
-
-    def testIteratePerContig(self):
-        '''check random access per contig'''
-        for contig in self.samfile.references:
-            self.checkRange(contig)
-
-    def testIterateRanges(self):
-        '''check random access per range'''
-        for contig, length in zip(
-                self.samfile.references, self.samfile.lengths):
-            for start in range(1, length, 90):
-                # this includes empty ranges
-                self.checkRange(contig, start, start + 90)
-
-    def testInverse(self):
-        '''test the inverse, is point-wise pileup accurate.'''
-        for contig, refseq in list(self.mCoverages.items()):
-            refcolumns = sum(refseq)
-            for pos, refcov in enumerate(refseq):
-                columns = list(self.samfile.pileup(contig, pos, pos + 1))
-                if refcov == 0:
-                    # if no read, no coverage
-                    self.assertEqual(
-                        len(columns),
-                        refcov,
-                        "wrong number of pileup columns returned for position %s:%i, %i should be %i" % (
-                            contig, pos,
-                            len(columns), refcov))
-                elif refcov == 1:
-                    # one read, all columns of the read are returned
-                    self.assertEqual(
-                        len(columns),
-                        refcolumns,
-                        "pileup incomplete at position %i: got %i, expected %i " %
-                        (pos, len(columns), refcolumns))
-
-    def testIterateTruncate(self):
-        '''check random access per range'''
-        for contig, length in zip(self.samfile.references, self.samfile.lengths):
-            for start in range(1, length, 90):
-                # this includes empty ranges
-                self.checkRange(contig, start, start + 90, truncate=True)
-
-    def tearDown(self):
-        self.samfile.close()
-
-
-class TestIteratorColumn2(unittest.TestCase):
-
-    '''test iterator column against contents of ex1.bam.'''
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                     "rb")
-
-    def testStart(self):
-        # print self.samfile.fetch().next().pos
-        # print self.samfile.pileup().next().pos
-        pass
-
-    def testTruncate(self):
-        '''see issue 107.'''
-        # note that ranges in regions start from 1
-        p = self.samfile.pileup(region='chr1:170:172', truncate=True)
-        columns = [x.pos for x in p]
-        self.assertEqual(len(columns), 3)
-        self.assertEqual(columns, [169, 170, 171])
-
-        p = self.samfile.pileup('chr1', 169, 172, truncate=True)
-        columns = [x.pos for x in p]
-
-        self.assertEqual(len(columns), 3)
-        self.assertEqual(columns, [169, 170, 171])
-
-    def testAccessOnClosedIterator(self):
-        '''see issue 131
-
-        Accessing pileup data after iterator has closed.
-        '''
-        pcolumn = self.samfile.pileup('chr1', 170, 180).__next__()
-        self.assertRaises(ValueError, getattr, pcolumn, "pileups")
-
-
-class TestHeaderSam(unittest.TestCase):
-
-    header = {'SQ': [{'LN': 1575, 'SN': 'chr1', 'AH': 'chr1:5000000-5010000'},
-                     {'LN': 1584, 'SN': 'chr2', 'AH': '*'}],
-              'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', 'PU': 'SC_1_10', "CN": "name:with:colon"},
-                     {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', 'PU': 'SC_2_12', "CN": "name:with:colon"}],
-              'PG': [{'ID': 'P1', 'VN': '1.0'}, {'ID': 'P2', 'VN': '1.1'}],
-              'HD': {'VN': '1.0'},
-              'CO': ['this is a comment', 'this is another comment'],
-              }
-
-    def compareHeaders(self, a, b):
-        '''compare two headers a and b.'''
-        for ak, av in a.items():
-            self.assertTrue(ak in b, "key '%s' not in '%s' " % (ak, b))
-            self.assertEqual(av, b[ak])
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(os.path.join(DATADIR, "ex3.sam"),
-                                     "r")
-
-    def testHeaders(self):
-        self.compareHeaders(self.header, self.samfile.header)
-        self.compareHeaders(self.samfile.header, self.header)
-
-    def testNameMapping(self):
-        for x, y in enumerate(("chr1", "chr2")):
-            tid = self.samfile.gettid(y)
-            ref = self.samfile.getrname(x)
-            self.assertEqual(tid, x)
-            self.assertEqual(ref, y)
-
-        self.assertEqual(self.samfile.gettid("chr?"), -1)
-        self.assertRaises(ValueError, self.samfile.getrname, 2)
-
-    def tearDown(self):
-        self.samfile.close()
-
-
-class TestHeaderBam(TestHeaderSam):
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(os.path.join(DATADIR, "ex3.bam"),
-                                     "rb")
-
-
-class TestHeaderFromRefs(unittest.TestCase):
-
-    '''see issue 144
-
-    reference names need to be converted to string for python 3
-    '''
-
-    # def testHeader( self ):
-    #     refs = ['chr1', 'chr2']
-    #     tmpfile = "tmp_%i" % id(self)
-    #     s = pysam.Samfile(tmpfile, 'wb',
-    #                       referencenames=refs,
-    #                       referencelengths=[100]*len(refs))
-    #     s.close()
-
-    #     self.assertTrue( checkBinaryEqual( 'issue144.bam', tmpfile ),
-    #                      'bam files differ')
-    #     os.unlink( tmpfile )
-
-
-class TestHeader1000Genomes(unittest.TestCase):
-
-    '''see issue 110'''
-    # bamfile = "http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/phase2b_alignment/data/NA07048/exome_alignment/NA07048.unmapped.ILLUMINA.bwa.CEU.exome.20120522_p2b.bam"
-    bamfile = "http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/phase3_EX_or_LC_only_alignment/data/HG00104/alignment/HG00104.chrom11.ILLUMINA.bwa.GBR.low_coverage.20130415.bam"
-
-    def testRead(self):
-
-        if not checkURL(self.bamfile):
-            return
-
-        f = pysam.Samfile(self.bamfile, "rb")
-        data = f.header.copy()
-        self.assertTrue(data)
-
-
-class TestUnmappedReads(unittest.TestCase):
-
-    # TODO
-    # def testSAM(self):
-    #     samfile = pysam.Samfile(os.path.join(DATADIR, "ex5.sam"),
-    #                             "r")
-    #     self.assertEqual(len(list(samfile.fetch(until_eof=True))), 2)
-    #     samfile.close()
-
-    def testBAM(self):
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex5.bam"),
-                                "rb")
-        self.assertEqual(len(list(samfile.fetch(until_eof=True))), 2)
-        samfile.close()
-
-
-class TestPileupObjects(unittest.TestCase):
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                     "rb")
-
-    def testPileupColumn(self):
-        for pcolumn1 in self.samfile.pileup(region="chr1:105"):
-            if pcolumn1.pos == 104:
-                self.assertEqual(
-                    pcolumn1.tid, 0, "chromosome/target id mismatch in position 1: %s != %s" % (pcolumn1.tid, 0))
-                self.assertEqual(
-                    pcolumn1.pos, 105 - 1, "position mismatch in position 1: %s != %s" % (pcolumn1.pos, 105 - 1))
-                self.assertEqual(
-                    pcolumn1.n, 2, "# reads mismatch in position 1: %s != %s" % (pcolumn1.n, 2))
-        for pcolumn2 in self.samfile.pileup(region="chr2:1480"):
-            if pcolumn2.pos == 1479:
-                self.assertEqual(
-                    pcolumn2.tid, 1, "chromosome/target id mismatch in position 1: %s != %s" % (pcolumn2.tid, 1))
-                self.assertEqual(
-                    pcolumn2.pos, 1480 - 1, "position mismatch in position 1: %s != %s" % (pcolumn2.pos, 1480 - 1))
-                self.assertEqual(
-                    pcolumn2.n, 12, "# reads mismatch in position 1: %s != %s" % (pcolumn2.n, 12))
-
-    def testPileupRead(self):
-        for pcolumn1 in self.samfile.pileup(region="chr1:105"):
-            if pcolumn1.pos == 104:
-                self.assertEqual(
-                    len(pcolumn1.pileups), 2,
-                    "# reads aligned to column mismatch in position 1"
-                    ": %s != %s" %
-                    (len(pcolumn1.pileups), 2))
-
-
-# self.assertEqual( pcolumn1.pileups[0]  # need to test additional
-# properties here
-
-    def tearDown(self):
-        self.samfile.close()
-
-    def testIteratorOutOfScope(self):
-        '''test if exception is raised if pileup col is accessed after
-        iterator is exhausted.'''
-
-        for pileupcol in self.samfile.pileup():
-            pass
-
-        self.assertRaises(ValueError, getattr, pileupcol, "pileups")
-
-
-class TestContextManager(unittest.TestCase):
-
-    def testManager(self):
-        with pysam.Samfile(os.path.join(DATADIR, 'ex1.bam'),
-                           'rb') as samfile:
-            samfile.fetch()
-        self.assertEqual(samfile.closed, True)
-
-
-class TestExceptions(unittest.TestCase):
-
-    def setUp(self):
-        self.samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                     "rb")
-
-    def testMissingFile(self):
-
-        self.assertRaises(IOError, pysam.Samfile, "exdoesntexist.bam", "rb")
-        self.assertRaises(IOError, pysam.Samfile, "exdoesntexist.sam", "r")
-        self.assertRaises(IOError, pysam.Samfile, "exdoesntexist.bam", "r")
-        self.assertRaises(IOError, pysam.Samfile, "exdoesntexist.sam", "rb")
-
-    def testBadContig(self):
-        self.assertRaises(ValueError, self.samfile.fetch, "chr88")
-
-    def testMeaninglessCrap(self):
-        self.assertRaises(ValueError, self.samfile.fetch, "skljf")
-
-    def testBackwardsOrderNewFormat(self):
-        self.assertRaises(ValueError, self.samfile.fetch, 'chr1', 100, 10)
-
-    def testBackwardsOrderOldFormat(self):
-        self.assertRaises(ValueError, self.samfile.fetch, region="chr1:100-10")
-
-    def testOutOfRangeNegativeNewFormat(self):
-        self.assertRaises(ValueError, self.samfile.fetch, "chr1", 5, -10)
-        self.assertRaises(ValueError, self.samfile.fetch, "chr1", 5, 0)
-        self.assertRaises(ValueError, self.samfile.fetch, "chr1", -5, -10)
-
-        self.assertRaises(ValueError, self.samfile.count, "chr1", 5, -10)
-        self.assertRaises(ValueError, self.samfile.count, "chr1", 5, 0)
-        self.assertRaises(ValueError, self.samfile.count, "chr1", -5, -10)
-
-    def testOutOfRangeNegativeOldFormat(self):
-        self.assertRaises(ValueError, self.samfile.fetch, region="chr1:-5-10")
-        self.assertRaises(ValueError, self.samfile.fetch, region="chr1:-5-0")
-        self.assertRaises(ValueError, self.samfile.fetch, region="chr1:-5--10")
-
-        self.assertRaises(ValueError, self.samfile.count, region="chr1:-5-10")
-        self.assertRaises(ValueError, self.samfile.count, region="chr1:-5-0")
-        self.assertRaises(ValueError, self.samfile.count, region="chr1:-5--10")
-
-    def testOutOfRangNewFormat(self):
-        self.assertRaises(
-            ValueError, self.samfile.fetch, "chr1", 9999999999, 99999999999)
-        self.assertRaises(
-            ValueError, self.samfile.count, "chr1", 9999999999, 99999999999)
-
-    def testOutOfRangeLargeNewFormat(self):
-        self.assertRaises(ValueError, self.samfile.fetch, "chr1",
-                          9999999999999999999999999999999, 9999999999999999999999999999999999999999)
-        self.assertRaises(ValueError, self.samfile.count, "chr1",
-                          9999999999999999999999999999999, 9999999999999999999999999999999999999999)
-
-    def testOutOfRangeLargeOldFormat(self):
-        self.assertRaises(
-            ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999")
-        self.assertRaises(
-            ValueError, self.samfile.count, "chr1:99999999999999999-999999999999999999")
-
-    def testZeroToZero(self):
-        '''see issue 44'''
-        self.assertEqual(len(list(self.samfile.fetch('chr1', 0, 0))), 0)
-
-    def tearDown(self):
-        self.samfile.close()
-
-
-class TestWrongFormat(unittest.TestCase):
-
-    '''test cases for opening files not in bam/sam format.'''
-
-    def testOpenSamAsBam(self):
-        self.assertRaises(ValueError,
-                          pysam.Samfile,
-                          os.path.join(DATADIR, 'ex1.sam'),
-                          'rb')
-
-    def testOpenBamAsSam(self):
-        # test fails, needs to be implemented.
-        # sam.fetch() fails on reading, not on opening
-        # self.assertRaises( ValueError, pysam.Samfile, 'ex1.bam', 'r' )
-        pass
-
-    def testOpenFastaAsSam(self):
-        # test fails, needs to be implemented.
-        # sam.fetch() fails on reading, not on opening
-        # self.assertRaises( ValueError, pysam.Samfile, 'ex1.fa', 'r' )
-        pass
-
-    def testOpenFastaAsBam(self):
-        self.assertRaises(ValueError,
-                          pysam.Samfile,
-                          os.path.join(DATADIR, 'ex1.fa'),
-                          'rb')
-
-
-class ReadTest(unittest.TestCase):
-
-    def checkFieldEqual(self, read1, read2, exclude=[]):
-        '''check if two reads are equal by comparing each field.'''
-
-        # add the . for refactoring purposes.
-        for x in (".qname", ".seq", ".flag",
-                  ".rname", ".pos", ".mapq", ".cigar",
-                  ".mrnm", ".mpos", ".isize",
-                  ".qual",
-                  ".bin",
-                  ".is_paired", ".is_proper_pair",
-                  ".is_unmapped", ".mate_is_unmapped",
-                  ".is_reverse", ".mate_is_reverse",
-                  ".is_read1", ".is_read2",
-                  ".is_secondary", ".is_qcfail",
-                  ".is_duplicate"):
-            n = x[1:]
-            if n in exclude:
-                continue
-            self.assertEqual(getattr(read1, n), getattr(read2, n),
-                             "attribute mismatch for %s: %s != %s" %
-                             (n, getattr(read1, n), getattr(read2, n)))
-
-
-class TestAlignedRead(ReadTest):
-
-    '''tests to check if aligned read can be constructed
-    and manipulated.
-    '''
-
-    def testEmpty(self):
-        a = pysam.AlignedRead()
-        self.assertEqual(a.qname, None)
-        self.assertEqual(a.seq, None)
-        self.assertEqual(a.qual, None)
-        self.assertEqual(a.flag, 0)
-        self.assertEqual(a.rname, -1)
-        self.assertEqual(a.mapq, 0)
-        self.assertEqual(a.cigar, [])
-        self.assertEqual(a.tags, [])
-        self.assertEqual(a.mrnm, -1)
-        self.assertEqual(a.mpos, -1)
-        self.assertEqual(a.isize, 0)
-
-    def testStrOfEmptyRead(self):
-        a = pysam.AlignedRead()
-        s = str(a)
-        self.assertEqual(
-            "None\t0\t-1\t-1\t0\tNone\t-1\t-1\t0\tNone\tNone\t[]",
-            s)
-
-    def buildRead(self):
-        '''build an example read.'''
-
-        a = pysam.AlignedRead()
-        a.qname = "read_12345"
-        a.seq = "ACGT" * 10
-        a.flag = 0
-        a.rname = 0
-        a.pos = 20
-        a.mapq = 20
-        a.cigar = ((0, 10), (2, 1), (0, 9), (1, 1), (0, 20))
-        a.mrnm = 0
-        a.mpos = 200
-        a.isize = 167
-        a.qual = "1234" * 10
-        # todo: create tags
-        return a
-
-    def testUpdate(self):
-        '''check if updating fields affects other variable length data
-        '''
-        a = self.buildRead()
-        b = self.buildRead()
-
-        # check qname
-        b.qname = "read_123"
-        self.checkFieldEqual(a, b, "qname")
-        b.qname = "read_12345678"
-        self.checkFieldEqual(a, b, "qname")
-        b.qname = "read_12345"
-        self.checkFieldEqual(a, b)
-
-        # check cigar
-        b.cigar = ((0, 10), )
-        self.checkFieldEqual(a, b, "cigar")
-        b.cigar = ((0, 10), (2, 1), (0, 10))
-        self.checkFieldEqual(a, b, "cigar")
-        b.cigar = ((0, 10), (2, 1), (0, 9), (1, 1), (0, 20))
-        self.checkFieldEqual(a, b)
-
-        # check seq
-        b.seq = "ACGT"
-        self.checkFieldEqual(a, b, ("seq", "qual"))
-        b.seq = "ACGT" * 3
-        self.checkFieldEqual(a, b, ("seq", "qual"))
-        b.seq = "ACGT" * 10
-        self.checkFieldEqual(a, b, ("qual",))
-
-        # reset qual
-        b = self.buildRead()
-
-        # check flags:
-        for x in (
-                "is_paired", "is_proper_pair",
-                "is_unmapped", "mate_is_unmapped",
-                "is_reverse", "mate_is_reverse",
-                "is_read1", "is_read2",
-                "is_secondary", "is_qcfail",
-                "is_duplicate"):
-            setattr(b, x, True)
-            self.assertEqual(getattr(b, x), True)
-            self.checkFieldEqual(a, b, ("flag", x,))
-            setattr(b, x, False)
-            self.assertEqual(getattr(b, x), False)
-            self.checkFieldEqual(a, b)
-
-    def testUpdate2(self):
-        '''issue 135: inplace update of sequence and quality score.
-
-        This does not work as setting the sequence will erase
-        the quality scores.
-        '''
-        a = self.buildRead()
-        a.seq = a.seq[5:10]
-        self.assertEqual(a.qual, None)
-
-        a = self.buildRead()
-        s = a.qual
-        a.seq = a.seq[5:10]
-        a.qual = s[5:10]
-
-        self.assertEqual(a.qual, s[5:10])
-
-    def testLargeRead(self):
-        '''build an example read.'''
-
-        a = pysam.AlignedRead()
-        a.qname = "read_12345"
-        a.seq = "ACGT" * 200
-        a.flag = 0
-        a.rname = 0
-        a.pos = 20
-        a.mapq = 20
-        a.cigar = ((0, 4 * 200), )
-        a.mrnm = 0
-        a.mpos = 200
-        a.isize = 167
-        a.qual = "1234" * 200
-
-        return a
-
-    def testTagParsing(self):
-        '''test for tag parsing
-
-        see http://groups.google.com/group/pysam-user-group/browse_thread/thread/67ca204059ea465a
-        '''
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex8.bam"),
-                                "rb")
-
-        for entry in samfile:
-            before = entry.tags
-            entry.tags = entry.tags
-            after = entry.tags
-            self.assertEqual(after, before)
-
-    def testUpdateTlen(self):
-        '''check if updating tlen works'''
-        a = self.buildRead()
-        oldlen = a.tlen
-        oldlen *= 2
-        a.tlen = oldlen
-        self.assertEqual(a.tlen, oldlen)
-
-    def testPositions(self):
-        a = self.buildRead()
-        self.assertEqual(a.positions,
-                         [20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
-                          31, 32, 33, 34, 35, 36, 37, 38, 39,
-                          40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
-                          50, 51, 52, 53, 54, 55, 56, 57, 58, 59])
-
-        self.assertEqual(a.aligned_pairs,
-                         [(0, 20), (1, 21), (2, 22), (3, 23), (4, 24),
-                          (5, 25), (6, 26), (7, 27), (8, 28), (9, 29),
-                          (None, 30),
-                          (10, 31), (11, 32), (12, 33), (13, 34), (14, 35),
-                          (15, 36), (16, 37), (17, 38), (18, 39), (19, None),
-                          (20, 40), (21, 41), (22, 42), (23, 43), (24, 44),
-                          (25, 45), (26, 46), (27, 47), (28, 48), (29, 49),
-                          (30, 50), (31, 51), (32, 52), (33, 53), (34, 54),
-                          (35, 55), (36, 56), (37, 57), (38, 58), (39, 59)])
-
-        self.assertEqual(
-            a.positions,
-            [x[1] for x in a.aligned_pairs
-             if x[0] is not None and x[1] is not None])
-        # alen is the length of the aligned read in genome
-        self.assertEqual(a.alen, a.aligned_pairs[-1][0] + 1)
-        # aend points to one beyond last aligned base in ref
-        self.assertEqual(a.positions[-1], a.aend - 1)
-
-    def testBlocks(self):
-        a = self.buildRead()
-        self.assertEqual(a.blocks,
-                         [(20, 30), (31, 40), (40, 60)])
-
-    # Disabled as not backwards compatible
-    # def testFancyStr(self):
-    #     a = self.buildRead()
-    #     output = a.fancy_str()
-    #     self.assertEqual(len(output), 9)
-
-
-class TestDeNovoConstruction(ReadTest):
-
-    '''check BAM/SAM file construction using ex6.sam
-
-    (note these are +1 coordinates):
-
-    read_28833_29006_6945	99	chr1	33	20	10M1D25M	=	200	167	AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG	<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<	NM:i:1	RG:Z:L1
-    read_28701_28881_323b	147	chr2	88	30	35M	=	500	412	ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA	<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<	MF:i:18	RG:Z:L2
-    '''
-
-    header = {'HD': {'VN': '1.0'},
-              'SQ': [{'LN': 1575, 'SN': 'chr1'},
-                     {'LN': 1584, 'SN': 'chr2'}], }
-
-    bamfile = os.path.join(DATADIR, "ex6.bam")
-    samfile = os.path.join(DATADIR, "ex6.sam")
-
-    def setUp(self):
-
-        a = pysam.AlignedRead()
-        a.qname = "read_28833_29006_6945"
-        a.seq = "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG"
-        a.flag = 99
-        a.rname = 0
-        a.pos = 32
-        a.mapq = 20
-        a.cigar = ((0, 10), (2, 1), (0, 25))
-        a.mrnm = 0
-        a.mpos = 199
-        a.isize = 167
-        a.qual = "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<"
-        a.tags = (("NM", 1),
-                  ("RG", "L1"))
-
-        b = pysam.AlignedRead()
-        b.qname = "read_28701_28881_323b"
-        b.seq = "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA"
-        b.flag = 147
-        b.rname = 1
-        b.pos = 87
-        b.mapq = 30
-        b.cigar = ((0, 35), )
-        b.mrnm = 1
-        b.mpos = 499
-        b.isize = 412
-        b.qual = "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<"
-        b.tags = (("MF", 18),
-                  ("RG", "L2"))
-
-        self.reads = (a, b)
-
-    # TODO
-    # def testSAMWholeFile(self):
-
-    #     tmpfilename = "tmp_%i.sam" % id(self)
-
-    #     outfile = pysam.Samfile(tmpfilename,
-    #                             "wh",
-    #                             header=self.header)
-
-    #     for x in self.reads:
-    #         outfile.write(x)
-    #     outfile.close()
-    #     self.assertTrue(checkBinaryEqual(tmpfilename, self.samfile),
-    #                     "mismatch when construction SAM file, see %s %s" % (tmpfilename, self.samfile))
-
-    #     os.unlink(tmpfilename)
-
-    def testBAMPerRead(self):
-        '''check if individual reads are binary equal.'''
-        infile = pysam.Samfile(self.bamfile, "rb")
-
-        others = list(infile)
-        for denovo, other in zip(others, self.reads):
-            self.checkFieldEqual(other, denovo)
-            self.assertEqual(other.compare(denovo), 0)
-
-    # TODO
-    # def testSAMPerRead(self):
-    #     '''check if individual reads are binary equal.'''
-    #     infile = pysam.Samfile(self.samfile, "r")
-
-    #     others = list(infile)
-    #     for denovo, other in zip(others, self.reads):
-    #         self.checkFieldEqual(other, denovo)
-    #         self.assertEqual(other.compare(denovo), 0)
-
-    def testBAMWholeFile(self):
-
-        tmpfilename = "tmp_%i.bam" % id(self)
-
-        outfile = pysam.Samfile(tmpfilename, "wb", header=self.header)
-
-        for x in self.reads:
-            outfile.write(x)
-        outfile.close()
-
-        self.assertTrue(checkBinaryEqual(tmpfilename, self.bamfile),
-                        "mismatch when construction BAM file, see %s %s" % (tmpfilename, self.bamfile))
-
-        os.unlink(tmpfilename)
-
-
-class TestDeNovoConstructionUserTags(TestDeNovoConstruction):
-
-    '''test de novo construction with a header that contains lower-case tags.'''
-
-    header = {'HD': {'VN': '1.0'},
-              'SQ': [{'LN': 1575, 'SN': 'chr1'},
-                     {'LN': 1584, 'SN': 'chr2'}],
-              'x1': {'A': 2, 'B': 5},
-              'x3': {'A': 6, 'B': 5},
-              'x2': {'A': 4, 'B': 5}}
-
-    bamfile = os.path.join(DATADIR, "example_user_header.bam")
-    samfile = os.path.join(DATADIR, "example_user_header.sam")
-
-
-class TestEmptyHeader(unittest.TestCase):
-
-    '''see issue 84.'''
-
-    def testEmptyHeader(self):
-
-        s = pysam.Samfile(os.path.join(DATADIR, 'example_empty_header.bam'))
-        self.assertEqual(s.header, {'SQ': [{'LN': 1000, 'SN': 'chr1'}]})
-
-COMPARE_BTAG = [100, 1, 91, 0, 7, 101, 0, 201, 96, 204,
-                0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78,
-                197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1,
-                186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195,
-                101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0,
-                212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2,
-                197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194,
-                0, 180, 0, 108, 0, 203, 104, 16, 5, 205,
-                0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0,
-                79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10,
-                109, 102, 9, 0, 292, 0, 110, 0, 0, 102,
-                112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2,
-                90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0,
-                0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93,
-                198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0,
-                100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109,
-                194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112,
-                110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129,
-                0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100,
-                0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0,
-                12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0,
-                295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0,
-                223, 111, 103, 0, 5, 225, 0, 95]
-
-
-class TestBTagSam(unittest.TestCase):
-
-    '''see issue 81.'''
-
-    compare = [COMPARE_BTAG,
-               [-100, 200, -300, -400],
-               [-100, 12],
-               [12, 15],
-               [-1.0, 5.0, 2.5]]
-
-    filename = os.path.join(DATADIR, 'example_btag.sam')
-
-    read0 = [('RG', 'QW85I'),
-             ('PG', 'tmap'),
-             ('MD', '140'),
-             ('NM', 0),
-             ('AS', 140),
-             ('FZ', array.array('H', COMPARE_BTAG)),
-             ('XA', 'map2-1'),
-             ('XS', 53),
-             ('XT', 38),
-             ('XF', 1),
-             ('XE', 0)]
-
-    def testReadTags(self):
-
-        s = pysam.Samfile(self.filename)
-        for x, read in enumerate(s):
-            tags = read.tags
-            if x == 0:
-                self.assertEqual(tags, self.read0)
-            
-            fz = list(dict(tags)["FZ"])
-            self.assertEqual(fz, self.compare[x])
-            self.assertEqual(list(read.opt("FZ")), self.compare[x])
-            self.assertEqual(tags, read.get_tags())
-            for tag, value in tags:
-                self.assertEqual(value, read.get_tag(tag))
-            
-    def testReadWriteTags(self):
-
-        s = pysam.Samfile(self.filename)
-        for read in s:
-            before = read.tags
-            read.tags = before
-            self.assertEqual(read.tags, before)
-            
-            read.set_tags(before)
-            self.assertEqual(read.tags, before)
-
-            for tag, value in before:
-                read.set_tag(tag, value)
-                self.assertEqual(value, read.get_tag(tag))
-
-
-class TestBTagBam(TestBTagSam):
-    filename = os.path.join(DATADIR, 'example_btag.bam')
-
-
-class TestDoubleFetch(unittest.TestCase):
-
-    '''check if two iterators on the same bamfile are independent.'''
-
-    filename = os.path.join(DATADIR, 'ex1.bam')
-
-    def testDoubleFetch(self):
-
-        samfile1 = pysam.Samfile(self.filename, 'rb')
-
-        for a, b in zip(samfile1.fetch(multiple_iterators=True),
-                        samfile1.fetch(multiple_iterators=True)):
-            self.assertEqual(a.compare(b), 0)
-
-    def testDoubleFetchWithRegion(self):
-
-        samfile1 = pysam.Samfile(self.filename, 'rb')
-        chr, start, stop = 'chr1', 200, 3000000
-        # just making sure the test has something to catch
-        self.assertTrue(len(list(samfile1.fetch(chr, start, stop))) > 0)
-
-        for a, b in zip(samfile1.fetch(chr, start, stop),
-                        samfile1.fetch(chr, start, stop,
-                                       multiple_iterators=True)):
-            self.assertEqual(a.compare(b), 0)
-
-    def testDoubleFetchUntilEOF(self):
-
-        samfile1 = pysam.Samfile(self.filename, 'rb')
-
-        for a, b in zip(samfile1.fetch(until_eof=True),
-                        samfile1.fetch(until_eof=True,
-                                       multiple_iterators=True)):
-            self.assertEqual(a.compare(b), 0)
-
-
-class TestRemoteFileFTP(unittest.TestCase):
-
-    '''test remote access.
-
-    '''
-
-    # Need to find an ftp server without password on standard
-    # port.
-
-    url = "ftp://ftp.sanger.ac.uk/pub/rd/humanSequences/CV.bam"
-    region = "1:1-1000"
-
-    def testFTPView(self):
-        return
-        if not checkURL(self.url):
-            return
-
-        result = pysam.samtools.view(self.url, self.region)
-        self.assertEqual(len(result), 36)
-
-    def testFTPFetch(self):
-        return
-        if not checkURL(self.url):
-            return
-
-        samfile = pysam.Samfile(self.url, "rb")
-        result = list(samfile.fetch(region=self.region))
-        self.assertEqual(len(result), 36)
-
-
-class TestRemoteFileHTTP(unittest.TestCase):
-
-    url = "http://genserv.anat.ox.ac.uk/downloads/pysam/test/ex1.bam"
-    region = "chr1:1-1000"
-    local = os.path.join(DATADIR, "ex1.bam")
-
-    def testView(self):
-        if not checkURL(self.url):
-            return
-
-        samfile_local = pysam.Samfile(self.local, "rb")
-        ref = list(samfile_local.fetch(region=self.region))
-
-        result = pysam.samtools.view(
-            self.url, self.region).splitlines(True)
-        self.assertEqual(len(result), len(ref))
-
-    def testFetch(self):
-        if not checkURL(self.url):
-            return
-
-        samfile = pysam.Samfile(self.url, "rb")
-        result = list(samfile.fetch(region=self.region))
-        samfile_local = pysam.Samfile(self.local, "rb")
-        ref = list(samfile_local.fetch(region=self.region))
-
-        self.assertEqual(len(ref), len(result))
-        for x, y in zip(result, ref):
-            self.assertEqual(x.compare(y), 0)
-
-    def testFetchAll(self):
-        if not checkURL(self.url):
-            return
-
-        samfile = pysam.Samfile(self.url, "rb")
-        result = list(samfile.fetch())
-        samfile_local = pysam.Samfile(self.local, "rb")
-        ref = list(samfile_local.fetch())
-
-        self.assertEqual(len(ref), len(result))
-        for x, y in zip(result, ref):
-            self.assertEqual(x.compare(y), 0)
-
-
-class TestLargeOptValues(unittest.TestCase):
-
-    ints = (65536, 214748, 2147484, 2147483647)
-    floats = (65536.0, 214748.0, 2147484.0)
-
-    def check(self, samfile):
-
-        i = samfile.fetch()
-        for exp in self.ints:
-            rr = next(i)
-            obs = rr.opt("ZP")
-            self.assertEqual(exp, obs,
-                             "expected %s, got %s\n%s" %
-                             (str(exp), str(obs), str(rr)))
-
-        for exp in [-x for x in self.ints]:
-            rr = next(i)
-            obs = rr.opt("ZP")
-            self.assertEqual(exp, obs,
-                             "expected %s, got %s\n%s" %
-                             (str(exp), str(obs), str(rr)))
-
-        for exp in self.floats:
-            rr = next(i)
-            obs = rr.opt("ZP")
-            self.assertEqual(exp, obs,
-                             "expected %s, got %s\n%s" %
-                             (str(exp), str(obs), str(rr)))
-
-        for exp in [-x for x in self.floats]:
-            rr = next(i)
-            obs = rr.opt("ZP")
-            self.assertEqual(exp, obs, "expected %s, got %s\n%s" %
-                             (str(exp), str(obs), str(rr)))
-
-    def testSAM(self):
-        samfile = pysam.Samfile(
-            os.path.join(DATADIR, "ex10.sam"),
-            "r")
-        self.check(samfile)
-
-    def testBAM(self):
-        samfile = pysam.Samfile(
-            os.path.join(DATADIR, "ex10.bam"),
-            "rb")
-        self.check(samfile)
-
-
-class TestPileup(unittest.TestCase):
-
-    '''test pileup functionality.'''
-
-    samfilename = "pysam_data/ex1.bam"
-    fastafilename = "pysam_data/ex1.fa"
-
-    def setUp(self):
-
-        self.samfile = pysam.Samfile(self.samfilename)
-        self.fastafile = pysam.Fastafile(self.fastafilename)
-
-    def checkEqual(self, references, iterator):
-
-        for x, column in enumerate(iterator):
-            (contig, pos, reference_base,
-             read_bases, read_qualities, alignment_mapping_qualities) \
-                = references[x][:-1].split("\t")
-            self.assertEqual(int(pos) - 1, column.pos)
-
-    def testSamtoolsStepper(self):
-        refs = force_str(
-            pysam.samtools.mpileup(
-                "-f", self.fastafilename,
-                self.samfilename)).splitlines(True)
-        iterator = self.samfile.pileup(
-            stepper="samtools",
-            fastafile=self.fastafile)
-        self.checkEqual(refs, iterator)
-
-    def testAllStepper(self):
-        refs = force_str(
-            pysam.samtools.mpileup(
-                "-f", self.fastafilename,
-                "-A", "-B",
-                self.samfilename)).splitlines(True)
-            
-        iterator = self.samfile.pileup(
-            stepper="all",
-            fastafile=self.fastafile)
-        self.checkEqual(refs, iterator)
-
-
-class TestLogging(unittest.TestCase):
-
-    '''test around bug issue 42,
-
-    failed in versions < 0.4
-    '''
-
-    def check(self, bamfile, log):
-
-        if log:
-            logger = logging.getLogger('franklin')
-            logger.setLevel(logging.INFO)
-            formatter = logging.Formatter(
-                '%(asctime)s %(levelname)s %(message)s')
-            log_hand = logging.FileHandler('log.txt')
-            log_hand.setFormatter(formatter)
-            logger.addHandler(log_hand)
-
-        bam = pysam.Samfile(bamfile, 'rb')
-        cols = bam.pileup()
-        self.assertTrue(True)
-
-    def testFail1(self):
-        self.check(os.path.join(DATADIR, "ex9_fail.bam"),
-                   False)
-        self.check(os.path.join(DATADIR, "ex9_fail.bam"),
-                   True)
-
-    def testNoFail1(self):
-        self.check(os.path.join(DATADIR, "ex9_nofail.bam"),
-                   False)
-        self.check(os.path.join(DATADIR, "ex9_nofail.bam"),
-                   True)
-
-    def testNoFail2(self):
-        self.check(os.path.join(DATADIR, "ex9_nofail.bam"),
-                   True)
-        self.check(os.path.join(DATADIR, "ex9_nofail.bam"),
-                   True)
-
-# TODOS
-# 1. finish testing all properties within pileup objects
-# 2. check exceptions and bad input problems (missing files, optional fields that aren't present, etc...)
-# 3. check: presence of sequence
-
-
-class TestSamfileUtilityFunctions(unittest.TestCase):
-
-    def testCount(self):
-
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                "rb")
-
-        for contig in ("chr1", "chr2"):
-            for start in range(0, 2000, 100):
-                end = start + 1
-                self.assertEqual(
-                    len(list(samfile.fetch(contig, start, end))),
-                    samfile.count(contig, start, end),
-                    'number mismatch for %s:%i-%i %i != %i' % (
-                        contig, start, end,
-                        len(list(samfile.fetch(contig, start, end))),
-                        samfile.count(contig, start, end)))
-
-                # test empty intervals
-                self.assertEqual(
-                    len(list(samfile.fetch(contig, start, start))),
-                    samfile.count(contig, start, start),
-                    'number mismatch for %s:%i-%i %i != %i' % (
-                        contig, start, start,
-                        len(list(samfile.fetch(contig, start, start))),
-                        samfile.count(contig, start, start)))
-
-                # test half empty intervals
-                self.assertEqual(len(list(samfile.fetch(contig, start))),
-                                 samfile.count(contig, start))
-
-                self.assertEqual(
-                    len(list(samfile.fetch(contig, start))),
-                    samfile.count(contig, start),
-                    'number mismatch for %s:%i %i != %i' % (
-                        contig, start,
-                        len(list(samfile.fetch(contig, start))),
-                        samfile.count(contig, start)))
-
-    def testMate(self):
-        '''test mate access.'''
-
-        with open(os.path.join(DATADIR, "ex1.sam"), "rb") as inf:
-            readnames = [x.split(b"\t")[0] for x in inf.readlines()]
-        if sys.version_info[0] >= 3:
-            readnames = [name.decode('ascii') for name in readnames]
-
-        counts = collections.defaultdict(int)
-        for x in readnames:
-            counts[x] += 1
-
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                "rb")
-
-        for read in samfile.fetch():
-            if not read.is_paired:
-                self.assertRaises(ValueError, samfile.mate, read)
-            elif read.mate_is_unmapped:
-                self.assertRaises(ValueError, samfile.mate, read)
-            else:
-                if counts[read.qname] == 1:
-                    self.assertRaises(ValueError, samfile.mate, read)
-                else:
-                    mate = samfile.mate(read)
-                    self.assertEqual(read.qname, mate.qname)
-                    self.assertEqual(read.is_read1, mate.is_read2)
-                    self.assertEqual(read.is_read2, mate.is_read1)
-                    self.assertEqual(read.pos, mate.mpos)
-                    self.assertEqual(read.mpos, mate.pos)
-
-    def testIndexStats(self):
-        '''test if total number of mapped/unmapped reads is correct.'''
-
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                "rb")
-        self.assertEqual(samfile.mapped, 3235)
-        self.assertEqual(samfile.unmapped, 35)
-        self.assertEqual(samfile.nocoordinate, 0)
-
-
-class TestSamtoolsProxy(unittest.TestCase):
-
-    '''tests for sanity checking access to samtools functions.'''
-
-    def testIndex(self):
-        self.assertRaises(IOError, pysam.samtools.index, "missing_file")
-
-    def testView(self):
-        # note that view still echos "open: No such file or directory"
-        self.assertRaises(pysam.SamtoolsError, pysam.samtools.view, "missing_file")
-
-    def testSort(self):
-        self.assertRaises(pysam.SamtoolsError, pysam.samtools.sort, "missing_file")
-
-
-class TestSamfileIndex(unittest.TestCase):
-
-    def testIndex(self):
-        samfile = pysam.Samfile(os.path.join(DATADIR, "ex1.bam"),
-                                "rb")
-        index = pysam.IndexedReads(samfile)
-        index.build()
-        reads = collections.defaultdict(int)
-
-        for read in samfile:
-            reads[read.qname] += 1
-
-        for qname, counts in reads.items():
-            found = list(index.find(qname))
-            self.assertEqual(len(found), counts)
-            for x in found:
-                self.assertEqual(x.qname, qname)
-
-
-if __name__ == "__main__":
-    # build data files
-    print ("building data files")
-    subprocess.call("make -C %s" % DATADIR, shell=True)
-    print ("starting tests")
-    unittest.main()
-    print ("completed tests")
diff --git a/tests/StreamFiledescriptors_test.py b/tests/StreamFiledescriptors_test.py
index ce59da7..de54de5 100644
--- a/tests/StreamFiledescriptors_test.py
+++ b/tests/StreamFiledescriptors_test.py
@@ -1,4 +1,5 @@
 import os
+import sys
 import subprocess
 import threading
 import errno
@@ -6,6 +7,8 @@ import unittest
 
 from pysam import AlignmentFile
 
+IS_PYTHON2 = sys.version_info[0] == 2
+
 DATADIR = os.path.abspath(os.path.join(
     os.path.dirname(__file__),
     "pysam_data"))
@@ -13,7 +16,7 @@ DATADIR = os.path.abspath(os.path.join(
 
 def alignmentfile_writer_thread(infile, outfile):
     def _writer_thread(infile, outfile):
-        """read  from infile and write to outfile"""
+        """read from infile and write to outfile"""
         try:
             i = 0
             for record in infile:
@@ -41,42 +44,48 @@ class StreamTest(unittest.TestCase):
                 read += 1
         return 0, read
 
+    @unittest.skipIf(IS_PYTHON2, "no context manager in py2")
     def test_text_processing(self):
 
-        proc = subprocess.Popen('head -n200',
-                                stdin=subprocess.PIPE,
-                                stdout=subprocess.PIPE,
-                                shell=True)
+        with subprocess.Popen('head -n200',
+                              stdin=subprocess.PIPE,
+                              stdout=subprocess.PIPE,
+                              shell=True) as proc:
 
-        in_stream = AlignmentFile('pysam_data/ex1.bam')
-        out_stream = AlignmentFile(proc.stdin, 'wh', header=in_stream.header)
-        writer = alignmentfile_writer_thread(in_stream,
-                                             out_stream)
+            in_stream = AlignmentFile('pysam_data/ex1.bam')
+            out_stream = AlignmentFile(proc.stdin, 'wh', header=in_stream.header)
+            writer = alignmentfile_writer_thread(in_stream,
+                                                 out_stream)
 
-        written, read = self.stream_process(proc,
-                                            in_stream,
-                                            out_stream,
-                                            writer)
-        self.assertEqual(read, 198)
+            written, read = self.stream_process(proc,
+                                                in_stream,
+                                                out_stream,
+                                                writer)
+            self.assertEqual(read, 198)
 
+    @unittest.skip("test contains bug")
     def test_samtools_processing(self):
-
-        proc = subprocess.Popen('samtools view -b -f 4',
-                                stdin=subprocess.PIPE,
-                                stdout=subprocess.PIPE,
-                                shell=True)
-
-        in_stream = AlignmentFile('pysam_data/ex1.bam')
-        out_stream = AlignmentFile(proc.stdin, 'wb', header=in_stream.header)
-        writer = alignmentfile_writer_thread(in_stream,
-                                             out_stream)
-
-        written, read = self.stream_process(proc,
-                                            in_stream,
-                                            out_stream,
-                                            writer)
-        self.assertEqual(read, 35)
-
+        
+        # The following test causes the suite to hang
+        # as the stream_processor raises:
+        # ValueError: file has no sequences defined (mode='r') - is it SAM/BAM format?
+        # The whole setup then hangs during exception handling.
+        with subprocess.Popen('samtools view -b -f 4',
+                              stdin=subprocess.PIPE,
+                              stdout=subprocess.PIPE,
+                              shell=True) as proc:
+        
+            in_stream = AlignmentFile('pysam_data/ex1.bam')
+            out_stream = AlignmentFile(proc.stdin, 'wb', header=in_stream.header)
+            writer = alignmentfile_writer_thread(in_stream,
+                                                 out_stream)
+
+            written, read = self.stream_process(proc,
+                                                in_stream,
+                                                out_stream,
+                                                writer)
+            self.assertEqual(read, 35)
+        
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/TestUtils.py b/tests/TestUtils.py
index 71ab22a..1168926 100644
--- a/tests/TestUtils.py
+++ b/tests/TestUtils.py
@@ -78,7 +78,6 @@ def check_samtools_view_equal(
     '''return true if the two files are equal in their
     content through samtools view.
     '''
-
     # strip MD and NM tags, as not preserved in CRAM files
     args = ["-x", "MD", "-x", "NM"]
     if not without_header:
@@ -161,8 +160,10 @@ def check_lines_equal(cls, a, b, sort=False, filter_f=None, msg=None):
     filter_f:
        remover lines in both a and b where expression is True
     """
-    aa = openfile(a).readlines()
-    bb = openfile(b).readlines()
+    with openfile(a) as inf:
+        aa = inf.readlines()
+    with openfile(b) as inf:
+        bb = inf.readlines()
 
     if filter_f is not None:
         aa = [x for x in aa if not filter_f(x)]
@@ -183,3 +184,28 @@ def get_temp_filename(suffix=""):
         dir=".")
     f.close()
     return f.name
+
+
+def load_and_convert(filename, encode=True):
+    '''load data from filename and convert all fields to string.
+
+    Filename can be either plain or compressed (ending in .gz).
+    '''
+    data = []
+    if filename.endswith(".gz"):
+        with gzip.open(filename) as inf:
+            for line in inf:
+                line = line.decode("ascii")
+                if line.startswith("#"):
+                    continue
+                d = line.strip().split("\t")
+                data.append(d)
+    else:
+        with open(filename) as f:
+            for line in f:
+                if line.startswith("#"):
+                    continue
+                d = line.strip().split("\t")
+                data.append(d)
+
+    return data
diff --git a/tests/VariantFile_test.py b/tests/VariantFile_test.py
index aa82c66..93307e9 100644
--- a/tests/VariantFile_test.py
+++ b/tests/VariantFile_test.py
@@ -10,10 +10,9 @@ try:
 except ImportError:
     Path = None
 
-from TestUtils import get_temp_filename, check_lines_equal
+from TestUtils import get_temp_filename, check_lines_equal, load_and_convert
 
 DATADIR="cbcf_data"
-from tabix_test import loadAndConvert
 
 
 def read_header(filename):
@@ -37,7 +36,7 @@ class TestMissingGenotypes(unittest.TestCase):
     filename = "missing_genotypes.vcf"
 
     def setUp(self):
-        self.compare = loadAndConvert(
+        self.compare = load_and_convert(
             os.path.join(DATADIR, self.filename),
             encode=False)
 
diff --git a/tests/faidx_test.py b/tests/faidx_test.py
index a123550..c87394d 100644
--- a/tests/faidx_test.py
+++ b/tests/faidx_test.py
@@ -222,15 +222,27 @@ class TestRemoteFileFTP(unittest.TestCase):
 
     url = "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/reference/GRCh38_reference_genome/GRCh38_full_analysis_set_plus_decoy_hla.fa"
 
-
     def testFTPView(self):
         if not checkURL(self.url):
             return
+
         with pysam.Fastafile(self.url) as f:
             self.assertEqual(
                 len(f.fetch("chr1", 0, 1000)),
                 1000)
 
+    def test_sequence_lengths_are_available(self):
+        if not checkURL(self.url):
+            return
+
+        with pysam.Fastafile(self.url) as f:
+            self.assertEqual(len(f.references), 3366)
+            self.assertTrue("chr1" in f.references)
+            self.assertEqual(f.lengths[0],
+                             248956422)
+            self.assertEqual(f.get_reference_length("chr1"),
+                             248956422)
+        
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/samtools_test.py b/tests/samtools_test.py
index aa4c554..7eec832 100644
--- a/tests/samtools_test.py
+++ b/tests/samtools_test.py
@@ -63,7 +63,10 @@ class SamtoolsTest(unittest.TestCase):
         "ex1.fa", "ex1.fa.fai",
         "ex1.sam.gz",
         "ex1.bam", "ex1.bam.bai",
-        "ex1.sam", "ex2.bam",
+        "ex1.sam",
+        "ex1.sam",
+        "ex2.bam",
+        "ex2.sam",
         "ex1.bed"]
 
     # a list of statements to test
@@ -92,7 +95,7 @@ class SamtoolsTest(unittest.TestCase):
         # unknow option
         # "rmdup -s ex1.bam %(out)s_ex1.rmdup.bam",
         # "merge -f %(out)s_ex1.merge.bam ex1.bam ex1.bam",
-        "reheader ex1.sam ex1.bam > %(out)s_ex1.reheader",
+        "reheader ex2.sam ex1.bam > %(out)s_ex1.reheader.bam",
         "cat -o %(out)s_ex1.cat.bam ex1.bam ex1.bam",
         "targetcut ex1.bam > %(out)s_ex1.targetcut",
         "phase ex1.bam > %(out)s_ex1.phase",
@@ -143,7 +146,6 @@ class SamtoolsTest(unittest.TestCase):
         files.
 
         '''
-
         self.check_version()
 
         if not os.path.exists(WORKDIR):
@@ -158,14 +160,23 @@ class SamtoolsTest(unittest.TestCase):
 
         return
 
+    def get_command(self, statement, map_to_internal=True):
+        """return samtools command from statement"""
+        parts = statement.split(" ")
+        command = parts[0]
+        if map_to_internal:
+            return self.map_command.get(command, command)
+        else:
+            return command
+
     def check_statement(self, statement):
 
         parts = statement.split(" ")
         r_samtools = {"out": self.executable}
         r_pysam = {"out": "pysam"}
 
-        command = parts[0]
-        command = self.map_command.get(command, command)
+        command = self.get_command(statement)
+
         # self.assertTrue(command in pysam.SAMTOOLS_DISPATCH)
 
         targets = [x for x in parts if "%(out)s" in x]
@@ -217,9 +228,10 @@ class SamtoolsTest(unittest.TestCase):
                         check_samtools_view_equal(
                             s, p, without_header=True),
                         error_msg)
-                check_lines_equal(
-                    self, s, p,
-                    filter_f=lambda x: x.startswith("#"),
+                else:
+                    check_lines_equal(
+                        self, s, p,
+                        filter_f=lambda x: x.startswith("#"),
                     msg=error_msg)
 
     def testStatements(self):
@@ -232,6 +244,22 @@ class SamtoolsTest(unittest.TestCase):
                 continue
             self.check_statement(statement)
 
+    @unittest.skipIf(sys.platform == "darwin", "not supported, pattern does not match")
+    def testUsage(self):
+        if self.executable == "bcftools":
+            # bcftools usage messages end with exit(1)
+            return
+
+        for statement in self.statements:
+            command = self.get_command(statement, map_to_internal=False)
+            if command == "bam2fq":
+                continue
+            mapped_command = self.get_command(statement, map_to_internal=True)
+            pysam_method = getattr(self.module, mapped_command)
+            usage_msg = pysam_method.usage()
+            expected = "Usage:\s+{} {}".format(self.executable, command)
+            self.assertTrue(re.search(expected, usage_msg) is not None)
+
     def tearDown(self):
         if os.path.exists(WORKDIR):
             shutil.rmtree(WORKDIR)
@@ -342,7 +370,8 @@ class BcftoolsTest(SamtoolsTest):
         # "filter -s A ex1.vcf.gz  > %(out)s_ex1.filter",
         # exit
         # "gtcheck -s A ex1.vcf.gz  > %(out)s_ex1.gtcheck",
-        "roh -s A ex1.vcf.gz > %(out)s_ex1.roh",
+        # segfauld, used to work wit bcftools 1.3
+        # "roh -s A ex1.vcf.gz > %(out)s_ex1.roh",
         "stats ex1.vcf.gz > %(out)s_ex1.stats",
     ]
 
diff --git a/tests/tabix_data/example.gff2.gz b/tests/tabix_data/example.gff2.gz
new file mode 100644
index 0000000..4084a74
Binary files /dev/null and b/tests/tabix_data/example.gff2.gz differ
diff --git a/tests/tabix_data/example.gff2.gz.tbi b/tests/tabix_data/example.gff2.gz.tbi
new file mode 100644
index 0000000..30d39ae
Binary files /dev/null and b/tests/tabix_data/example.gff2.gz.tbi differ
diff --git a/tests/tabix_data/example.gff3.gz b/tests/tabix_data/example.gff3.gz
new file mode 100644
index 0000000..b42b41b
Binary files /dev/null and b/tests/tabix_data/example.gff3.gz differ
diff --git a/tests/tabix_data/example.gff3.gz.tbi b/tests/tabix_data/example.gff3.gz.tbi
new file mode 100644
index 0000000..855e139
Binary files /dev/null and b/tests/tabix_data/example.gff3.gz.tbi differ
diff --git a/tests/tabix_test.py b/tests/tabix_test.py
index ec1e37e..87de282 100644
--- a/tests/tabix_test.py
+++ b/tests/tabix_test.py
@@ -14,7 +14,7 @@ import unittest
 import glob
 import re
 import copy
-from TestUtils import checkURL
+from TestUtils import checkURL, load_and_convert
 
 DATADIR = 'tabix_data'
 
@@ -35,31 +35,6 @@ def myzip_open(infile, mode="r"):
         return gzip.open(mode)
 
 
-def loadAndConvert(filename, encode=True):
-    '''load data from filename and convert all fields to string.
-
-    Filename can be either plain or compressed (ending in .gz).
-    '''
-    data = []
-    if filename.endswith(".gz"):
-        with gzip.open(filename) as inf:
-            for line in inf:
-                line = line.decode("ascii")
-                if line.startswith("#"):
-                    continue
-                d = line.strip().split("\t")
-                data.append(d)
-    else:
-        with open(filename) as f:
-            for line in f:
-                if line.startswith("#"):
-                    continue
-                d = line.strip().split("\t")
-                data.append(d)
-
-    return data
-
-
 def splitToBytes(s):
     '''split string and return list of bytes.'''
     return [x.encode("ascii") for x in s.split("\t")]
@@ -396,150 +371,8 @@ class TestIterationWithComments(TestIterationWithoutComments):
         TestIterationWithoutComments.setUp(self)
 
 
-class TestParser(unittest.TestCase):
-
-    filename = os.path.join(DATADIR, "example.gtf.gz")
-
-    def setUp(self):
-
-        self.tabix = pysam.TabixFile(self.filename)
-        self.compare = loadAndConvert(self.filename)
-
-    def tearDown(self):
-        self.tabix.close()
-
-    def testRead(self):
-
-        for x, r in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
-            c = self.compare[x]
-            self.assertEqual(c, list(r))
-            self.assertEqual(len(c), len(r))
-
-            # test indexing
-            for y in range(0, len(r)):
-                self.assertEqual(c[y], r[y])
-
-            # test slicing access
-            for y in range(0, len(r) - 1):
-                for cc in range(y + 1, len(r)):
-                    self.assertEqual(c[y:cc],
-                                     r[y:cc])
-            self.assertEqual("\t".join(map(str, c)),
-                             str(r))
-
-    def testWrite(self):
-
-        for x, r in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
-            self.assertEqual(self.compare[x], list(r))
-            c = list(r)
-            for y in range(len(r)):
-                r[y] = "test_%05i" % y
-                c[y] = "test_%05i" % y
-            self.assertEqual([x for x in c], list(r))
-            self.assertEqual("\t".join(c), str(r))
-            # check second assignment
-            for y in range(len(r)):
-                r[y] = "test_%05i" % y
-            self.assertEqual([x for x in c], list(r))
-            self.assertEqual("\t".join(c), str(r))
-
-    def testUnset(self):
-        for x, r in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
-            self.assertEqual(self.compare[x], list(r))
-            c = list(r)
-            e = list(r)
-            for y in range(len(r)):
-                r[y] = None
-                c[y] = None
-                e[y] = ""
-                self.assertEqual(c, list(r))
-                self.assertEqual("\t".join(e), str(r))
-
-    def testIteratorCompressed(self):
-        '''test iteration from compressed file.'''
-        with gzip.open(self.filename) as infile:
-            for x, r in enumerate(pysam.tabix_iterator(
-                    infile, pysam.asTuple())):
-                self.assertEqual(self.compare[x], list(r))
-                self.assertEqual(len(self.compare[x]), len(r))
-
-                # test indexing
-                for c in range(0, len(r)):
-                    self.assertEqual(self.compare[x][c], r[c])
-
-                # test slicing access
-                for c in range(0, len(r) - 1):
-                    for cc in range(c + 1, len(r)):
-                        self.assertEqual(self.compare[x][c:cc],
-                                         r[c:cc])
-
-    def testIteratorUncompressed(self):
-        '''test iteration from uncompressed file.'''
-        tmpfilename = 'tmp_testIteratorUncompressed'
-        with gzip.open(self.filename, "rb") as infile, \
-             open(tmpfilename, "wb") as outfile:
-            outfile.write(infile.read())
-
-        with open(tmpfilename) as infile:
-            for x, r in enumerate(pysam.tabix_iterator(
-                    infile, pysam.asTuple())):
-                self.assertEqual(self.compare[x], list(r))
-                self.assertEqual(len(self.compare[x]), len(r))
-
-                # test indexing
-                for c in range(0, len(r)):
-                    self.assertEqual(self.compare[x][c], r[c])
-
-                # test slicing access
-                for c in range(0, len(r) - 1):
-                    for cc in range(c + 1, len(r)):
-                        self.assertEqual(self.compare[x][c:cc],
-                                         r[c:cc])
-
-        os.unlink(tmpfilename)
-
-    def testCopy(self):
-        a = self.tabix.fetch(parser=pysam.asTuple()).next()
-        b = copy.copy(a)
-        self.assertEqual(a, b)
-
-        a = self.tabix.fetch(parser=pysam.asGTF()).next()
-        b = copy.copy(a)
-        self.assertEqual(a, b)
-
-
-class TestGTF(TestParser):
-
-    def testRead(self):
-
-        for x, r in enumerate(self.tabix.fetch(parser=pysam.asGTF())):
-            c = self.compare[x]
-            self.assertEqual(len(c), len(r))
-            self.assertEqual(list(c), list(r))
-            self.assertEqual(c, str(r).split("\t"))
-            self.assertTrue(r.gene_id.startswith("ENSG"))
-            if r.feature != 'gene':
-                self.assertTrue(r.transcript_id.startswith("ENST"))
-            self.assertEqual(c[0], r.contig)
-            self.assertEqual("\t".join(map(str, c)),
-                             str(r))
-
-    def testSetting(self):
-
-        for r in self.tabix.fetch(parser=pysam.asGTF()):
-            r.contig = r.contig + "_test"          
-            r.source = r.source + "_test"
-            r.feature = r.feature + "_test"
-            r.start += 10
-            r.end += 10
-            r.score = 20
-            r.strand = "+"
-            r.frame = 0
-            r.attributes = 'gene_id "0001";'
-
-
+            
 class TestIterators(unittest.TestCase):
-
     filename = os.path.join(DATADIR, "example.gtf.gz")
 
     iterator = pysam.tabix_generic_iterator
@@ -549,7 +382,7 @@ class TestIterators(unittest.TestCase):
     def setUp(self):
 
         self.tabix = pysam.TabixFile(self.filename)
-        self.compare = loadAndConvert(self.filename)
+        self.compare = load_and_convert(self.filename)
         self.tmpfilename_uncompressed = 'tmp_TestIterators'
         with gzip.open(self.filename, "rb") as infile, \
              open(self.tmpfilename_uncompressed, "wb") as outfile:
@@ -622,7 +455,6 @@ class TestIterationMalformattedGTFFiles(unittest.TestCase):
 
     '''test reading from malformatted gtf files.'''
 
-    parser = pysam.asGTF
     iterator = pysam.tabix_generic_iterator
     parser = pysam.asGTF
 
@@ -653,7 +485,7 @@ class TestBed(unittest.TestCase):
     def setUp(self):
 
         self.tabix = pysam.TabixFile(self.filename)
-        self.compare = loadAndConvert(self.filename)
+        self.compare = load_and_convert(self.filename)
 
     def tearDown(self):
         self.tabix.close()
@@ -751,7 +583,7 @@ class TestVCFFromTabix(TestVCF):
         TestVCF.setUp(self)
 
         self.tabix = pysam.TabixFile(self.tmpfilename + ".gz")
-        self.compare = loadAndConvert(self.filename)
+        self.compare = load_and_convert(self.filename)
 
     def tearDown(self):
         self.tabix.close()
@@ -858,42 +690,44 @@ class TestVCFFromVCF(TestVCF):
         TestVCF.setUp(self)
 
         self.vcf = pysam.VCF()
-        self.compare = loadAndConvert(self.filename, encode=False)
+        self.compare = load_and_convert(self.filename, encode=False)
 
     def tearDown(self):
         self.vcf.close()
 
-    def testConnecting(self):
+    def open_vcf(self, fn):
+        return self.vcf.connect(fn)
+
+    def get_failure_stage(self):
 
         fn = os.path.basename(self.filename)
         for x, msg in self.fail_on_opening:
-            if "%i.vcf" % x == fn:
-                self.assertRaises(ValueError,
-                                  self.vcf.connect,
-                                  self.tmpfilename + ".gz")
-            else:
-                self.vcf.connect(self.tmpfilename + ".gz")
+            if "{}.vcf".format(x) == fn:
+                return "opening"
+
+        for x, msg in self.fail_on_parsing:
+            if "{}.vcf".format(x) == fn:
+                return "parsing"
+        
+        for x, msg in self.fail_on_samples:
+            if "{}.vcf".format(x) == fn:
+                return "samples"
+
+        return None
+
+    def testConnecting(self):
+
+        if self.get_failure_stage() == "opening":
+            self.assertRaises(ValueError,
+                              self.open_vcf,
+                              self.tmpfilename + ".gz")
+        else:
+            self.open_vcf(self.tmpfilename + ".gz")
 
     def get_iterator(self):
 
         with open(self.filename) as f:
             fn = os.path.basename(self.filename)
-
-            for x, msg in self.fail_on_opening:
-                if "%i.vcf" % x == fn:
-                    self.assertRaises(ValueError, self.vcf.parse, f)
-                    return
-
-            for vcf_code, msg in self.fail_on_parsing:
-                if "%i.vcf" % vcf_code == fn:
-                    self.assertRaises((ValueError,
-                                       AssertionError),
-                                      list, self.vcf.parse(f))
-                    return
-                # python 2.7
-                # self.assertRaisesRegexp(
-                # ValueError, re.compile(msg), self.vcf.parse, f)
-
             return list(self.vcf.parse(f))
 
     def get_field_value(self, record, field):
@@ -918,22 +752,15 @@ class TestVCFFromVCF(TestVCF):
 
     def testParsing(self):
 
+        if self.get_failure_stage() in ("opening", "parsing"):
+            return
+
         itr = self.get_iterator()
         if itr is None:
             return
 
         fn = os.path.basename(self.filename)
 
-        for vcf_code, msg in self.fail_on_parsing:
-            if "%i.vcf" % vcf_code == fn:
-                self.assertRaises((ValueError,
-                                   AssertionError),
-                                  list, itr)
-                return
-                # python 2.7
-                # self.assertRaisesRegexp(
-                # ValueError, re.compile(msg), self.vcf.parse, f)
-
         check_samples = self.check_samples
         for vcf_code, msg in self.fail_on_samples:
             if "%i.vcf" % vcf_code == fn:
@@ -1079,8 +906,14 @@ class TestVCFFromVariantFile(TestVCFFromVCF):
                "ref", "alts", "qual",
                "filter", "info", "format")
 
-    fail_on_parsing = []
-    fail_on_opening = []
+    fail_on_parsing = [
+        (24, "Could not parse the header, sample line not found"),
+        ("issue85", "empty VCF"),
+    ]
+    fail_on_opening = [
+        (24, "Could not parse the header, sample line not found"),
+        ("issue85", "empty VCF"),
+    ]
     coordinate_offset = 0
     check_samples = True
     fail_on_samples = [
@@ -1134,7 +967,7 @@ class TestVCFFromVariantFile(TestVCFFromVCF):
 
     def setUp(self):
         TestVCF.setUp(self)
-        self.compare = loadAndConvert(self.filename, encode=False)
+        self.compare = load_and_convert(self.filename, encode=False)
 
     def tearDown(self):
         if self.vcf:
@@ -1148,9 +981,14 @@ class TestVCFFromVariantFile(TestVCFFromVCF):
     def get_field_value(self, record, field):
         return getattr(record, field)
 
+    def open_vcf(self, fn):
+        with pysam.VariantFile(fn) as inf:
+            pass
+
 
 for vcf_file in vcf_files:
-    n = "TestVCFFromVariantFile_%s" % os.path.basename(vcf_file[:-4])
+    p = os.path.basename(vcf_file[:-4])
+    n = "TestVCFFromVariantFile_%s" % p
     globals()[n] = type(n, (TestVCFFromVariantFile,), dict(filename=vcf_file,))
 
 
@@ -1241,7 +1079,7 @@ class TestBackwardsCompatibility(unittest.TestCase):
 
     def check(self, filename, raises=None):
         with pysam.TabixFile(filename) as tf:
-            ref = loadAndConvert(filename)
+            ref = load_and_convert(filename)
             if raises is None:
                 self.assertEqual(len(list(tf.fetch())), len(ref))
             else:
diff --git a/tests/tabixproxies_test.py b/tests/tabixproxies_test.py
new file mode 100644
index 0000000..cff0e59
--- /dev/null
+++ b/tests/tabixproxies_test.py
@@ -0,0 +1,318 @@
+import unittest
+import pysam
+import os
+import sys
+import re
+import copy
+import gzip
+from TestUtils import load_and_convert
+
+DATADIR = 'tabix_data'
+
+
+class TestParser(unittest.TestCase):
+
+    filename = os.path.join(DATADIR, "example.gtf.gz")
+
+    def setUp(self):
+
+        self.tabix = pysam.TabixFile(self.filename)
+        self.compare = load_and_convert(self.filename)
+
+    def tearDown(self):
+        self.tabix.close()
+
+    def testRead(self):
+
+        for x, r in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
+            c = self.compare[x]
+            self.assertEqual(c, list(r))
+            self.assertEqual(len(c), len(r))
+
+            # test indexing
+            for y in range(0, len(r)):
+                self.assertEqual(c[y], r[y])
+
+            # test slicing access
+            for y in range(0, len(r) - 1):
+                for cc in range(y + 1, len(r)):
+                    self.assertEqual(c[y:cc],
+                                     r[y:cc])
+            self.assertEqual("\t".join(map(str, c)),
+                             str(r))
+
+    def testWrite(self):
+
+        for x, r in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
+            self.assertEqual(self.compare[x], list(r))
+            c = list(r)
+            for y in range(len(r)):
+                r[y] = "test_%05i" % y
+                c[y] = "test_%05i" % y
+            self.assertEqual([x for x in c], list(r))
+            self.assertEqual("\t".join(c), str(r))
+            # check second assignment
+            for y in range(len(r)):
+                r[y] = "test_%05i" % y
+            self.assertEqual([x for x in c], list(r))
+            self.assertEqual("\t".join(c), str(r))
+
+    def testUnset(self):
+        for x, r in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
+            self.assertEqual(self.compare[x], list(r))
+            c = list(r)
+            e = list(r)
+            for y in range(len(r)):
+                r[y] = None
+                c[y] = None
+                e[y] = ""
+                self.assertEqual(c, list(r))
+                self.assertEqual("\t".join(e), str(r))
+
+    def testIteratorCompressed(self):
+        '''test iteration from compressed file.'''
+        with gzip.open(self.filename) as infile:
+            for x, r in enumerate(pysam.tabix_iterator(
+                    infile, pysam.asTuple())):
+                self.assertEqual(self.compare[x], list(r))
+                self.assertEqual(len(self.compare[x]), len(r))
+
+                # test indexing
+                for c in range(0, len(r)):
+                    self.assertEqual(self.compare[x][c], r[c])
+
+                # test slicing access
+                for c in range(0, len(r) - 1):
+                    for cc in range(c + 1, len(r)):
+                        self.assertEqual(self.compare[x][c:cc],
+                                         r[c:cc])
+
+    def testIteratorUncompressed(self):
+        '''test iteration from uncompressed file.'''
+        tmpfilename = 'tmp_testIteratorUncompressed'
+        with gzip.open(self.filename, "rb") as infile, \
+             open(tmpfilename, "wb") as outfile:
+            outfile.write(infile.read())
+
+        with open(tmpfilename) as infile:
+            for x, r in enumerate(pysam.tabix_iterator(
+                    infile, pysam.asTuple())):
+                self.assertEqual(self.compare[x], list(r))
+                self.assertEqual(len(self.compare[x]), len(r))
+
+                # test indexing
+                for c in range(0, len(r)):
+                    self.assertEqual(self.compare[x][c], r[c])
+
+                # test slicing access
+                for c in range(0, len(r) - 1):
+                    for cc in range(c + 1, len(r)):
+                        self.assertEqual(self.compare[x][c:cc],
+                                         r[c:cc])
+
+        os.unlink(tmpfilename)
+
+    def testCopy(self):
+        a = self.tabix.fetch(parser=pysam.asTuple()).next()
+        b = copy.copy(a)
+        self.assertEqual(a, b)
+
+        a = self.tabix.fetch(parser=pysam.asGTF()).next()
+        b = copy.copy(a)
+        self.assertEqual(a, b)
+
+
+class TestGTF(TestParser):
+
+    parser = pysam.asGTF
+
+    def testRead(self):
+
+        for x, r in enumerate(self.tabix.fetch(parser=self.parser())):
+            c = self.compare[x]
+            self.assertEqual(len(c), len(r))
+            self.assertEqual(list(c), list(r))
+            self.assertEqual(c, str(r).split("\t"))
+            self.assertTrue(r.gene_id.startswith("ENSG"))
+            if r.feature != 'gene':
+                self.assertTrue(r.transcript_id.startswith("ENST"))
+            self.assertEqual(c[0], r.contig)
+            self.assertEqual("\t".join(map(str, c)),
+                             str(r))
+
+    def testSetting(self):
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+
+        r.contig = r.contig + "_test_contig"          
+        r.source = r.source + "_test_source"
+        r.feature = r.feature + "_test_feature"
+        r.start += 10
+        r.end += 10
+        r.score = 20
+        r.strand = "+"
+        r.frame = 0
+        r.attributes = 'gene_id "0001";'
+        r.transcript_id = "0002"
+        sr = str(r)
+        self.assertTrue("_test_contig" in sr)
+        self.assertTrue("_test_source" in sr)
+        self.assertTrue("_test_feature" in sr)
+        self.assertTrue("gene_id \"0001\"" in sr)
+        self.assertTrue("transcript_id \"0002\"" in sr)
+
+    def test_added_attribute_is_output(self):
+        r = self.tabix.fetch(parser=self.parser()).next()
+
+        r.new_int_attribute = 12
+        self.assertTrue("new_int_attribute 12" in str(r).split("\t")[8])
+
+        r.new_float_attribute = 12.0
+        self.assertTrue("new_float_attribute 12.0" in str(r).split("\t")[8])
+
+        r.new_text_attribute = "abc"
+        self.assertTrue("new_text_attribute \"abc\"" in str(r).split("\t")[8])
+
+    def test_setting_start_is_one_based(self):
+        
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.start = 1800
+        self.assertEqual(r.start, 1800)
+        self.assertEqual(str(r).split("\t")[3], "1801")
+
+    def test_setting_end_is_one_based(self):
+        
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.end = 2100
+        self.assertEqual(r.end, 2100)
+        self.assertEqual(str(r).split("\t")[4], "2100")
+
+    def test_setting_frame_to_none_produces_dot(self):
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.frame = None
+        self.assertEqual(str(r).split("\t")[7], ".")
+
+        r.frame = 2
+        self.assertEqual(str(r).split("\t")[7], "2")
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.frame = "."
+        self.assertEqual(r.frame, None)
+        self.assertEqual(str(r).split("\t")[7], ".")
+
+    def test_setting_source_to_none_produces_dot(self):
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.source = None
+        self.assertEqual(str(r).split("\t")[1], ".")
+
+        r.source = "source"
+        self.assertEqual(str(r).split("\t")[1], "source")
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.source = "."
+        self.assertEqual(r.source, None)
+        self.assertEqual(str(r).split("\t")[1], ".")
+
+    def test_setting_feature_to_none_produces_dot(self):
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.feature = None
+        self.assertEqual(str(r).split("\t")[2], ".")
+
+        r.feature = "feature"
+        self.assertEqual(str(r).split("\t")[2], "feature")
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.feature = "."
+        self.assertEqual(r.feature, None)
+        self.assertEqual(str(r).split("\t")[2], ".")
+
+    def test_setting_strand_to_none_produces_dot(self):
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.strand = None
+        self.assertEqual(str(r).split("\t")[6], ".")
+
+        r.strand = "-"
+        self.assertEqual(str(r).split("\t")[6], "-")
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.strand = "."
+        self.assertEqual(r.strand, None)
+        self.assertEqual(str(r).split("\t")[6], ".")
+
+    def test_setting_score_to_none_produces_dot(self):
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.score = None
+        self.assertEqual(str(r).split("\t")[5], ".")
+
+        r.score = 12.0
+        self.assertEqual(str(r).split("\t")[5], "12.0")
+
+        r.score = -12.0
+        self.assertEqual(str(r).split("\t")[5], "-12.0")
+
+        r = self.tabix.fetch(parser=self.parser()).next()
+        r.score = "."
+        self.assertEqual(r.score, None)
+        self.assertEqual(str(r).split("\t")[5], ".")
+
+        r.score = 12
+        self.assertEqual(str(r).split("\t")[5], "12")
+
+        r.score = -12
+        self.assertEqual(str(r).split("\t")[5], "-12")
+
+
+class TestGFF3(TestGTF):
+
+    parser = pysam.asGFF3
+    filename = os.path.join(DATADIR, "example.gff3.gz")
+
+    def testRead(self):
+        for x, r in enumerate(self.tabix.fetch(parser=self.parser())):
+            c = self.compare[x]
+            self.assertEqual(len(c), len(r))
+            self.assertEqual(list(c), list(r))
+            self.assertEqual(c, str(r).split("\t"))
+            self.assertEqual(c[0], r.contig)
+            self.assertEqual("\t".join(map(str, c)),
+                             str(r))
+            self.assertTrue(r.ID.startswith("MI00"))
+
+    def testSetting(self):
+
+        for r in self.tabix.fetch(parser=self.parser()):
+            r.contig = r.contig + "_test_contig"          
+            r.source = "test_source"
+            r.feature = "test_feature"
+            r.start += 10
+            r.end += 10
+            r.score = 20
+            r.strand = "+"
+            r.frame = 0
+            r.ID="test"
+            sr = str(r)
+            self.assertTrue("test_contig" in sr)
+            self.assertTrue("test_source" in sr)
+            self.assertTrue("test_feature" in sr)
+            self.assertTrue("ID=test" in sr)
+            
+    def test_added_attribute_is_output(self):
+        r = self.tabix.fetch(parser=self.parser()).next()
+
+        r.new_int_attribute = 12
+        self.assertTrue("new_int_attribute=12" in str(r).split("\t")[8])
+
+        r.new_float_attribute = 12.0
+        self.assertTrue("new_float_attribute=12.0" in str(r).split("\t")[8])
+
+        r.new_text_attribute = "abc"
+        self.assertTrue("new_text_attribute=abc" in str(r).split("\t")[8])
+
+
+if __name__ == "__main__":
+    unittest.main()

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-pysam.git