[med-svn] [gmap] 01/04: New upstream version 2016-11-07

Alex Mestiashvili malex-guest at moszumanska.debian.org
Wed Nov 9 15:43:16 UTC 2016


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository gmap.

commit d49f40eb341e3ecf71335183fd5c5df670b3d661
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Wed Nov 9 16:10:11 2016 +0100

    New upstream version 2016-11-07
---
 ChangeLog               |  57 ++++++
 VERSION                 |   2 +-
 configure               |  24 +--
 src/bitpack64-access.c  |  38 ++--
 src/bitpack64-incr.c    |  36 ++--
 src/bitpack64-read.c    | 247 ++++++++++++++----------
 src/bitpack64-read.h    |   3 +
 src/bitpack64-readtwo.c |  50 +++--
 src/bitpack64-write.c   | 144 +++++++-------
 src/cpuid.c             |  26 ++-
 src/cpuid.h             |   5 +-
 src/gmap.c              |  10 +-
 src/gmap_select.c       |   4 +-
 src/gmapl_select.c      |   4 +-
 src/gsnap.c             |   4 +-
 src/gsnap_select.c      |   4 +-
 src/gsnapl_select.c     |   4 +-
 src/indexdb-write.c     |  17 +-
 src/indexdb.c           |   3 +-
 src/pair.c              | 496 ++++++++++++++++++++++++++++++++++++++++++++++--
 src/pair.h              |   4 +-
 src/sam_sort.c          |  28 ++-
 src/samprint.c          | 482 ++++++++++++++++++++++++++++++++++++++--------
 src/stage1hr.c          |   5 +-
 src/stage3hr.c          |  33 +++-
 src/stage3hr.h          |   4 +-
 src/substring.c         |  22 ++-
 src/substring.h         |   5 +-
 src/uniqscan.c          |   4 +-
 29 files changed, 1404 insertions(+), 361 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 9296781..7530342 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,60 @@
+2016-11-08  twu
+
+    * sam_sort.c: Added printing at monitor intervals
+
+    * stage3hr.c: Checking for cases where insertions and deletions extend past
+      genomicpos 0
+
+    * samprint.c: Added preliminary code for printing extended cigar strings
+
+    * pair.c, pair.h: Added code for printing extended cigar strings.  Not
+      printing BLAST e-values.
+
+    * indexdb.c: Removed unused statement
+
+    * gsnap.c, uniqscan.c: Using new interface to Pair_setup
+
+    * gmap_select.c, gmapl_select.c, gsnap_select.c, gsnapl_select.c: Using new
+      interface to CPUID_support
+
+    * gmap.c: Added option --sam-cigar-extended
+
+    * cpuid.c, cpuid.h: Added provisions for AVX512
+
+2016-10-24  twu
+
+    * stage1hr.c: Not computing floors if querylength is less than index1part
+
+    * pair.c: Showing blast_evalue function for GMAP
+
+    * samprint.c: Removing assertions and aborts that do not hold for DNA-Seq
+      chimeras
+
+2016-10-23  twu
+
+    * bitpack64-read.c, pair.c, samprint.c, stage3hr.c, stage3hr.h, substring.c,
+      substring.h: Printing BLAST e-values
+
+    * indexdb-write.c: For initializing counters, using packsizes from
+      offsetsmeta, rather than recomputing them from offsetsstrm
+
+    * bitpack64-write.c: Handling problem with ptri overflowing a signed int. 
+      Now just advancing a pointer.
+
+    * bitpack64-read.h: Added interface for a function
+
+    * bitpack64-access.c, bitpack64-incr.c, bitpack64-read.c,
+      bitpack64-readtwo.c: Handling the case where nwritten*4 overflows an
+      unsigned int.  Casting it first to UINT8.
+
+2016-10-17  twu
+
+    * bitpack64-access.c: Fixed an increment of out in extract_28
+
+2016-09-26  twu
+
+    * archive.html, index.html: Updated for latest version
+
 2016-09-24  twu
 
     * stage3.c: In solving dual introns, handling the case where single_gappairs
diff --git a/VERSION b/VERSION
index 6daa5bd..6f55558 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-09-23
\ No newline at end of file
+2016-11-07
\ No newline at end of file
diff --git a/configure b/configure
index ed5376d..e5f319c 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-09-23.
+# Generated by GNU Autoconf 2.69 for gmap 2016-11-07.
 #
 # Report bugs to <Thomas Wu <twu at gene.com>>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='gmap'
 PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-09-23'
-PACKAGE_STRING='gmap 2016-09-23'
+PACKAGE_VERSION='2016-11-07'
+PACKAGE_STRING='gmap 2016-11-07'
 PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
 PACKAGE_URL=''
 
@@ -1372,7 +1372,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures gmap 2016-09-23 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-11-07 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1443,7 +1443,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of gmap 2016-09-23:";;
+     short | recursive ) echo "Configuration of gmap 2016-11-07:";;
    esac
   cat <<\_ACEOF
 
@@ -1582,7 +1582,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-gmap configure 2016-09-23
+gmap configure 2016-11-07
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2188,7 +2188,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by gmap $as_me 2016-09-23, which was
+It was created by gmap $as_me 2016-11-07, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2538,8 +2538,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
 $as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-09-23" >&5
-$as_echo "2016-09-23" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-11-07" >&5
+$as_echo "2016-11-07" >&6; }
 
 
 ### Read defaults
@@ -4404,7 +4404,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='gmap'
- VERSION='2016-09-23'
+ VERSION='2016-11-07'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -20109,7 +20109,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by gmap $as_me 2016-09-23, which was
+This file was extended by gmap $as_me 2016-11-07, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -20175,7 +20175,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-gmap config.status 2016-09-23
+gmap config.status 2016-11-07
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/src/bitpack64-access.c b/src/bitpack64-access.c
index c21d45c..7b5dfcc 100644
--- a/src/bitpack64-access.c
+++ b/src/bitpack64-access.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-access.c 180341 2015-12-07 18:29:40Z twu $";
+static char rcsid[] = "$Id: bitpack64-access.c 199469 2016-10-23 03:49:07Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -2073,7 +2073,8 @@ static Accessor_T accessor_table[272] =
 
 UINT4
 Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
-  UINT4 *info, start;
+  UINT4 *info, start4;
+  UINT8 start8;
   int nwritten, remainder;
   UINT4 *bitpack;
   int index, row;
@@ -2084,13 +2085,15 @@ Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
   info = &(ptrs[oligo/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
 
 #ifdef WORDS_BIGENDIAN
-  start = Bigendian_convert_uint(info[0]);
-  bitpack = (UINT4 *) &(comp[start*4]);
-  nwritten = Bigendian_convert_uint(info[1]) - start;	/* In 128-bit registers */
+  start4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+  start8 = 4 * (UINT8) start4;		    /* In 32-bit words */
+  bitpack = (UINT4 *) &(comp[start8]);
+  nwritten = Bigendian_convert_uint(info[1]) - start4;	/* In 128-bit registers */
 #else
-  start = info[0];
-  bitpack = (UINT4 *) &(comp[start*4]);
-  nwritten = info[1] - start;	/* In 128-bit registers */
+  start4 = info[0];	/* In 128-bit registers */
+  start8 = 4 * (UINT8) start4;	/* In 32-bit words */
+  bitpack = (UINT4 *) &(comp[start8]);
+  nwritten = info[1] - start4;
 #endif
 
   remainder = oligo % BLOCKSIZE;
@@ -2115,7 +2118,8 @@ Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
 
 UINT4
 Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
-  UINT4 *info, start;
+  UINT4 *info, start4;
+  UINT8 start8;
   int nwritten, remainder;
   UINT4 *bitpack;
   int index, column;
@@ -2126,13 +2130,15 @@ Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
   info = &(ptrs[oligo/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
 
 #ifdef WORDS_BIGENDIAN
-  start = Bigendian_convert_uint(info[0]);
-  bitpack = (UINT4 *) &(comp[start*4]);
-  nwritten = Bigendian_convert_uint(info[1]) - start;	/* In 128-bit registers */
+  start4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+  start8 = 4 * (UINT8) start4;		    /* In 32-bit words */
+  bitpack = (UINT4 *) &(comp[start8]);
+  nwritten = Bigendian_convert_uint(info[1]) - start4;
 #else
-  start = info[0];
-  bitpack = (UINT4 *) &(comp[start*4]);
-  nwritten = info[1] - start;	/* In 128-bit registers */
+  start4 = info[0];		/* In 128-bit registers */
+  start8 = 4 * (UINT8) start4;	/* In 32-bit words */
+  bitpack = (UINT4 *) &(comp[start8]);
+  nwritten = info[1] - start4;
 #endif
 
   remainder = oligo % BLOCKSIZE;
@@ -3394,7 +3400,7 @@ extract_30 (UINT4 *out, const UINT4 *in) {
   value = ( CONVERT(*in) >>  4  )   % (1U << 30 ) ;
   in += WORD_INCR;
   value |= (CONVERT(*in) % (1U<< 2 ))<<( 30 - 2 );
-  out += WORD_INCR;
+  /* in += WORD_INCR; was out += WORD_INCR, but don't want either */
   out[56] = value;
 
   /* 15 */
diff --git a/src/bitpack64-incr.c b/src/bitpack64-incr.c
index ab5276e..ae0a3ea 100644
--- a/src/bitpack64-incr.c
+++ b/src/bitpack64-incr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-incr.c 197549 2016-09-08 01:14:55Z twu $";
+static char rcsid[] = "$Id: bitpack64-incr.c 199469 2016-10-23 03:49:07Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -8368,7 +8368,8 @@ static Subtractor_T subtractor_table[272] =
 /* Vertical */
 UINT4
 Bitpack64_incr (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
-  UINT4 *info, start;
+  UINT4 *info, start4;
+  UINT8 start8;
   int nwritten, remainder;
   UINT4 *bitpack;
   int index, column;
@@ -8380,13 +8381,15 @@ Bitpack64_incr (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
   info = &(ptrs[oligo/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
 
 #ifdef WORDS_BIGENDIAN
-  start = Bigendian_convert_uint(info[0]);
-  bitpack = (UINT4 *) &(comp[start*4]);
-  nwritten = Bigendian_convert_uint(info[1]) - start;	/* In 128-bit registers */
+  start4 = Bigendian_convert_uint(info[0]);	/* In 128-bit registers */
+  start8 = 4 * (UINT8) start4;			/* In 32-bit words */
+  bitpack = (UINT4 *) &(comp[start8]);
+  nwritten = Bigendian_convert_uint(info[1]) - start4;
 #else
-  start = info[0];
-  bitpack = (UINT4 *) &(comp[start*4]);
-  nwritten = info[1] - start;	/* In 128-bit registers */
+  start4 = info[0];		/* In 128-bit registers */
+  start8 = 4 * (UINT8) start4;	/* In 32-bit words */
+  bitpack = (UINT4 *) &(comp[start8]);
+  nwritten = info[1] - start4;
 #endif
 
   remainder = oligo % BLOCKSIZE;
@@ -8445,7 +8448,8 @@ Bitpack64_incr_bitpack (Oligospace_T oligo, int packsize, UINT4 *bitpack) {
 
 void
 Bitpack64_add (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp, UINT4 increment) {
-  UINT4 *info, start;
+  UINT4 *info, start4;
+  UINT8 start8;
   int nwritten, remainder;
   UINT4 *bitpack;
   int index, column;
@@ -8457,13 +8461,15 @@ Bitpack64_add (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp, UINT4 increment) {
   info = &(ptrs[oligo/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
 
 #ifdef WORDS_BIGENDIAN
-  start = Bigendian_convert_uint(info[0]);
-  bitpack = (UINT4 *) &(comp[start*4]);
-  nwritten = Bigendian_convert_uint(info[1]) - start;	/* In 128-bit registers */
+  start4 = Bigendian_convert_uint(info[0]);	/* In 128-bit registers */
+  start8 = 4 * (UINT8) start4;			/* In 32-bit words */
+  bitpack = (UINT4 *) &(comp[start8]);
+  nwritten = Bigendian_convert_uint(info[1]) - start4;
 #else
-  start = info[0];
-  bitpack = (UINT4 *) &(comp[start*4]);
-  nwritten = info[1] - start;	/* In 128-bit registers */
+  start4 = info[0];		/* In 128-bit registers */
+  start8 = 4 * (UINT8) start4;	/* In 32-bit words */
+  bitpack = (UINT4 *) &(comp[start8]);
+  nwritten = info[1] - start4;
 #endif
 
   remainder = oligo % BLOCKSIZE;
diff --git a/src/bitpack64-read.c b/src/bitpack64-read.c
index d6b1025..1e478b6 100644
--- a/src/bitpack64-read.c
+++ b/src/bitpack64-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-read.c 184374 2016-02-16 23:39:06Z twu $";
+static char rcsid[] = "$Id: bitpack64-read.c 199475 2016-10-23 23:21:59Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -13771,7 +13771,8 @@ static Unpacker_T unpacker_table[17][17] =
 UINT4
 Bitpack64_offsetptr (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
   Oligospace_T bmer;
-  UINT4 *info, nwritten, packsize_div2;
+  UINT4 *info, nwritten4, packsize_div2;
+  UINT8 nwritten8;
   int delta, remainder0, remainder1, quarter_block_0, quarter_block_1, column, row;
 #ifdef HAVE_SSE2
 #ifdef BRANCH_FREE_ROW_SUM
@@ -13800,15 +13801,16 @@ Bitpack64_offsetptr (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4
 
   debug(printf("Entered Bitpack64_offsetptr with oligo %u => bmer %u\n",oligo,bmer));
 
-  nwritten = info[0];		/* In 128-bit registers */
+  nwritten4 = info[0];		     /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
 #ifdef HAVE_SSE2  
-  bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+  bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
 #else
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
 #endif
 
-  /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
-  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+  /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2; */
+  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
 
   remainder0 = oligo % BLOCKSIZE;
   remainder1 = remainder0 + 1;
@@ -14079,7 +14081,8 @@ UINT8
 Bitpack64_offsetptr_huge (UINT8 *end0, Oligospace_T oligo,
 			  UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
   Oligospace_T bmer;
-  UINT4 *info, nwritten;
+  UINT4 *info, nwritten4;
+  UINT8 nwritten8;
   UINT8 offset0, offset1;
   UINT4 packsize_div2;
   int delta, remainder0, remainder1, quarter_block_0, quarter_block_1, column, row;
@@ -14110,11 +14113,12 @@ Bitpack64_offsetptr_huge (UINT8 *end0, Oligospace_T oligo,
 
   debug(printf("Entered Bitpack64_offsetptr_huge with oligo %u => bmer %u\n",oligo,bmer));
 
-  nwritten = info[0];
+  nwritten4 = info[0];		     /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
 #ifdef HAVE_SSE2  
-  bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+  bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
 #else
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
 #endif
 
   if (bitpackpages == NULL) {
@@ -14138,8 +14142,8 @@ Bitpack64_offsetptr_huge (UINT8 *end0, Oligospace_T oligo,
     offset1 += info[DIFFERENTIAL_METAINFO_SIZE+1];
   }
 
-  /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
-  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+  /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2; */
+  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
 
   remainder0 = oligo % BLOCKSIZE;
   remainder1 = remainder0 + 1;
@@ -14408,7 +14412,8 @@ UINT4
 Bitpack64_offsetptr_paired (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
   UINT4 ptr0;
   Oligospace_T bmer;
-  UINT4 *info, nwritten, packsize_div2;
+  UINT4 *info, nwritten4, packsize_div2;
+  UINT8 nwritten8;
   int delta, remainder, quarter_block, column, row;
 #ifdef HAVE_SSE2
 #ifdef BRANCH_FREE_ROW_SUM
@@ -14437,15 +14442,16 @@ Bitpack64_offsetptr_paired (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs,
 
   debug(printf("Entered Bitpack64_offsetptr_paired with oligo %u => bmer %u\n",oligo,bmer));
 
-  nwritten = info[0];		/* In 128-bit registers */
+  nwritten4 = info[0];		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4;			/* In 32-bit words */
 #ifdef HAVE_SSE2  
-  bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+  bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
 #else
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
 #endif
 
-  /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
-  packsize_div2 = (info[2] - nwritten);
+  /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2; */
+  packsize_div2 = (info[2] - nwritten4);
 
   remainder = oligo % BLOCKSIZE;
   if (remainder == 31) {
@@ -14662,7 +14668,8 @@ Bitpack64_offsetptr_paired (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs,
 UINT4
 Bitpack64_read_one (Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
   Oligospace_T bmer;
-  UINT4 *info, nwritten, packsize_div2;
+  UINT4 *info, nwritten4, packsize_div2;
+  UINT8 nwritten8;
   int delta, remainder, column, row;
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
   UINT4 ptr;
@@ -14692,20 +14699,23 @@ Bitpack64_read_one (Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp)
   debug(printf("Entered Bitpack64_read_one with oligo %u => bmer %u\n",oligo,bmer));
 
 #if defined(WORDS_BIGENDIAN)
-  nwritten = Bigendian_convert_uint(info[0]);		/* In 128-bit registers */
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-  packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten);
+  nwritten4 = Bigendian_convert_uint(info[0]);		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4;			/* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+  packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4);
 
 #elif !defined(HAVE_SSE2)
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+  nwritten4 = info[0];		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
 
 #else
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = info[0];		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
   /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
-  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
 #endif
 
   remainder = oligo % BLOCKSIZE;
@@ -14863,9 +14873,10 @@ Bitpack64_read_one (Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp)
 UINT8
 Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
 			 UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
-  UINT4 *pageptr;
+  /* UINT4 *pageptr; */
   Oligospace_T bmer;
-  UINT4 *info, nwritten, packsize_div2;
+  UINT4 *info, nwritten4, packsize_div2;
+  UINT8 nwritten8;
   int remainder, column, row;
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
   UINT8 ptr;
@@ -14896,20 +14907,23 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
   debug(printf("Entered Bitpack64_read_one_huge with oligo %llu => bmer %u\n",oligo,bmer));
 
 #ifdef WORDS_BIGENDIAN
-  nwritten = Bigendian_convert_uint(info[0]);		/* In 128-bit registers */
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-  packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten);
+  nwritten4 = Bigendian_convert_uint(info[0]);		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4;			/* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+  packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4);
 
 #elif !defined(HAVE_SSE2)
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+  nwritten4 = info[0];		     /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
 
 #else
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
-  /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
-  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+  nwritten4 = info[0];		     /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
+  /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2; */
+  packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
 #endif
 
 
@@ -14929,19 +14943,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
 #ifdef WORDS_BIGENDIAN
     ptr = Bigendian_convert_uint(/*offset0*/info[1]);
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer >= Bigendian_convert_uint(*pageptr)) {
+      /* pageptr = bitpackpages; */
+      while (bmer >= Bigendian_convert_uint(*bitpackpages)) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #else
     ptr = /*offset0*/ (UINT8) info[1];
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer >= *pageptr) {
+      /* pageptr = bitpackpages; */
+      while (bmer >= *bitpackpages) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #endif
@@ -14950,19 +14964,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
 #ifdef WORDS_BIGENDIAN
     ptr = Bigendian_convert_uint(/*offset0*/info[1]);
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer >= Bigendian_convert_uint(*pageptr)) {
+      /* pageptr = bitpackpages; */
+      while (bmer >= Bigendian_convert_uint(*bitpackpages)) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #else
     ptr = /*offset0*/ (UINT8) info[1];
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer >= *pageptr) {
+      /* pageptr = bitpackpages; */
+      while (bmer >= *bitpackpages) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #endif
@@ -14982,19 +14996,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
 #ifdef WORDS_BIGENDIAN
     ptr = Bigendian_convert_uint(/*offset0*/info[1]);
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer >= Bigendian_convert_uint(*pageptr)) {
+      /* pageptr = bitpackpages; */
+      while (bmer >= Bigendian_convert_uint(*bitpackpages)) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #else
     ptr = /*offset0*/ (UINT8) info[1];
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer >= *pageptr) {
+      /* pageptr = bitpackpages; */
+      while (bmer >= *bitpackpages) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #endif
@@ -15019,19 +15033,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
 #ifdef WORDS_BIGENDIAN
     ptr = Bigendian_convert_uint(/*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1]);
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer + 1 >= Bigendian_convert_uint(*pageptr)) {
+      /* pageptr = bitpackpages; */
+      while (bmer + 1 >= Bigendian_convert_uint(*bitpackpages)) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #else
     ptr = /*offset1*/ (UINT8) info[DIFFERENTIAL_METAINFO_SIZE+1];
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer + 1 >= *pageptr) {
+      /* pageptr = bitpackpages; */
+      while (bmer + 1 >= *bitpackpages) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #endif
@@ -15056,19 +15070,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
 #ifdef WORDS_BIGENDIAN
     ptr = Bigendian_convert_uint(/*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1]);
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer + 1 >= Bigendian_convert_uint(*pageptr)) {
+      /* pageptr = bitpackpages; */
+      while (bmer + 1 >= Bigendian_convert_uint(*bitpackpages)) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #else
     ptr = /*offset1*/(UINT8) info[DIFFERENTIAL_METAINFO_SIZE+1];
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer + 1 >= *pageptr) {
+      /* pageptr = bitpackpages; */
+      while (bmer + 1 >= *bitpackpages) {
 	ptr += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 #endif
@@ -15097,16 +15111,16 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
     offset1 = info[DIFFERENTIAL_METAINFO_SIZE+1];
   } else {
     offset0 = 0UL;
-    pageptr = bitpackpages;
-    while (bmer >= *pageptr) {
+    /* pageptr = bitpackpages; */
+    while (bmer >= *bitpackpages) {
       offset0 += POSITIONS_PAGE;
-      pageptr++;
+      bitpackpages++;
     }
 
     offset1 = offset0;
-    if (bmer + 1 >= *pageptr) {
+    if (bmer + 1 >= *bitpackpages) {
       offset1 += POSITIONS_PAGE;
-      pageptr++;
+      bitpackpages++;
     }
 
     offset0 += info[1];
@@ -15139,10 +15153,10 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
   if (quarter_block <= 1) {
     offset0 = (UINT8) info[1];
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer >= *pageptr) {
+      /* pageptr = bitpackpages; */
+      while (bmer >= *bitpackpages) {
 	offset0 += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 
@@ -15168,10 +15182,10 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
   } else {
     offset1 = (UINT8) info[DIFFERENTIAL_METAINFO_SIZE+1];
     if (bitpackpages != NULL) {
-      pageptr = bitpackpages;
-      while (bmer + 1 >= *pageptr) {
+      /* pageptr = bitpackpages; */
+      while (bmer + 1 >= *bitpackpages) {
 	offset1 += POSITIONS_PAGE;
-	pageptr++;
+	bitpackpages++;
       }
     }
 
@@ -15204,7 +15218,8 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
 void
 Bitpack64_block_offsets (UINT4 *offsets, Oligospace_T oligo,
 			 UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
-  UINT4 *info, nwritten;
+  UINT4 *info, nwritten4;
+  UINT8 nwritten8;
   UINT4 offset0, offset1, temp;
   int packsize, k;
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
@@ -15221,31 +15236,34 @@ Bitpack64_block_offsets (UINT4 *offsets, Oligospace_T oligo,
 
   info = &(bitpackptrs[oligo/BLOCKSIZE * DIFFERENTIAL_METAINFO_SIZE]);
 #ifdef WORDS_BIGENDIAN
-  nwritten = Bigendian_convert_uint(info[0]);
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
   offset0 = Bigendian_convert_uint(info[1]);
   offset1 = Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE+1]);
-  packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten)*2;
+  packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4)*2;
 
 #elif !defined(HAVE_SSE2)
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = info[0];		     /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
   offset0 = info[1];
   offset1 = info[DIFFERENTIAL_METAINFO_SIZE+1];
-  packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
+  packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2;
 
 #else
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = info[0];		     /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
   offset0 = info[1];
   offset1 = info[DIFFERENTIAL_METAINFO_SIZE+1];
-  packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
+  packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2;
 #endif
 
 
 #ifdef DEBUG
   printf("oligo: %08X, nwritten %u, offset0 %u, offset1 %u, packsize %d\n",
-	 oligo,nwritten,offset0,offset1,packsize);
+	 oligo,nwritten4,offset0,offset1,packsize);
 #endif
 
 
@@ -15360,7 +15378,8 @@ void
 Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
 			      UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
   UINT4 *pageptr;
-  UINT4 *info, nwritten;
+  UINT4 *info, nwritten4;
+  UINT8 nwritten8;
   Oligospace_T bmer;
   UINT8 offset0, offset1, temp;
   int packsize, k;
@@ -15380,16 +15399,19 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
   info = &(bitpackptrs[bmer * DIFFERENTIAL_METAINFO_SIZE]);
 
 #ifdef WORDS_BIGENDIAN
-  nwritten = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4;			/* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
 
 #elif !defined(HAVE_SSE2)
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = info[0];		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4;			/* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
 
 #else
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = info[0];		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4;			/* In 32-bit words */
+  bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
 #endif
 
 #ifdef WORDS_BIGENDIAN
@@ -15422,17 +15444,17 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
 #ifdef WORDS_BIGENDIAN
   offset0 += Bigendian_convert_uint(info[1]);
   offset1 += Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE+1]);
-  packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten)*2;
+  packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4)*2;
 #else
   offset0 += info[1];
   offset1 += info[DIFFERENTIAL_METAINFO_SIZE+1];
-  packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
+  packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2;
 #endif
 
 
 #ifdef DEBUG
   printf("oligo: %08X, nwritten %u, offset0 %u, offset1 %u, packsize %d\n",
-	 oligo,nwritten,offset0,offset1,packsize);
+	 oligo,nwritten4,offset0,offset1,packsize);
 #endif
 
 
@@ -15540,3 +15562,22 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
   return;
 }
 #endif
+
+
+
+int
+Bitpack64_differential_packsize (Oligospace_T oligo, UINT4 *bitpackptrs) {
+  UINT4 *info, nwritten4;
+
+  info = &(bitpackptrs[oligo/BLOCKSIZE * DIFFERENTIAL_METAINFO_SIZE]);
+
+#ifdef WORDS_BIGENDIAN
+  nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+  return (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4)*2;
+
+#else
+  nwritten4 = info[0];		/* In 128-bit registers */
+  return (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2;
+#endif
+}
+
diff --git a/src/bitpack64-read.h b/src/bitpack64-read.h
index 2a40624..5b7766e 100644
--- a/src/bitpack64-read.h
+++ b/src/bitpack64-read.h
@@ -27,4 +27,7 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
 			      UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp);
 #endif
 
+extern int
+Bitpack64_differential_packsize (Oligospace_T oligo, UINT4 *bitpackptrs);
+
 #endif
diff --git a/src/bitpack64-readtwo.c b/src/bitpack64-readtwo.c
index b4ad85b..5fb7fe4 100644
--- a/src/bitpack64-readtwo.c
+++ b/src/bitpack64-readtwo.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-readtwo.c 184456 2016-02-18 00:05:04Z twu $";
+static char rcsid[] = "$Id: bitpack64-readtwo.c 199469 2016-10-23 03:49:07Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -14208,7 +14208,8 @@ static Unpacker_T unpacker_table[17][17] =
 UINT4
 Bitpack64_read_two (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
   Oligospace_T bmer;
-  UINT4 *info, nwritten, packsize_div2;
+  UINT4 *info, nwritten4, packsize_div2;
+  UINT8 nwritten8;
   int remainder0, column;
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
   UINT4 offset0, offset1;
@@ -14236,20 +14237,23 @@ Bitpack64_read_two (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *
   debug(printf("Entered Bitpack64_read_two with oligo %llu => bmer %u\n",oligo,bmer));
 
 #ifdef WORDS_BIGENDIAN
-  nwritten = Bigendian_convert_uint(info[0]);		/* In 128-bit registers */
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-  packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten);
+  nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4;	       /* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+  packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten4);
 
 #elif !defined(HAVE_SSE2)
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-  packsize_div2 = (info[METAINFO_SIZE] - nwritten);
+  nwritten4 = info[0];		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4;	       /* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+  packsize_div2 = (info[METAINFO_SIZE] - nwritten4);
 
 #else
-  nwritten = info[0];		/* In 128-bit registers */
-  bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
-  /* packsize = (info[METAINFO_SIZE] - nwritten)*2; */
-  packsize_div2 = (info[METAINFO_SIZE] - nwritten);
+  nwritten4 = info[0];		     /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
+  /* packsize = (info[METAINFO_SIZE] - nwritten4)*2; */
+  packsize_div2 = (info[METAINFO_SIZE] - nwritten4);
 #endif
 
   remainder0 = oligo % BLOCKSIZE;
@@ -14522,7 +14526,8 @@ UINT8
 Bitpack64_read_two_huge (UINT8 *end0, Oligospace_T oligo,
 			 UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
   Oligospace_T bmer;
-  UINT4 *info, nwritten;
+  UINT4 *info, nwritten4;
+  UINT8 nwritten8;
   UINT8 offset0, offset1;
   UINT4 packsize_div2;
   int remainder0, column;
@@ -14556,20 +14561,23 @@ Bitpack64_read_two_huge (UINT8 *end0, Oligospace_T oligo,
   debug(printf("Entered Bitpack64_read_two_huge with oligo %u => bmer %u\n",oligo,bmer));
 
 #ifdef WORDS_BIGENDIAN
-  nwritten = Bigendian_convert_uint(info[0]);
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
   offset0 = (UINT8) Bigendian_convert_uint(info[1]);
   offset1 = (UINT8) Bigendian_convert_uint(info[METAINFO_SIZE+1]);
 
 #elif !defined(HAVE_SSE2)
-  nwritten = info[0];
-  bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = info[0];		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
   offset0 = (UINT8) info[1];
   offset1 = (UINT8) info[METAINFO_SIZE+1];
 
 #else
-  nwritten = info[0];
-  bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+  nwritten4 = info[0];		/* In 128-bit registers */
+  nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+  bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
   offset0 = (UINT8) info[1];
   offset1 = (UINT8) info[METAINFO_SIZE+1];
 #endif
@@ -14592,7 +14600,7 @@ Bitpack64_read_two_huge (UINT8 *end0, Oligospace_T oligo,
     }
   }
   debug(printf("offsets are %llu, %llu\n",offset0,offset1));
-  packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten);
+  packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten4);
 
 #else
   if (bitpackpages != NULL) {
@@ -14610,7 +14618,7 @@ Bitpack64_read_two_huge (UINT8 *end0, Oligospace_T oligo,
     }
   }
   debug(printf("offsets are %llu, %llu\n",offset0,offset1));
-  packsize_div2 = (info[METAINFO_SIZE] - nwritten);
+  packsize_div2 = (info[METAINFO_SIZE] - nwritten4);
 #endif
 
   remainder0 = oligo % BLOCKSIZE;
diff --git a/src/bitpack64-write.c b/src/bitpack64-write.c
index 9f2e7d9..e5f54ab 100644
--- a/src/bitpack64-write.c
+++ b/src/bitpack64-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-write.c 183888 2016-02-05 20:33:29Z twu $";
+static char rcsid[] = "$Id: bitpack64-write.c 199471 2016-10-23 03:54:54Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -4590,8 +4590,9 @@ compute_q4_diffs_bidir_huge (UINT4 *diffs, UINT8 *values) {
 void
 Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending, Oligospace_T n) {
   FILE *ptrs_fp, *comp_fp;
-  UINT4 *ptrs;
-  int ptri, i;
+  UINT4 *ptrs, *p;
+  size_t nptrs;
+  int i;
   Oligospace_T positioni;
 
   /* Buffer is used to avoid frequent writes to the file */
@@ -4612,8 +4613,7 @@ Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending,
   /* 2 metavalues: nwritten (pointer) and cumulative sum for block.
      Packsize can be computed from difference between successive
      pointers, if only even packsizes are allowed */
-  ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
-  ptri = 0;
+  p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
 
   if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -4628,10 +4628,10 @@ Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending,
   /* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
   for (positioni = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE) {
     /* Pointer */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
 
     /* Value for start of block */
-    ptrs[ptri++] = ascending[positioni];
+    *p++ = ascending[positioni];
 	
     /* Pack block of 64 diffs */
     packsize = Bitpack64_compute_q4_diffs_bidir(diffs,&(ascending[positioni]));
@@ -4648,10 +4648,10 @@ Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending,
   /* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
   if (positioni <= n) {
     /* Finish last block of 64 */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
 
     /* Value for start of block */
-    ptrs[ptri++] = ascending[positioni];
+    *p++ = ascending[positioni];
 
     /* For differential, want <=.  For direct, want < */
     for (i = 0; i <= (int) (n - positioni); i++) {
@@ -4675,16 +4675,17 @@ Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending,
 
 
   /* Write the final pointer, which will point after the end of the file */
-  ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+  *p++ = nwritten/4;	/* In 128-bit registers */
 
   /* Value for end of block */
-  ptrs[ptri++] = ascending[n];
+  *p++ = ascending[n];
 
   if ((ptrs_fp = FOPEN_WRITE_BINARY(ptrsfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",ptrsfile);
     exit(9);
   } else {
-    FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+    nptrs = p - ptrs;
+    FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
     FREE(ptrs);
     fclose(ptrs_fp);
   }
@@ -4720,9 +4721,9 @@ void
 Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *packsizes, UINT4 **bitpacks,
 				       Oligospace_T n) {
   FILE *ptrs_fp, *comp_fp;
-  UINT4 *ptrs, nregisters;
+  UINT4 *ptrs, *p, nregisters;
   UINT4 totalcount;
-  UINT4 ptri;
+  size_t nptrs;
   int i;
   Oligospace_T positioni, bmer;
 
@@ -4742,8 +4743,7 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
   /* 2 metavalues: nwritten (pointer) and cumulative sum for block.
      Packsize can be computed from difference between successive
      pointers, if only even packsizes are allowed */
-  ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
-  ptri = 0;
+  p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
 
   if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -4759,10 +4759,10 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
   totalcount = 0;
   for (positioni = 0, bmer = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE, bmer++) {
     /* Pointer */
-    ptrs[ptri++] = nregisters;	/* In 128-bit registers */
+    *p++ = nregisters;	/* In 128-bit registers */
 
     /* Value for start of block */
-    ptrs[ptri++] = totalcount;
+    *p++ = totalcount;
 	
     /* Pack block of 64 diffs */
     Bitpack64_extract_bitpack(counts,packsizes[bmer],bitpacks[bmer]);
@@ -4778,13 +4778,13 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
   /* For nucleotides, expect a single final block where positioni == n */
   if (positioni <= n) {
     /* Finish last block of 64 */
-    ptrs[ptri++] = nregisters;	/* In 128-bit registers */
+    *p++ = nregisters;	/* In 128-bit registers */
 
     /* Value for start of block */
-    ptrs[ptri++] = totalcount;
+    *p++ = totalcount;
 
     if (positioni == n) {
-      /* Don't have a bitpack at [bmerspace] */
+      /* Don't have a bitpack at [bmerspace].  Just fills counts with zeroes. */
       Bitpack64_extract_bitpack(counts,/*packsize*/0,/*bitpack*/NULL);
     } else {
       Bitpack64_extract_bitpack(counts,packsizes[bmer],bitpacks[bmer]);
@@ -4808,16 +4808,17 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
   }
 
   /* Write the final pointer, which will point after the end of the file */
-  ptrs[ptri++] = nregisters;	/* In 128-bit registers */
+  *p++ = nregisters;	/* In 128-bit registers */
 
   /* Value for end of block */
-  ptrs[ptri++] = totalcount;
+  *p++ = totalcount;
 
   if ((ptrs_fp = FOPEN_WRITE_BINARY(ptrsfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",ptrsfile);
     exit(9);
   } else {
-    FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+    nptrs = p - ptrs;
+    FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
     FREE(ptrs);
     fclose(ptrs_fp);
   }
@@ -4841,8 +4842,9 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
 void
 Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *ascending, Oligospace_T n) {
   FILE *ptrs_fp, *comp_fp;
-  UINT4 *ptrs;
-  int ptri, i;
+  UINT4 *ptrs, *p;
+  size_t nptrs;
+  int i;
   Oligospace_T positioni;
 
   /* Buffer is used to avoid frequent writes to the file */
@@ -4861,8 +4863,7 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
   /* 2 metavalues: nwritten (pointer) and cumulative sum for block.
      Packsize can be computed from difference between successive
      pointers, if only even packsizes are allowed */
-  ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * PAIRED_METAINFO_SIZE,sizeof(UINT4));
-  ptri = 0;
+  p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * PAIRED_METAINFO_SIZE,sizeof(UINT4));
 
   if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -4877,10 +4878,10 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
   /* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
   for (positioni = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE) {
     /* Pointer to D4 */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
 
     /* Prefix sum for start of block */
-    ptrs[ptri++] = ascending[positioni];
+    *p++ = ascending[positioni];
 	
     /* D4: Pack block of 64 diffs */
     packsize = Bitpack64_compute_q4_diffs_bidir(diffs,&(ascending[positioni]));
@@ -4893,7 +4894,7 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
 #endif
 
     /* Pointer to D1 */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
 
     /* D1: Pack block of 64 diffs */
     packsize = compute_q1_diffs(diffs,&(ascending[positioni]));
@@ -4911,10 +4912,10 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
   if (positioni <= n) {
     /* Finish last block of 64 */
     /* Pointer to D4 */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
 
     /* Prefix sum for start of block */
-    ptrs[ptri++] = ascending[positioni];
+    *p++ = ascending[positioni];
 
     /* For differential, want <=.  For direct, want < */
     for (i = 0; i <= (int) (n - positioni); i++) {
@@ -4936,7 +4937,7 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
 #endif
 
     /* Pointer to D1 */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
 
     /* D1: Pack block of < 64 diffs */
     packsize = compute_q1_diffs(diffs,last_block);
@@ -4944,16 +4945,17 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
   }
 
   /* Write the final pointer, which will point after the end of the file */
-  ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+  *p++ = nwritten/4;	/* In 128-bit registers */
 
   /* Prefix sum for end of block */
-  ptrs[ptri++] = ascending[n];
+  *p++ = ascending[n];
 
   if ((ptrs_fp = FOPEN_WRITE_BINARY(ptrsfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",ptrsfile);
     exit(9);
   } else {
-    FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+    nptrs = p - ptrs;
+    FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
     FREE(ptrs);
     fclose(ptrs_fp);
   }
@@ -4990,7 +4992,8 @@ Bitpack64_write_fixed10 (char *ptrsfile, char *compfile, UINT4 *ascending, Oligo
 #endif
   FILE *comp_fp;
   UINT4 *ptrs;
-  int ptri, i;
+  UINT4 ptri;
+  int i;
   Oligospace_T positioni;
 
   /* Buffer is used to avoid frequent writes to the file */
@@ -5127,8 +5130,8 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
   UINT8 currpage, nextpage;
   FILE *pages_fp, *ptrs_fp, *comp_fp;
   UINT4 pages[25];	/* Allows us to handle up to 100 billion positions */
-  UINT4 *ptrs;
-  int ptri;
+  UINT4 *ptrs, *p;
+  size_t nptrs;
   Oligospace_T positioni;
 
   /* Buffer is used to avoid frequent writes to the file */
@@ -5149,8 +5152,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
   /* 2 metavalues: nwritten (pointer) and cumulative sum for block.
      Packsize can be computed from difference between successive
      pointers, if only even packsizes are allowed */
-  ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
-  ptri = 0;
+  p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
 
   if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -5167,7 +5169,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
   /* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
   for (positioni = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE) {
     /* Pointer */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
 
     /* Value for start of block */
     while (ascending[positioni] >= nextpage) {
@@ -5177,7 +5179,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
       currpage = nextpage;
       nextpage += POSITIONS_PAGE;
     }
-    ptrs[ptri++] = ascending[positioni] - currpage;
+    *p++ = ascending[positioni] - currpage;
 
 	
     /* Pack block of 64 diffs */
@@ -5195,7 +5197,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
   /* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
   if (positioni <= n) {
     /* Finish last block of 64 */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
 
     /* Value for start of block */
     while (ascending[positioni] >= nextpage) {
@@ -5205,7 +5207,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
       currpage = nextpage;
       nextpage += POSITIONS_PAGE;
     }
-    ptrs[ptri++] = ascending[positioni] - currpage;
+    *p++ = ascending[positioni] - currpage;
 
     /* For differential, want <=.  For direct, want < */
     for (i = 0; i <= (int) (n - positioni); i++) {
@@ -5229,7 +5231,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
 
 
   /* Write the final pointer, which will point after the end of the file */
-  ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+  *p++ = nwritten/4;	/* In 128-bit registers */
 
   /* Value for end of block */
   if (ascending[n] >= nextpage) {
@@ -5239,7 +5241,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
     currpage = nextpage;
     /* nextpage += POSITIONS_PAGE; */
   }
-  ptrs[ptri++] = ascending[n] - currpage;
+  *p++ = ascending[n] - currpage;
 
 
   /* Write pages */
@@ -5264,7 +5266,8 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
     fprintf(stderr,"Can't write to file %s\n",ptrsfile);
     exit(9);
   } else {
-    FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+    nptrs = p - ptrs;
+    FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
     FREE(ptrs);
     fclose(ptrs_fp);
   }
@@ -5286,10 +5289,10 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
 					    char *packsizes, UINT4 **bitpacks, Oligospace_T n) {
   UINT8 currpage, nextpage;
   FILE *pages_fp, *ptrs_fp, *comp_fp;
-  UINT4 pages[25];	/* Allows us to handle up to 100 billion positions */
-  UINT4 *ptrs, nregisters;
+  UINT4 pages[25];	/* Allows us to handle up to 100 billion positions.  At q3, means 300 billion nt */
+  UINT4 *ptrs, *p, nregisters;
   UINT8 totalcount;
-  int ptri;
+  size_t nptrs;
   Oligospace_T positioni, bmer;
 
   /* Buffer is used to avoid frequent writes to the file */
@@ -5308,8 +5311,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
   /* 2 metavalues: nwritten (pointer) and cumulative sum for block.
      Packsize can be computed from difference between successive
      pointers, if only even packsizes are allowed */
-  ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
-  ptri = 0;
+  p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
 
   if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -5327,7 +5329,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
   totalcount = 0;
   for (positioni = 0, bmer = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE, bmer++) {
     /* Pointer */
-    ptrs[ptri++] = nregisters;	/* In 128-bit registers */
+    *p++ = nregisters;	/* In 128-bit registers */
 
     /* Value for start of block */
     while (totalcount >= nextpage) {
@@ -5337,7 +5339,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
       currpage = nextpage;
       nextpage += POSITIONS_PAGE;
     }
-    ptrs[ptri++] = totalcount - currpage;
+    *p++ = totalcount - currpage;
 
 	
     /* Pack block of 64 diffs */
@@ -5354,7 +5356,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
   /* For nucleotides, expect a single final block where positioni == n */
   if (positioni <= n) {
     /* Finish last block of 64 */
-    ptrs[ptri++] = nregisters;	/* In 128-bit registers */
+    *p++ = nregisters;	/* In 128-bit registers */
 
     /* Value for start of block */
     while (totalcount >= nextpage) {
@@ -5364,7 +5366,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
       currpage = nextpage;
       nextpage += POSITIONS_PAGE;
     }
-    ptrs[ptri++] = totalcount - currpage;
+    *p++ = totalcount - currpage;
 
     if (positioni == n) {
       /* Don't have a bitpack at [bmerspace] */
@@ -5392,7 +5394,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
 
 
   /* Write the final pointer, which will point after the end of the file */
-  ptrs[ptri++] = nregisters;	/* In 128-bit registers */
+  *p++ = nregisters;	/* In 128-bit registers */
 
   /* Value for end of block */
   if (totalcount >= nextpage) {
@@ -5402,7 +5404,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
     currpage = nextpage;
     /* nextpage += POSITIONS_PAGE; */
   }
-  ptrs[ptri++] = totalcount - currpage;
+  *p++ = totalcount - currpage;
 
 
   /* Write pages */
@@ -5427,7 +5429,8 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
     fprintf(stderr,"Can't write to file %s\n",ptrsfile);
     exit(9);
   } else {
-    FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+    nptrs = p - ptrs;
+    FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
     FREE(ptrs);
     fclose(ptrs_fp);
   }
@@ -5456,7 +5459,7 @@ Bitpack64_write_fixed10_huge (char *pagesfile, char *ptrsfile, char *compfile,
   FILE *pages_fp, *comp_fp;
   UINT4 pages[25];	/* Allows us to handle up to 100 billion positions */
   UINT4 *ptrs;
-  int ptri;
+  UINT4 ptri;
   Oligospace_T positioni;
 
   /* Buffer is used to avoid frequent writes to the file */
@@ -5677,8 +5680,9 @@ compute_packsize (UINT4 *values) {
 void
 Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospace_T n) {
   FILE *ptrs_fp, *comp_fp;
-  UINT4 *ptrs;
-  int ptri, i;
+  UINT4 *ptrs, *p;
+  size_t nptrs;
+  int i;
   Oligospace_T positioni;
 
   UINT4 *buffer;
@@ -5696,8 +5700,7 @@ Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospac
   /* 1 metavalue: nwritten (pointer).  Packsize can be
      computed from difference between successive pointers, if only
      even packsizes are allowed */
-  ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE - 1)/BLOCKSIZE + 1) * DIRECT_METAINFO_SIZE,sizeof(UINT4));
-  ptri = 0;
+  p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE - 1)/BLOCKSIZE + 1) * DIRECT_METAINFO_SIZE,sizeof(UINT4));
 
   if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -5710,7 +5713,7 @@ Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospac
 
   for (positioni = 0; positioni + BLOCKSIZE < n; positioni += BLOCKSIZE) {
     /* Pointer */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
 
     /* Pack block of 64 diffs */
     packsize = compute_packsize(&(direct[positioni]));
@@ -5725,7 +5728,7 @@ Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospac
 
   if (positioni < n) {
     /* Finish last block of 64 */
-    ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+    *p++ = nwritten/4;	/* In 128-bit registers */
     
     i = 0;
     while (positioni < n) {
@@ -5747,13 +5750,14 @@ Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospac
 
   /* Write the final pointer, which will point after the end of the
      file */
-  ptrs[ptri++] = nwritten/4;	/* In 128-bit registers */
+  *p++ = nwritten/4;	/* In 128-bit registers */
 
   if ((ptrs_fp = FOPEN_WRITE_BINARY(ptrsfile)) == NULL) {
     fprintf(stderr,"Can't write to file %s\n",ptrsfile);
     exit(9);
   } else {
-    FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+    nptrs = p - ptrs;
+    FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
     FREE(ptrs);
     fclose(ptrs_fp);
   }
diff --git a/src/cpuid.c b/src/cpuid.c
index 45d722e..eadd351 100644
--- a/src/cpuid.c
+++ b/src/cpuid.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: cpuid.c 191640 2016-06-09 22:32:52Z twu $";
+static char rcsid[] = "$Id: cpuid.c 200231 2016-11-08 00:55:17Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -10,12 +10,14 @@ static char rcsid[] = "$Id: cpuid.c 191640 2016-06-09 22:32:52Z twu $";
 #if defined(AX_HOST_POWER8)
 
 void
-CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p, bool *avx2_support_p) {
+CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p,
+	       bool *avx2_support_p, bool *avx512_support_p) {
   *sse2_support_p = false;
   *ssse3_support_p = false;
   *sse4_1_support_p = false;
   *sse4_2_support_p = false;
   *avx2_support_p = false;
+  *avx512_support_p = false;
 
   return;
 }
@@ -26,12 +28,14 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
 #include <immintrin.h>
 
 void
-CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p, bool *avx2_support_p) {
+CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p,
+	       bool *avx2_support_p, bool *avx512_support_p) {
   *sse2_support_p = _may_i_use_cpu_feature(_FEATURE_SSE2);
   *ssse3_support_p = _may_i_use_cpu_feature(_FEATURE_SSSE3);
   *sse4_1_support_p = _may_i_use_cpu_feature(_FEATURE_SSE4_1);
   *sse4_2_support_p = _may_i_use_cpu_feature(_FEATURE_SSE4_2);
   *avx2_support_p = _may_i_use_cpu_feature(_FEATURE_AVX2 | _FEATURE_FMA | _FEATURE_BMI | _FEATURE_LZCNT | _FEATURE_MOVBE);
+  *avx512_support_p = _may_i_use_cpu_feature(_FEATURE_512F);
 
   return;
 }
@@ -78,7 +82,8 @@ check_xcr0_ymm () {
 }
 
 void
-CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p, bool *avx2_support_p) {
+CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p,
+	       bool *avx2_support_p, bool *avx512_support_p) {
   uint32_t abcd[4];
   uint32_t sse2_mask = (1 << 26); /* edx */
   uint32_t ssse3_mask = (1 << 9); /* ecx */
@@ -90,6 +95,7 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
   uint32_t fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27)); /* ecx */
   uint32_t avx2_bmi12_mask = ((1 << 5) | (1 << 3) | (1 << 8)); /* ebx */
   uint32_t lzcnt_mask = (1 << 5); /* ecx */
+  uint32_t avx512_mask = (1 << 16);
 
 
   run_cpuid(1, 0, abcd);
@@ -105,8 +111,10 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
 
   if ((abcd[ECX] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask) {
     *avx2_support_p = false;
+    *avx512_support_p = false;
   } else if (!check_xcr0_ymm()) {
     *avx2_support_p = false;
+    *avx512_support_p = false;
   } else {
     run_cpuid(7, 0, abcd);
 #ifdef MAIN
@@ -115,6 +123,7 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
 
     if ((abcd[EBX] & avx2_bmi12_mask) != avx2_bmi12_mask) {
       *avx2_support_p = false;
+      *avx2_support_p = false;
     } else {
       run_cpuid(0x80000001, 0, abcd);
 #ifdef MAIN
@@ -123,12 +132,16 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
 
       if ((abcd[ECX] & lzcnt_mask) != lzcnt_mask) {
 	*avx2_support_p = false;
+	*avx512_support_p = false;
       } else {
 	*avx2_support_p = true;
+	*avx512_support_p = ((abcd[ECX] & avx512_mask) == avx512_mask) ? true : false;
       }
     }
   }
 
+  
+
   return;
 }
 
@@ -143,14 +156,17 @@ main (int argc, char *argv[]) {
   bool sse4_1_support_p;
   bool sse4_2_support_p;
   bool avx2_support_p;
+  bool avx512_support_p;
 
-  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+		&avx2_support_p,&avx512_support_p);
 
   printf("sse2 support: %d\n",sse2_support_p);
   printf("ssse3 support: %d\n",ssse3_support_p);
   printf("sse4.1 support: %d\n",sse4_1_support_p);
   printf("sse4.2 support: %d\n",sse4_2_support_p);
   printf("avx2 support: %d\n",avx2_support_p);
+  printf("avx512 support: %d\n",avx512_support_p);
 
   return 0;
 }
diff --git a/src/cpuid.h b/src/cpuid.h
index 97048ab..287e91c 100644
--- a/src/cpuid.h
+++ b/src/cpuid.h
@@ -1,10 +1,11 @@
-/* $Id: cpuid.h 171614 2015-08-10 23:27:29Z twu $ */
+/* $Id: cpuid.h 200231 2016-11-08 00:55:17Z twu $ */
 #ifndef CPUID_INCLUDED
 #define CPUID_INCLUDED
 #include "bool.h"
 
 extern void
-CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p, bool *avx2_support_p);
+CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p,
+	       bool *avx2_support_p, bool *avx512_support_p);
 
 #endif
 
diff --git a/src/gmap.c b/src/gmap.c
index 5a0fe5f..26a6c10 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 197391 2016-09-03 00:43:23Z twu $";
+static char rcsid[] = "$Id: gmap.c 200232 2016-11-08 00:55:43Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -395,6 +395,7 @@ static char *sam_read_group_name = NULL;
 static char *sam_read_group_library = NULL;
 static char *sam_read_group_platform = NULL;
 static bool sam_insert_0M_p = false;
+static bool sam_cigar_extended_p = false;
 
 static bool orderedp = false;
 static bool failsonlyp = false;
@@ -565,6 +566,7 @@ static struct option long_options[] = {
   {"quality-print-shift", required_argument, 0, 'j'}, /* quality_shift */
   {"no-sam-headers", no_argument, 0, 0},	/* sam_headers_p */
   {"sam-use-0M", no_argument, 0, 0},		/* sam_insert_0M_p */
+  {"sam-extended-cigar", no_argument, 0, 0},	/* sam_cigar_extended_p */
   {"read-group-id", required_argument, 0, 0},	/* sam_read_group_id */
   {"read-group-name", required_argument, 0, 0},	/* sam_read_group_name */
   {"read-group-library", required_argument, 0, 0}, /* sam_read_group_library */
@@ -5326,6 +5328,8 @@ parse_command_line (int argc, char *argv[], int optind) {
 	sam_headers_p = false;
       } else if (!strcmp(long_name,"sam-use-0M")) {
 	sam_insert_0M_p = true;
+      } else if (!strcmp(long_name,"sam-extended-cigar")) {
+	sam_cigar_extended_p = true;
       } else if (!strcmp(long_name,"quality-protocol")) {
 	if (user_quality_shift == true) {
 	  fprintf(stderr,"Cannot specify both -j (--quality-print-shift) and --quality-protocol\n");
@@ -6589,7 +6593,7 @@ main (int argc, char *argv[]) {
 	     force_xs_direction_p,md_lowercase_variant_p,
 	     /*snps_p*/genomecomp_alt ? true : false,
 	     /*print_nsnpdiffs_p*/genomecomp_alt ? true : false,genomelength,
-	     gff3_phase_swap_p);
+	     gff3_phase_swap_p,sam_cigar_extended_p);
   Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
 	       require_splicedir_p,splicing_iit,splicing_divint_crosstable,
 	       donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
@@ -7209,6 +7213,8 @@ Output options\n\
   --no-sam-headers               Do not print headers beginning with '@'\n\
   --sam-use-0M                   Insert 0M in CIGAR between adjacent insertions and deletions\n\
                                    Required by Picard, but can cause errors in other tools\n\
+  --sam-extended-cigar           Use extended CIGAR format (using X and = symbols instead of M,\n\
+                                   to indicate matches and mismatches, respectively\n\
   --force-xs-dir                 For RNA-Seq alignments, disallows XS:A:? when the sense direction\n\
                                    is unclear, and replaces this value arbitrarily with XS:A:+.\n\
                                    May be useful for some programs, such as Cufflinks, that cannot\n\
diff --git a/src/gmap_select.c b/src/gmap_select.c
index b7ecfcf..7bbffb9 100644
--- a/src/gmap_select.c
+++ b/src/gmap_select.c
@@ -27,11 +27,13 @@ main (int argc, char *argv[]) {
   bool sse4_1_support_p;
   bool sse4_2_support_p;
   bool avx2_support_p;
+  bool avx512_support_p;
   char *dir, **new_argv;
   int i;
   int rc;
 
-  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+		&avx2_support_p,&avx512_support_p);
 
   new_argv = (char **) malloc((argc + 1) * sizeof(char *));
   for (i = 1; i < argc; i++) {
diff --git a/src/gmapl_select.c b/src/gmapl_select.c
index 5020d38..816b356 100644
--- a/src/gmapl_select.c
+++ b/src/gmapl_select.c
@@ -27,11 +27,13 @@ main (int argc, char *argv[]) {
   bool sse4_1_support_p;
   bool sse4_2_support_p;
   bool avx2_support_p;
+  bool avx512_support_p;
   char *dir, **new_argv;
   int i;
   int rc;
 
-  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+		&avx2_support_p,&avx512_support_p);
 
   new_argv = (char **) malloc((argc + 1) * sizeof(char *));
   for (i = 1; i < argc; i++) {
diff --git a/src/gsnap.c b/src/gsnap.c
index 963f3f7..d61e979 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 197391 2016-09-03 00:43:23Z twu $";
+static char rcsid[] = "$Id: gsnap.c 200234 2016-11-08 00:56:52Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -3302,7 +3302,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
 	     force_xs_direction_p,md_lowercase_variant_p,
 	     /*snps_p*/snps_iit ? true : false,print_nsnpdiffs_p,
 	     Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
-	     /*gff3_phase_swap_p*/false);
+	     /*gff3_phase_swap_p*/false,/*cigar_extended_p*/false);
   Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
 	       /*require_splicedir_p*/true,splicing_iit,splicing_divint_crosstable,
 	       donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
diff --git a/src/gsnap_select.c b/src/gsnap_select.c
index 001d338..788a29a 100644
--- a/src/gsnap_select.c
+++ b/src/gsnap_select.c
@@ -27,11 +27,13 @@ main (int argc, char *argv[]) {
   bool sse4_1_support_p;
   bool sse4_2_support_p;
   bool avx2_support_p;
+  bool avx512_support_p;
   char *dir, **new_argv;
   int i;
   int rc;
 
-  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+		&avx2_support_p,&avx512_support_p);
 
   new_argv = (char **) malloc((argc + 1) * sizeof(char *));
   for (i = 1; i < argc; i++) {
diff --git a/src/gsnapl_select.c b/src/gsnapl_select.c
index 1730264..d2d20dc 100644
--- a/src/gsnapl_select.c
+++ b/src/gsnapl_select.c
@@ -27,11 +27,13 @@ main (int argc, char *argv[]) {
   bool sse4_1_support_p;
   bool sse4_2_support_p;
   bool avx2_support_p;
+  bool avx512_support_p;
   char *dir, **new_argv;
   int i;
   int rc;
 
-  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+  CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+		&avx2_support_p,&avx512_support_p);
 
   new_argv = (char **) malloc((argc + 1) * sizeof(char *));
   for (i = 1; i < argc; i++) {
diff --git a/src/indexdb-write.c b/src/indexdb-write.c
index faf0eb3..a6077db 100644
--- a/src/indexdb-write.c
+++ b/src/indexdb-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb-write.c 184465 2016-02-18 00:09:34Z twu $";
+static char rcsid[] = "$Id: indexdb-write.c 199472 2016-10-23 04:00:39Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -695,6 +695,7 @@ Indexdb_write_offsets (char *destdir, char interval_char, FILE *sequence_fp, Uni
 	  ) {
 	masked = oligo & mask;
 	bmer = masked/BLOCKSIZE;
+
 	if (Bitpack64_access_filledp(masked,(int) packsizes[bmer],bitpacks[bmer]) == true) {
 	  bitpacks[bmer] = Bitpack64_realloc_one((int) packsizes[bmer],bitpacks[bmer]);
 	  packsizes[bmer] += 2;
@@ -1006,6 +1007,7 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
 	aaindex = Alphabet_get_aa_index(high,low,watsonp,index1part_nt);
 #endif
 	bmer = aaindex/BLOCKSIZE;
+
 	if (Bitpack64_access_filledp(aaindex,(int) packsizes[bmer],bitpacks[bmer]) == true) {
 	  bitpacks[bmer] = Bitpack64_realloc_one((int) packsizes[bmer],bitpacks[bmer]);
 	  packsizes[bmer] += 2;
@@ -1028,6 +1030,7 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
 	  ) {
 	masked = oligo & mask;
 	bmer = masked/BLOCKSIZE;
+
 	if (Bitpack64_access_filledp(masked,(int) packsizes[bmer],bitpacks[bmer]) == true) {
 	  bitpacks[bmer] = Bitpack64_realloc_one((int) packsizes[bmer],bitpacks[bmer]);
 	  packsizes[bmer] += 2;
@@ -1167,6 +1170,7 @@ Indexdb_bitpack_counter (UINT4 **counterstrm, UINT4 *offsetsmeta, UINT4 *offsets
   for (oligoi = 0ULL; oligoi < oligospace; oligoi += blocksize) {
     countermeta[k++] = nvectors;
 
+#if 0
     Bitpack64_block_offsets(offsets,oligoi,offsetsmeta,offsetsstrm);
 
     maxdiff = 0;
@@ -1190,8 +1194,12 @@ Indexdb_bitpack_counter (UINT4 **counterstrm, UINT4 *offsetsmeta, UINT4 *offsets
       packsize = 32 - firstbit;
 #endif
     }
-
     packsize = (packsize + 1) & ~1; /* Converts packsizes to the next multiple of 2 */
+
+#else
+    packsize = Bitpack64_differential_packsize(oligoi,offsetsmeta);
+#endif    
+
     /* nvectors += (packsize * blocksize)/128; */
     nvectors += packsize / 2;	/* For blocksize of 64 => *64/128 */
   }
@@ -1248,6 +1256,7 @@ Indexdb_bitpack_counter_huge (UINT4 **counterstrm, UINT4 *offsetspages, UINT4 *o
   for (oligoi = 0ULL; oligoi < oligospace; oligoi += BLOCKSIZE) {
     countermeta[k++] = nvectors;
 
+#if 0
     Bitpack64_block_offsets_huge(offsets,oligoi,offsetspages,offsetsmeta,offsetsstrm);
 
     maxdiff = 0;
@@ -1273,6 +1282,10 @@ Indexdb_bitpack_counter_huge (UINT4 **counterstrm, UINT4 *offsetspages, UINT4 *o
     }
 
     packsize = (packsize + 1) & ~1; /* Converts packsizes to the next multiple of 2 */
+#else
+    packsize = Bitpack64_differential_packsize(oligoi,offsetsmeta);
+#endif
+
     /* nvectors += (packsize * blocksize)/128; */
     nvectors += packsize / 2;	/* For blocksize of 64 => *64/128 */
   }
diff --git a/src/indexdb.c b/src/indexdb.c
index 699b6e5..47c98a6 100644
--- a/src/indexdb.c
+++ b/src/indexdb.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb.c 197240 2016-09-01 17:07:26Z twu $";
+static char rcsid[] = "$Id: indexdb.c 200235 2016-11-08 00:57:16Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1518,7 +1518,6 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
 #endif
 
 #ifdef LARGE_GENOMES
-      /*ptr0 =*/ Bitpack64_read_two_huge(&end0,poly_T,new->offsetspages,new->offsetsmeta,new->offsetsstrm);
       if ((filesize = Access_filesize(filenames->positions_high_filename)) != end0 * sizeof(unsigned char)) {
 	fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %zu, but observed %zu.\n",
 		filenames->positions_high_filename,(size_t) (end0*sizeof(unsigned char)),filesize);
diff --git a/src/pair.c b/src/pair.c
index ae3ddda..8a80e3f 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 198279 2016-09-24 00:54:22Z twu $";
+static char rcsid[] = "$Id: pair.c 200236 2016-11-08 00:58:17Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -150,13 +150,14 @@ static bool print_nsnpdiffs_p;
 static double genomelength;	/* For BLAST E-value */
 
 static bool gff3_phase_swap_p = true;
+static bool cigar_extended_p = true;
 
 
 void
 Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
 	    bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
 	    bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
-	    Univcoord_T genomelength_in, bool gff3_phase_swap_p_in) {
+	    Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, bool cigar_extended_p_in) {
   trim_mismatch_score = trim_mismatch_score_in;
   trim_indel_score = trim_indel_score_in;
   gff3_separators_p = gff3_separators_p_in;
@@ -167,6 +168,7 @@ Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
   print_nsnpdiffs_p = print_nsnpdiffs_p_in;
   genomelength = (double) genomelength_in;
   gff3_phase_swap_p = gff3_phase_swap_p_in;
+  cigar_extended_p = cigar_extended_p_in;
 
   return;
 }
@@ -4087,7 +4089,6 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
 #endif	/* PMAP */
 
 
-#ifdef GSNAP
 /* Taken from NCBI Blast 2.2.29, algo/blast/core/blast_stat.c */
 /* Karlin-Altschul formula: m n exp(-lambda * S + log k) = k m n exp(-lambda * S) */
 /* Also in substring.c */
@@ -4103,6 +4104,8 @@ blast_evalue (int alignlength, int nmismatches) {
   return k * (double) alignlength * genomelength * exp(-lambda * score);
 }
 
+
+#ifdef GSNAP
 static double
 blast_bitscore (int alignlength, int nmismatches) {
   double k = 0.1;
@@ -4307,6 +4310,133 @@ Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool inver
 #endif
 
 
+#if 0
+double
+Pair_min_evalue (struct T *pairarray, int npairs) {
+  double min_evalue = 1000.0, evalue;
+  bool in_exon = true;
+  struct T *ptr, *ptr0, *this = NULL;
+  int alignlength_trim, exon_querystart = -1, exon_queryend;
+  int nmismatches_bothdiff, i;
+  int last_querypos = -1;
+
+
+  ptr = pairarray;
+  exon_querystart = ptr->querypos + 1;
+  nmismatches_bothdiff = 0;
+
+  i = 0;
+  while (i < npairs) {
+    this = ptr++;
+    i++;
+
+    if (this->gapp) {
+      if (in_exon == true) {
+	/* SPLICE START */
+	ptr0 = ptr;
+	while (ptr0->gapp) {
+	  ptr0++;
+	}
+	exon_queryend = last_querypos + 1;
+
+	alignlength_trim = exon_queryend - exon_querystart;
+	assert(alignlength_trim >= 0);
+	if ((evalue = blast_evalue(alignlength_trim,nmismatches_bothdiff)) < min_evalue) {
+	  min_evalue = evalue;
+	}
+
+	nmismatches_bothdiff = 0;
+
+	in_exon = false;
+      }
+    } else if (this->comp == INTRONGAP_COMP) {
+      /* May want to print dinucleotides */
+
+    } else {
+      /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP, 
+	 SHORTGAP_COMP, or MISMATCH_COMP */
+      if (in_exon == false) {
+	/* SPLICE CONTINUATION */
+	exon_querystart = this->querypos + 1;
+
+	in_exon = true;
+      }
+      if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
+	if (this->genome == ' ') {
+	  /* INSERTION */
+	  exon_queryend = last_querypos + 1;
+
+	  /* indel_pos = this->querypos; */
+	  while (i < npairs && this->gapp == false && this->genome == ' ') {
+	    this = ptr++;
+	    i++;
+	  }
+	  ptr--;
+	  i--;
+
+	  this = ptr;
+	  exon_querystart = this->querypos + 1;
+	  nmismatches_bothdiff = 0;
+
+	} else if (this->cdna == ' ') {
+	  /* DELETION */
+	  exon_queryend = last_querypos + 1;
+
+	  /* indel_pos = this->querypos; */
+	  while (i < npairs && this->gapp == false && this->cdna == ' ') {
+	    this = ptr++;
+	    i++;
+	  }
+	  ptr--;
+	  i--;
+
+	  /* Finish rest of this line */
+	  alignlength_trim = exon_queryend - exon_querystart;
+	  assert(alignlength_trim >= 0);
+	  if ((evalue = blast_evalue(alignlength_trim,nmismatches_bothdiff)) < min_evalue) {
+	    min_evalue = evalue;
+	  }
+
+	  this = ptr;
+	  exon_querystart = this->querypos + 1;
+	  nmismatches_bothdiff = 0;
+
+	} else {
+	  fprintf(stderr,"Error at %c%c%c\n",this->genome,this->comp,this->cdna);
+	  exit(9);
+	}
+
+      } else {
+	/* c = this->genome; */
+	if (this->genome == this->cdna) {
+	  /* nmatches++; */
+	} else if (this->genomealt == this->cdna) {
+	  /* nmismatches_refdiff++; */
+	} else {
+	  nmismatches_bothdiff++;
+	  /* nmismatches_refdiff++; */
+	}
+      }
+    }
+
+    if (this->cdna != ' ') {
+      last_querypos = this->querypos;
+    }
+  }
+
+  exon_queryend = last_querypos + 1;
+
+  alignlength_trim = exon_queryend - exon_querystart;
+  assert(alignlength_trim >= 0);
+  if ((evalue = blast_evalue(alignlength_trim,nmismatches_bothdiff)) < min_evalue) {
+    min_evalue = evalue;
+  }
+
+  return min_evalue;
+}
+#endif
+
+
 /* Modified from print_endtypes */
 static void
 splice_site_probs (double *sense_prob, double *antisense_prob,
@@ -4698,7 +4828,7 @@ Pair_tokens_cigarlength (List_T tokens) {
     token = (char *) List_head(p);
     type = token[strlen(token)-1];
     /* Should include 'H', but that gets added according to hardclip_low and hardclip_high */
-    if (type == 'S' || type == 'I' || type == 'M') {
+    if (type == 'S' || type == 'I' || type == 'M' || type == 'X' || type == '=') {
       sscanf(token,"%d",&tokenlength);
       length += tokenlength;
     }
@@ -4923,7 +5053,7 @@ print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, ch
 #else
 		int mapq_score,
 #endif
-		char *sam_read_group_id, bool merged_overlap_p, bool sarrayp) {
+		double min_evalue, char *sam_read_group_id, bool merged_overlap_p, bool sarrayp) {
 
 #if 0
   /* Should already be checked when Stage3_T or Stage3end_T object was created */
@@ -5149,6 +5279,11 @@ print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, ch
     FPRINTF(fp,"\tXG:Z:M");
   }
 
+#if 0
+  /* 12. TAGS: XE (BLAST E-value) */
+  FPRINTF(fp,"\tXE:f:%.2g",min_evalue);
+#endif
+
   FPRINTF(fp,"\n");
 
   return;
@@ -5345,9 +5480,9 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
 }
 
 
-List_T
-Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
-		    bool watsonp, int sensedir, int chimera_part) {
+static List_T
+compute_cigar_standard (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
+			bool watsonp, int sensedir, int chimera_part) {
   List_T tokens = NULL;
   char token[11];
   int Mlength = 0, Ilength = 0, Dlength = 0;
@@ -5621,6 +5756,340 @@ Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struc
 }
 
 
+static List_T
+compute_cigar_extended (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
+			bool watsonp, int sensedir, int chimera_part) {
+  List_T tokens = NULL;
+  char token[11];
+  int Elength = 0, Xlength = 0, Ilength = 0, Dlength = 0;
+  bool in_exon = false, deletionp;
+  struct T *ptr, *prev, *this = NULL;
+  int exon_queryend = -1;
+  Chrpos_T exon_genomestart = -1;
+  Chrpos_T exon_genomeend, genome_gap;
+  int query_gap;
+  int last_querypos = -1;
+  Chrpos_T last_genomepos = -1U;
+  int i;
+
+  /* *chimera_hardclip_start = *chimera_hardclip_high = 0; */
+  *intronp = false;
+
+  ptr = pairs;
+
+  if (chimera_part == +1) {
+    if (ptr->querypos > *hardclip_start) {
+      if (ptr->querypos > 0) {
+	/* Clip to beginning */
+	*hardclip_start = ptr->querypos;
+	sprintf(token,"%dH",*hardclip_start);
+	tokens = push_token(tokens,token);
+      }
+    } else {
+      if (*hardclip_start > 0) {
+	/* Clip to hard clip boundary */
+	sprintf(token,"%dH",*hardclip_start);
+	tokens = push_token(tokens,token);
+      }
+    }
+  } else {
+    if (*hardclip_start > 0) {
+      sprintf(token,"%dH",*hardclip_start);
+      tokens = push_token(tokens,token);
+    }
+    if (ptr->querypos > (*hardclip_start)) {
+      sprintf(token,"%dS",ptr->querypos - (*hardclip_start));
+      tokens = push_token(tokens,token);
+    }
+  }
+
+  this = (T) NULL;
+  for (i = 0; i < npairs; i++) {
+    prev = this;
+    this = ptr++;
+
+#if 0
+    /* print_tokens_sam(stdout,tokens); */
+    Pair_dump_one(this,true);
+    printf("\n");
+#endif
+
+    if (this->gapp) {
+      if (in_exon == true) {
+	exon_queryend = last_querypos + 1;
+	exon_genomeend = last_genomepos + 1;
+#if 0
+	if (watsonp) {
+	  intron_start = exon_genomeend + 1;
+	} else {
+	  intron_start = exon_genomeend - 1;
+	}
+#endif
+	
+	if (Elength > 0) {
+	  sprintf(token,"%d=",Elength);
+	  tokens = push_token(tokens,token);
+	} else if (Xlength > 0) {
+	  sprintf(token,"%dX",Xlength);
+	  tokens = push_token(tokens,token);
+	} else if (Ilength > 0) {
+	  sprintf(token,"%dI",Ilength);
+	  tokens = push_token(tokens,token);
+	} else if (Dlength > 0) {
+	  sprintf(token,"%dD",Dlength);
+	  tokens = push_token(tokens,token);
+	}
+
+	Elength = Xlength = Ilength = Dlength = 0;
+
+	in_exon = false;
+      }
+
+    } else if (this->comp == INTRONGAP_COMP) {
+      /* Do nothing */
+
+    } else {
+      /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP, 
+	 SHORTGAP_COMP, or MISMATCH_COMP */
+      if (in_exon == false) {
+	/* exon_querystart = this->querypos + 1; */
+	exon_genomestart = this->genomepos + 1;
+
+	if (prev != NULL) {
+	  /* Gap */
+	  /* abs() gives a large value when flag -m64 is specified */
+	  /* genome_gap = abs(intron_end - intron_start) + 1; */
+	  if (watsonp) {
+	    /* intron_end = exon_genomestart - 1; */
+	    /* genome_gap = (intron_end - intron_start) + 1; */
+	    genome_gap = exon_genomestart - exon_genomeend - 1;
+	  } else {
+	    /* intron_end = exon_genomestart + 1; */
+	    /* genome_gap = (intron_start - intron_end) + 1; */
+	    genome_gap = exon_genomeend - exon_genomestart - 1;
+	  }
+
+	  deletionp = false;
+#ifdef CONVERT_INTRONS_TO_DELETIONS
+	  if (sensedir == SENSE_FORWARD) {
+	    if (prev->comp == FWD_CANONICAL_INTRON_COMP ||
+		prev->comp == FWD_GCAG_INTRON_COMP ||
+		prev->comp == FWD_ATAC_INTRON_COMP) {
+	      sprintf(token,"%uN",genome_gap);
+	      *intronp = true;
+	    } else if (cigar_noncanonical_splices_p == true && genome_gap >= MIN_INTRONLEN) {
+	      sprintf(token,"%uN",genome_gap);
+	      *intronp = true;
+	    } else {
+	      sprintf(token,"%uD",genome_gap);
+	      deletionp = true;
+	    }
+	  } else if (sensedir == SENSE_ANTI) {
+	    if (prev->comp == REV_CANONICAL_INTRON_COMP ||
+		prev->comp == REV_GCAG_INTRON_COMP ||
+		prev->comp == REV_ATAC_INTRON_COMP) {
+	      sprintf(token,"%uN",genome_gap);
+	      *intronp = true;
+	    } else if (cigar_noncanonical_splices_p == true && genome_gap >= MIN_INTRONLEN) {
+	      sprintf(token,"%uN",genome_gap);
+	      *intronp = true;
+	    } else {
+	      sprintf(token,"%uD",genome_gap);
+	      deletionp = true;
+	    }
+	  } else if (cigar_noncanonical_splices_p == true && genome_gap >= MIN_INTRONLEN){
+	    sprintf(token,"%uN",genome_gap);
+	    *intronp = true;
+	  } else {
+	    sprintf(token,"%uD",genome_gap);
+	    deletionp = true;
+	  }
+#else
+	  sprintf(token,"%uN",genome_gap);
+	  *intronp = true;
+#endif
+	  tokens = push_token(tokens,token);
+
+	  /* Check for dual gap.  Doesn't work for hard clipping. */
+	  /* assert(exon_queryend >= 0); */
+
+	  query_gap = this->querypos - exon_queryend;
+	  assert(query_gap >= 0);
+	  if (query_gap > 0) {
+	    if (deletionp == true && sam_insert_0M_p == true) {
+	      /* Put zero matches between deletion and insertion, since some programs will complain */
+	      sprintf(token,"0M");
+	      tokens = push_token(tokens,token);
+	    }
+
+	    sprintf(token,"%uI",query_gap);
+	    tokens = push_token(tokens,token);
+	  }
+	}
+
+	in_exon = true;
+      }
+
+      if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
+	/* Gap in upper or lower sequence */
+	if (this->genome == ' ') {
+	  /* Insertion relative to genome */
+	  if (Elength > 0) {
+	    sprintf(token,"%d=",Elength);
+	    tokens = push_token(tokens,token);
+	    Elength = 0;
+	  } else if (Xlength > 0) {
+	    sprintf(token,"%dX",Xlength);
+	    tokens = push_token(tokens,token);
+	    Xlength = 0;
+	  } else if (Dlength > 0) {
+	    /* unlikely */
+	    sprintf(token,"%dD",Dlength);
+	    tokens = push_token(tokens,token);
+	    Dlength = 0;
+	  }
+	  Ilength++;
+	} else if (this->cdna == ' ') {
+	  /* Deletion relative to genome */
+	  if (Elength > 0) {
+	    sprintf(token,"%d=",Elength);
+	    tokens = push_token(tokens,token);
+	    Elength = 0;
+	  } else if (Xlength > 0) {
+	    sprintf(token,"%dX",Xlength);
+	    tokens = push_token(tokens,token);
+	    Xlength = 0;
+	  } else if (Ilength > 0) {
+	    sprintf(token,"%dI",Ilength);
+	    tokens = push_token(tokens,token);
+	    Ilength = 0;
+	  }
+	  Dlength++;
+	} else {
+	  fprintf(stderr,"Error at %c%c%c\n",this->genome,this->comp,this->cdna);
+	  exit(9);
+	}
+
+      } else {
+	/* Count even if unknown base */
+
+	if (Ilength > 0) {
+	  sprintf(token,"%dI",Ilength);
+	  tokens = push_token(tokens,token);
+	  Ilength = 0;
+	} else if (Dlength > 0) {
+	  sprintf(token,"%dD",Dlength);
+	  tokens = push_token(tokens,token);
+	  Dlength = 0;
+	}
+
+	if (prev == NULL || prev->gapp || prev->comp == INDEL_COMP || prev->comp == SHORTGAP_COMP) {
+	  if (this->cdna == this->genome) {
+	    Elength++;
+	  } else {
+	    Xlength++;
+	  }
+
+	} else if (prev->cdna == prev->genome) {
+	  if (this->cdna == this->genome) {
+	    Elength++;
+	  } else {
+	    if (Elength > 0) {
+	      sprintf(token,"%d=",Elength);
+	      tokens = push_token(tokens,token);
+	      Elength = 0;
+	    }
+	    Xlength++;
+	  }
+
+	} else {
+	  if (this->cdna != this->genome) {
+	    Xlength++;
+	  } else {
+	    if (Xlength > 0) {
+	      sprintf(token,"%dX",Xlength);
+	      tokens = push_token(tokens,token);
+	      Xlength = 0;
+	    }
+	    Elength++;
+	  }
+	}
+      }
+    }
+
+    if (this != NULL) {
+      if (this->cdna != ' ') {
+	last_querypos = this->querypos;
+      }
+      if (this->genome != ' ') {
+	last_genomepos = this->genomepos;
+      }
+    }
+  }
+
+  /* prev = this; */
+  /* exon_queryend = last_querypos + 1; */
+  /* exon_genomeend = last_genomepos + 1; */
+
+  if (Elength > 0) {
+    sprintf(token,"%d=",Elength);
+    tokens = push_token(tokens,token);
+  } else if (Xlength > 0) {
+    sprintf(token,"%dX",Xlength);
+    tokens = push_token(tokens,token);
+  } else if (Ilength > 0) {
+    sprintf(token,"%dI",Ilength);
+    tokens = push_token(tokens,token);
+  } else if (Dlength > 0) {
+    sprintf(token,"%dD",Dlength);
+    tokens = push_token(tokens,token);
+  }
+
+
+  /* Terminal clipping */
+  if (chimera_part == -1) {
+    if (last_querypos < querylength_given - 1 - (*hardclip_end)) {
+      if (last_querypos < querylength_given - 1) {
+	/* Clip to end */
+	*hardclip_end = querylength_given - 1 - last_querypos;
+	sprintf(token,"%dH",*hardclip_end);
+	tokens = push_token(tokens,token);
+      }
+    } else {
+      if (*hardclip_end > 0) {
+	/* Clip to hard clip boundary */
+	sprintf(token,"%dH",*hardclip_end);
+	tokens = push_token(tokens,token);
+      }
+    }
+  } else {
+    if (last_querypos < querylength_given - 1 - (*hardclip_end)) {
+      sprintf(token,"%dS",querylength_given - 1 - (*hardclip_end) - last_querypos);
+      tokens = push_token(tokens,token);
+    }
+    if (*hardclip_end > 0) {
+      sprintf(token,"%dH",*hardclip_end);
+      tokens = push_token(tokens,token);
+    }
+  }
+
+  return Pair_clean_cigar(tokens,watsonp);
+}
+
+
+List_T
+Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
+		    bool watsonp, int sensedir, int chimera_part) {
+  if (cigar_extended_p == true) {
+    return compute_cigar_extended(&(*intronp),&(*hardclip_start),&(*hardclip_end),pairs,npairs,querylength_given,
+				  watsonp,sensedir,chimera_part);
+  } else {
+    return compute_cigar_standard(&(*intronp),&(*hardclip_start),&(*hardclip_end),pairs,npairs,querylength_given,
+				  watsonp,sensedir,chimera_part);
+  }
+}
+
+
 #if 0
 /* Copied from samprint.c */
 static bool
@@ -6176,7 +6645,7 @@ compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nin
       } else if (type == 'S') {
 	/* k += length; */
 
-      } else if (type == 'M') {
+      } else if (type == 'M' || type == 'X' || type == '=') {
 	for (i = 0; i < length; i++, k++) {
 	  this = &(pairs[k]);
 	  debug4(printf("M %d/%d comp %c\n",i,length,this->comp));
@@ -6271,7 +6740,7 @@ compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nin
       } else if (type == 'S') {
 	/* k += length; */
 
-      } else if (type == 'M') {
+      } else if (type == 'M' || type == 'X' || type == '=') {
 	if (state == IN_DELETION) {
 	  md_tokens = push_token(md_tokens,"^");
 	}
@@ -6415,6 +6884,7 @@ Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairarray, int npairs,
   struct T *clipped_pairarray;
   int clipped_npairs;
   bool cigar_tokens_alloc;
+  double min_evalue, max_bitscore;
 
 
   if (chrnum == 0) {
@@ -6484,6 +6954,10 @@ Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairarray, int npairs,
   md_tokens = compute_md_string(&nmismatches_refdiff,&nmismatches_bothdiff,&nindels,
 				clipped_pairarray,clipped_npairs,watsonp,cigar_tokens);
 
+#if 0
+  min_evalue = Pair_min_evalue(clipped_pairarray,clipped_npairs);
+#endif
+
   print_sam_line(fp,abbrev,first_read_p,acc1,acc2,chrstring,
 		 watsonp,sensedir,cigar_tokens,md_tokens,
 		 nmismatches_refdiff,nmismatches_bothdiff,nindels,
@@ -6496,7 +6970,7 @@ Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairarray, int npairs,
 #else
 		 mapq_score,
 #endif
-		 sam_read_group_id,merged_overlap_p,sarrayp);
+		 min_evalue,sam_read_group_id,merged_overlap_p,sarrayp);
 
   /* Print procedures free the character strings */
   Pair_tokens_free(&md_tokens);
diff --git a/src/pair.h b/src/pair.h
index 3123737..f461b2f 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 196403 2016-08-16 14:33:56Z twu $ */
+/* $Id: pair.h 200236 2016-11-08 00:58:17Z twu $ */
 #ifndef PAIR_INCLUDED
 #define PAIR_INCLUDED
 
@@ -32,7 +32,7 @@ extern void
 Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
 	    bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
 	    bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
-	    Univcoord_T genomelength_in, bool gff3_phase_swap_p_in);
+	    Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, bool cigar_extended_p_in);
 extern int
 Pair_querypos (T this);
 extern Chrpos_T
diff --git a/src/sam_sort.c b/src/sam_sort.c
index adfeb5f..bcf37e9 100644
--- a/src/sam_sort.c
+++ b/src/sam_sort.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sam_sort.c 191131 2016-06-03 17:23:29Z twu $";
+static char rcsid[] = "$Id: sam_sort.c 200239 2016-11-08 01:00:30Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -27,6 +27,9 @@ static char rcsid[] = "$Id: sam_sort.c 191131 2016-06-03 17:23:29Z twu $";
 #include "getopt.h"
 
 
+#define MONITOR_INTERVAL 100000
+
+
 /************************************************************************
  *
  *  Check for correctness:
@@ -613,7 +616,7 @@ process_without_dups (FILE **sam_inputs, int *headerlengths, int *ncells, int ni
   int n_mappers = 0, n_nomappers = 0;
   Intlist_T l;
   struct T *cells_allocated, *ptr;
-  int i, j, k;
+  int b, i, j, k;
 
   size_t fileposition;
   int linelen;
@@ -664,7 +667,16 @@ process_without_dups (FILE **sam_inputs, int *headerlengths, int *ncells, int ni
 
     Stopwatch_start(stopwatch);
     fprintf(stderr,"Printing entries...");
-    for (k = 0; k < ncells_total; k++) {
+    for (b = 0; b + MONITOR_INTERVAL < ncells_total; b += MONITOR_INTERVAL) {
+      fprintf(stderr,"Printed %d entries\n",b);
+      for (i = 0; i < MONITOR_INTERVAL; i++) {
+	k = b + i;
+	debug(printf("%u\t%u\t%d\n",cells[k]->genomicpos,cells[k]->linestart,cells[k]->linelen));
+	fp_sam = sam_inputs[cells[k]->filei];
+	Cell_print_fromfile(fp_sam,cells[k],headers);
+      }
+    }
+    for (k = b; k < ncells_total; k++) {
       debug(printf("%u\t%u\t%d\n",cells[k]->genomicpos,cells[k]->linestart,cells[k]->linelen));
       fp_sam = sam_inputs[cells[k]->filei];
       Cell_print_fromfile(fp_sam,cells[k],headers);
@@ -679,7 +691,15 @@ process_without_dups (FILE **sam_inputs, int *headerlengths, int *ncells, int ni
 
     Stopwatch_start(stopwatch);
     fprintf(stderr,"Printing entries...");
-    for (k = 0; k < ncells_total; k++) {
+    for (b = 0; b + MONITOR_INTERVAL < ncells_total; b += MONITOR_INTERVAL) {
+      fprintf(stderr,"Printed %d entries\n",b);
+      for (i = 0; i < MONITOR_INTERVAL; i++) {
+	k = b + i;
+	fp_sam = sam_inputs[cells[k]->filei];
+	Cell_print_fromfile(fp_sam,cells[k],headers);
+      }
+    }
+    for (k = b; k < ncells_total; k++) {
       fp_sam = sam_inputs[cells[k]->filei];
       Cell_print_fromfile(fp_sam,cells[k],headers);
     }
diff --git a/src/samprint.c b/src/samprint.c
index 1fe4f71..a377944 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 197889 2016-09-15 23:19:09Z twu $";
+static char rcsid[] = "$Id: samprint.c 200237 2016-11-08 00:58:55Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -67,6 +67,7 @@ static int maxpaths_report;
 static char *failedinput_root;
 static bool fastq_format_p;
 static bool hide_soft_clips_p;
+static bool cigar_extended_p = false;
 
 static bool clip_overlap_p;
 static bool merge_overlap_p;
@@ -1143,6 +1144,257 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
 }
 
 
+/* Based on print_md_string */
+static void
+print_extended_cigar (Filestring_T fp, char *genomicfwd_refdiff,
+		      int stringlength, int querypos, int querylength,
+		      int hardclip_low, int hardclip_high, bool plusp, bool lastp) {
+  int nmatches = 0, nmismatches = 0;
+  int starti, endi, i;
+  bool hardclip_end_p = false;
+  int cliplength, endpos;
+
+  if (plusp == true) {
+    debug2(printf("\nEntering print_extended_cigar with querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus: %s ref, %s both\n",
+		  querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
+    if (hardclip_low == 0) {
+      starti = 0;
+      hardclip_end_p = true;
+    } else if (hardclip_low > querypos) {
+      /* startpos = hardclip_low; */
+      starti = hardclip_low - querypos;
+      hardclip_end_p = true;
+      debug2(printf("  Setting starti %d = hardclip_low %d - querypos %d\n",
+		    starti,hardclip_low,querypos));
+    } else {
+      /* startpos = querypos; */
+      starti = 0;
+    }
+
+    if (querylength - hardclip_high < querypos + stringlength) {
+      endpos = querylength - hardclip_high;
+      endi = (querylength - hardclip_high) - querypos;
+      debug2(printf("  Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
+		    endi,querylength,hardclip_high,querypos));
+    } else {
+      endpos = querypos + stringlength;
+      endi = stringlength;
+    }
+
+    debug2(printf("  Counting matches from %d to %d\n",starti,endi));
+
+    if (genomicfwd_refdiff == NULL) {
+      if (endi > starti) {
+	nmatches += (endi - starti);
+      }
+
+    } else if (md_lowercase_variant_p == false) {
+      for (i = starti; i < endi; i++) {
+	if (isupper(genomicfwd_refdiff[i])) {
+	  if (nmismatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%dX",nmismatches);
+	    nmismatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmatches++;
+
+	} else {
+	  /* A true mismatch against both variants */
+	  if (nmatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d=",nmatches);
+	    nmatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmismatches++;
+	}
+      }
+
+    } else {
+      for (i = starti; i < endi; i++) {
+	if (isupper(genomicfwd_refdiff[i])) {
+	  if (nmismatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%dX",nmismatches);
+	    nmismatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmatches++;
+
+#if 0
+	} else if (isupper(genomicfwd_bothdiff[i])) {
+	  /* A mismatch against the reference only => alternate variant */
+	  if (nmatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d=",nmatches);
+	    nmatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmismatches++;
+#endif
+
+	} else {
+	  /* A true mismatch against both variants */
+	  if (nmatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d=",nmatches);
+	    nmatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmismatches++;
+	}
+      }
+    }
+
+    if (nmatches > 0) {
+      FPRINTF(fp,"%d=",nmatches);
+    } else if (nmismatches > 0) {
+      FPRINTF(fp,"%dX",nmismatches);
+    }
+
+    if (lastp == true) {
+      /* cliplength = querypos + stringlength - endpos; */
+      cliplength = querylength - endpos;
+      if (cliplength > 0) {
+	debug1(printf("  Pushing final %dH\n",cliplength));
+	FPRINTF(fp,"%dH",cliplength);
+      }
+    }
+
+  } else {
+    debug2(printf("\nEntering print_extended_cigar with querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus: %s ref, %s both\n",
+		  querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
+    querypos = querylength - querypos - stringlength;
+    debug2(printf("  Revising querypos to be %d\n",querypos));
+
+    if (hardclip_low == 0) {
+      starti = 0;
+      hardclip_end_p = true;
+    } else if (hardclip_low > querypos) {
+      /* startpos = hardclip_low; */
+      starti = hardclip_low - querypos;
+      hardclip_end_p = true;
+      debug2(printf("  Setting starti %d = hardclip_low %d - querypos %d\n",
+		    starti,hardclip_low,querypos));
+    } else {
+      /* startpos = querypos; */
+      starti = 0;
+    }
+
+    if (querylength - hardclip_high < querypos + stringlength) {
+      endpos = querylength - hardclip_high;
+      endi = (querylength - hardclip_high) - querypos;
+      debug2(printf("  Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
+		    endi,querylength,hardclip_high,querypos));
+    } else {
+      endpos = querypos + stringlength;
+      endi = stringlength;
+    }
+
+    debug2(printf("  Counting matches from %d to %d\n",starti,endi));
+
+    if (genomicfwd_refdiff == NULL) {
+      if (endi > starti) {
+	nmatches += (endi - starti);
+      }
+
+    } else if (md_lowercase_variant_p == false) {
+      for (i = starti; i < endi; i++) {
+	if (isupper(genomicfwd_refdiff[i])) {
+	  if (nmismatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%dX",nmismatches);
+	    nmismatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmatches++;
+
+	} else {
+	  /* A true mismatch against both variants */
+	  if (nmatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d=",nmatches);
+	    nmatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmismatches++;
+	}
+      }
+
+    } else {
+      for (i = starti; i < endi; i++) {
+	if (isupper(genomicfwd_refdiff[i])) {
+	  if (nmismatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%dX",nmismatches);
+	    nmismatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmatches++;
+
+#if 0
+	} else if (isupper(genomicfwd_bothdiff[i])) {
+	  /* A mismatch against the reference only => alternate variant */
+	  if (nmatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d=",nmatches);
+	    nmatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmismatches++;
+#endif
+
+	} else {
+	  /* A true mismatch against both variants */
+	  if (nmatches > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d=",nmatches);
+	    nmatches = 0;
+	    hardclip_end_p = false;
+	  }
+	  nmismatches++;
+	}
+      }
+    }
+
+    if (nmatches > 0) {
+      FPRINTF(fp,"%d=",nmatches);
+    } else if (nmismatches > 0) {
+      FPRINTF(fp,"%dX",nmismatches);
+    }
+
+    if (lastp == true) {
+      cliplength = endpos;
+      if (cliplength > 0) {
+	debug1(printf("  Pushing final %dH\n",cliplength));
+	FPRINTF(fp,"%dH",cliplength);
+      }
+    }
+  }
+
+  return;
+}
+
+
+static void
+print_cigar_M (Filestring_T fp, Substring_T substring, int substring_length, int substring_start,
+	       int stringlength, int querypos, int querylength,
+	       int hardclip_low, int hardclip_high, bool plusp, bool lastp, int trimlength) {
+  char *genomicfwd_refdiff, *genomicdir_refdiff;
+  
+  if (cigar_extended_p == false) {
+    print_cigar(fp,/*type*/'M',stringlength,querypos,querylength,
+		hardclip_low,hardclip_high,plusp,lastp,trimlength);
+  } else if ((genomicdir_refdiff = Substring_genomic_refdiff(substring)) == NULL) {
+    print_extended_cigar(fp,/*genomicfwd_refdiff*/NULL,/*stringlength*/substring_length,
+			 /*querypos*/substring_start,querylength,
+			 hardclip_low,hardclip_high,plusp,lastp);
+  } else if (plusp == true) {
+    print_extended_cigar(fp,&(genomicdir_refdiff[substring_start]),/*stringlength*/substring_length,
+			 /*querypos*/substring_start,querylength,
+			 hardclip_low,hardclip_high,plusp,lastp);
+  } else {
+    genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+    make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+    print_extended_cigar(fp,genomicfwd_refdiff,/*stringlength*/substring_length,
+			 /*querypos*/substring_start,querylength,
+			 hardclip_low,hardclip_high,plusp,lastp);
+    FREEA(genomicfwd_refdiff);
+  }
+}
+
+
 #if 0
 /* Copy also in pair.c for GMAP */
 static bool
@@ -1281,17 +1533,22 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 		   Substring_queryend((Substring_T) List_head(p))));
 
       if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'M',
-		    Substring_querystart(substring) + Substring_match_length(substring) +
-		    (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+	substring_start = Substring_querystart_orig(substring);
+	substring_length = Substring_match_length_orig(substring);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_querystart(substring) + Substring_match_length(substring) +
+		      (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
+		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
       } else {
+	substring_start = Substring_querystart(substring);
+	substring_length = Substring_match_length(substring);
 	print_cigar(fp,/*type*/'S',Substring_querystart(substring),
 		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
 		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-		    /*querypos*/Substring_querystart(substring),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_match_length(substring),
+		      /*querypos*/Substring_querystart(substring),querylength,
+		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
 	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
 		    /*querypos*/Substring_queryend(substring),querylength,
 		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
@@ -1307,18 +1564,23 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
       post_junction = (Junction_T) List_head(q);
 
       if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'M',
-		    Substring_querystart(substring) +
-		    Substring_match_length(substring),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	substring_start = Substring_querystart_orig(substring);
+	substring_length = Substring_match_length_orig(substring);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_querystart(substring) +
+		      Substring_match_length(substring),
+		      /*querypos*/0,querylength,hardclip_low,hardclip_high,
+		      /*plusp*/true,/*lastp*/false,/*trimlength*/0);
       } else {
+	substring_start = Substring_querystart(substring);
+	substring_length = Substring_match_length(substring);
 	print_cigar(fp,/*type*/'S',Substring_querystart(substring),
 		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
 		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-		    /*querypos*/Substring_querystart(substring),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_match_length(substring),
+		      /*querypos*/Substring_querystart(substring),querylength,
+		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
       }
       p = List_next(p);
       
@@ -1345,15 +1607,19 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 		       Substring_queryend((Substring_T) List_head(p))));
 	  
 	  if (hide_soft_clips_p == true) {
-	    print_cigar(fp,/*type*/'M',
-			Substring_match_length(substring) +
-			(querylength - Substring_queryend(substring)),
-			/*querypos*/Substring_querystart(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+	    substring_start = Substring_querystart_orig(substring);
+	    substring_length = Substring_match_length_orig(substring);
+	    print_cigar_M(fp,substring,substring_length,substring_start,
+			  Substring_match_length(substring) +
+			  (querylength - Substring_queryend(substring)),
+			  /*querypos*/Substring_querystart(substring),querylength,
+			  hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
 	  } else {
-	    print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-			/*querypos*/Substring_querystart(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	    substring_start = Substring_querystart(substring);
+	    substring_length = Substring_match_length(substring);
+	    print_cigar_M(fp,substring,substring_length,substring_start,Substring_match_length(substring),
+			  /*querypos*/Substring_querystart(substring),querylength,
+			  hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
 	    print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
 			/*querypos*/Substring_queryend(substring),querylength,
 			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
@@ -1365,10 +1631,13 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 	  /* Middle substring, plus */
 	  debug(printf("Middle substring, plus %d..%d\n",Substring_querystart((Substring_T) List_head(p)), 
 		       Substring_queryend((Substring_T) List_head(p))));
+	  substring_start = Substring_querystart(substring);
+	  substring_length = Substring_match_length(substring);
 
-	  print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-		      /*querypos*/Substring_querystart(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring),
+			/*querypos*/Substring_querystart(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
 	}
 	p = List_next(p);
       }
@@ -1389,15 +1658,20 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 	debug(printf("Last substring, plus, hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
 		     Substring_queryend((Substring_T) List_head(p))));
 	if (hide_soft_clips_p == true) {
-	  print_cigar(fp,/*type*/'M',
-		      Substring_match_length(substring) +
-		      (querylength - Substring_queryend(substring)),
-		      /*querypos*/Substring_querystart(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+	  substring_start = Substring_querystart_orig(substring);
+	  substring_length = Substring_match_length_orig(substring);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring) +
+			(querylength - Substring_queryend(substring)),
+			/*querypos*/Substring_querystart(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
 	} else {
-	  print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-		      /*querypos*/Substring_querystart(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	  substring_start = Substring_querystart(substring);
+	  substring_length = Substring_match_length(substring);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring),
+			/*querypos*/Substring_querystart(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
 	  print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
 		      /*querypos*/Substring_queryend(substring),querylength,
 		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
@@ -1426,18 +1700,23 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 		   Substring_queryend((Substring_T) List_head(p))));
 
       if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'M',
-		    (querylength - Substring_queryend(substring)) + 
-		    Substring_match_length(substring) + Substring_querystart(substring),
-		    /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/false,/*lastp*/true,/*trimlength*/0);
+	substring_start = Substring_querystart_orig(substring);
+	substring_length = Substring_match_length_orig(substring);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      (querylength - Substring_queryend(substring)) + 
+		      Substring_match_length(substring) + Substring_querystart(substring),
+		      /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+		      /*plusp*/false,/*lastp*/true,/*trimlength*/0);
       } else {
+	substring_start = Substring_querystart(substring);
+	substring_length = Substring_match_length(substring);
 	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
 		    /*querypos*/querylength,querylength,
 		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-		    /*querypos*/Substring_queryend(substring),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_match_length(substring),
+		      /*querypos*/Substring_queryend(substring),querylength,
+		      hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
 	print_cigar(fp,/*type*/'S',Substring_querystart(substring),
 		    /*querypos*/Substring_querystart(substring),querylength,
 		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
@@ -1453,18 +1732,23 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
       post_junction = (Junction_T) List_head(q);
 
       if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'M',
-		    (querylength - Substring_queryend(substring)) +
-		    Substring_match_length(substring),
-		    /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	substring_start = Substring_querystart_orig(substring);
+	substring_length = Substring_match_length_orig(substring);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      (querylength - Substring_queryend(substring)) +
+		      Substring_match_length(substring),
+		      /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+		      /*plusp*/false,/*lastp*/false,/*trimlength*/0);
       } else {
+	substring_start = Substring_querystart(substring);
+	substring_length = Substring_match_length(substring);
 	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
 		    /*querypos*/querylength,querylength,
 		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-		    /*querypos*/Substring_queryend(substring),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_match_length(substring),
+		      /*querypos*/Substring_queryend(substring),querylength,
+		      hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
       }
       p = List_next(p);
 
@@ -1491,15 +1775,20 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 		       Substring_queryend((Substring_T) List_head(p))));
 
 	  if (hide_soft_clips_p == true) {
-	    print_cigar(fp,/*type*/'M',
-			Substring_match_length(substring) +
-			Substring_querystart(substring),
-			/*querypos*/Substring_queryend(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+	    substring_start = Substring_querystart_orig(substring);
+	    substring_length = Substring_match_length_orig(substring);
+	    print_cigar_M(fp,substring,substring_length,substring_start,
+			  Substring_match_length(substring) +
+			  Substring_querystart(substring),
+			  /*querypos*/Substring_queryend(substring),querylength,
+			  hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
 	  } else {
-	    print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-			/*querypos*/Substring_queryend(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	    substring_start = Substring_querystart(substring);
+	    substring_length = Substring_match_length(substring);
+	    print_cigar_M(fp,substring,substring_length,substring_start,
+			  Substring_match_length(substring),
+			  /*querypos*/Substring_queryend(substring),querylength,
+			  hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
 	    print_cigar(fp,/*type*/'S',Substring_querystart(substring),
 			/*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
 			/*plusp*/false,/*lastp*/true,/*trimlength*/0);
@@ -1511,10 +1800,13 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 	  /* Middle substring, minus */
 	  debug(printf("Middle substring, minus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
 		       Substring_queryend((Substring_T) List_head(p))));
+	  substring_start = Substring_querystart(substring);
+	  substring_length = Substring_match_length(substring);
 
-	  print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-		      /*querypos*/Substring_queryend(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring),
+			/*querypos*/Substring_queryend(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
 	}
 	p = List_next(p);
       }
@@ -1536,15 +1828,20 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 		     Substring_queryend((Substring_T) List_head(p))));
 
 	if (hide_soft_clips_p == true) {
-	  print_cigar(fp,/*type*/'M',
-		      Substring_match_length(substring) +
-		      Substring_querystart(substring),
-		      /*querypos*/Substring_queryend(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+	  substring_start = Substring_querystart_orig(substring);
+	  substring_length = Substring_match_length_orig(substring);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring) +
+			Substring_querystart(substring),
+			/*querypos*/Substring_queryend(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
 	} else {
-	  print_cigar(fp,/*type*/'M',Substring_match_length(substring),
-		      /*querypos*/Substring_queryend(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	  substring_start = Substring_querystart(substring);
+	  substring_length = Substring_match_length(substring);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring),
+			/*querypos*/Substring_queryend(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
 	  print_cigar(fp,/*type*/'S',Substring_querystart(substring),
 		      /*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
 		      /*plusp*/false,/*lastp*/true,/*trimlength*/0);
@@ -2136,6 +2433,11 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
     FPRINTF(fp,"\tXG:Z:A");
   }
 
+#if 0
+  /* 12. TAGS: XE (BLAST E-value) */
+  FPRINTF(fp,"\tXE:f:%.2g",Stage3end_min_evalue(stage3end));
+#endif
+
   FPRINTF(fp,"\n");
   return;
 }
@@ -2269,7 +2571,8 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
 
 
   if (sensep == true) {
-    assert(Substring_siteD_pos(donor) == Substring_queryend(donor));
+    /* Doesn't hold for DNA-Seq chimeras */
+    /* assert(Substring_siteD_pos(donor) == Substring_queryend(donor)); */
     if (plusp == true) {
       /* sensep true, plusp true */
       /* FPRINTF(fp,"donor sensep true, plusp true\n"); */
@@ -2328,7 +2631,8 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
     }
 
   } else {
-    assert(Substring_siteD_pos(donor) == Substring_querystart(donor));
+    /* Doesn't hold for DNA-Seq chimeras */
+    /* assert(Substring_siteD_pos(donor) == Substring_querystart(donor)); */
     if (plusp == true) {
       /* sensep false, plusp true */
       /* FPRINTF(fp,"donor sensep false, plusp true\n"); */
@@ -2588,8 +2892,11 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
   FPRINTF(fp,"\tXO:Z:%s",abbrev);
 
   /* 12. TAGS: XS */
-  assert(donor_sensedir != SENSE_NULL);
-  FPRINTF(fp,"\tXS:A:%c",donor_strand);
+  /* Doesn't hold for DNA-Seq chimeras */
+  /* assert(donor_sensedir != SENSE_NULL); */
+  if (donor_sensedir != SENSE_NULL) {
+    FPRINTF(fp,"\tXS:A:%c",donor_strand);
+  }
 
   /* 12. TAGS: XA */
   if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
@@ -2783,7 +3090,8 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
 
 
   if (sensep == true) {
-    assert(Substring_siteA_pos(acceptor) == Substring_querystart(acceptor));
+    /* Doesn't hold for DNA-Seq chimeras */
+    /* assert(Substring_siteA_pos(acceptor) == Substring_querystart(acceptor)); */
     if (plusp == true) {
       /* sensep true, plusp true */
       /* FPRINTF(fp,"acceptor sensep true, plusp true\n"); */
@@ -2837,7 +3145,8 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
 
   } else {
     /* sensep false, plusp true */
-    assert(Substring_siteA_pos(acceptor) == Substring_queryend(acceptor));
+    /* Doesn't hold for DNA-Seq chimeras */
+    /* assert(Substring_siteA_pos(acceptor) == Substring_queryend(acceptor)); */
     if (plusp == true) {
       /* FPRINTF(fp,"acceptor sensep false, plusp true\n"); */
       if (hide_soft_clips_p == true) {
@@ -3094,8 +3403,11 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
   FPRINTF(fp,"\tXO:Z:%s",abbrev);
 
   /* 12. TAGS: XS */
-  assert(acceptor_sensedir != SENSE_NULL);
-  FPRINTF(fp,"\tXS:A:%c",acceptor_strand);
+  /* Doesn't hold for DNA-Seq chimeras */
+  /* assert(acceptor_sensedir != SENSE_NULL); */
+  if (acceptor_sensedir != SENSE_NULL) {
+    FPRINTF(fp,"\tXS:A:%c",acceptor_strand);
+  }
 
   /* 12. TAGS: XA */
   if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
@@ -3257,7 +3569,12 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
       donor_strand = '+';
     }
   } else {
-    abort();
+    /* For DNA-Seq chimeras, SENSE_NULL is possible */
+    if (Substring_plusp(donor) == true) {
+      donor_strand = '+';
+    } else {
+      donor_strand = '-';
+    }
   }
 
   if ((acceptor_sensedir = Substring_sensedir(acceptor)) == SENSE_FORWARD) {
@@ -3273,7 +3590,12 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
       acceptor_strand = '+';
     }
   } else {
-    abort();
+    /* For DNA-Seq chimeras, SENSE_NULL is possible */
+    if (Substring_plusp(acceptor) == true) {
+      acceptor_strand = '+';
+    } else {
+      acceptor_strand = '-';
+    }
   }
 
   if (sensedir == SENSE_FORWARD) {
diff --git a/src/stage1hr.c b/src/stage1hr.c
index d5019a3..7178440 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 198074 2016-09-21 00:25:02Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 199517 2016-10-24 23:55:08Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -14301,6 +14301,9 @@ compute_floors (bool *any_omitted_p, bool *alloc_floors_p, Floors_T *floors_arra
   } else if (keep_floors_p == false) {
     floors = Floors_new_standard(querylength,max_end_insertions,/*keep_floors_p*/false);
     *alloc_floors_p = true;
+  } else if (querylength <= index1part) {
+    *alloc_floors_p = false;
+    return (Floors_T) NULL;
   } else {
     if (floors_array[querylength] == NULL) {
       floors_array[querylength] = Floors_new_standard(querylength,max_end_insertions,/*keep_floors_p*/true);
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 3b36519..f0e8170 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 198077 2016-09-21 00:34:35Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 200238 2016-11-08 00:59:56Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1173,6 +1173,21 @@ Stage3end_substring_containing (T this, int querypos) {
 }
 
 
+double
+Stage3end_min_evalue (T this) {
+  double min_evalue = 1000.0, evalue;
+  Substring_T substring;
+  List_T p;
+
+  for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+    substring = (Substring_T) List_head(p);
+    if ((evalue = Substring_evalue(substring)) < min_evalue) {
+      min_evalue = evalue;
+    }
+  }
+
+  return min_evalue;
+}
 
 struct Pair_T *
 Stage3end_pairarray (T this) {
@@ -7223,6 +7238,10 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
   queryend2 = querylength;
 
   if (plusp == true) {
+    if (left + querylength < (Univcoord_T) nindels) {
+      return (T) NULL;
+    }
+
     alignstart1 = left /*+ querystart1 (0)*/;
     alignend1 = alignstart2 = left + /*queryend1*/indel_pos;
     alignend2 = (left - nindels) + /*queryend2*/querylength;
@@ -7282,6 +7301,10 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
     }
 
   } else {
+    if (left + querylength < (Univcoord_T) (nindels + indel_pos)) {
+      return (T) NULL;
+    }
+
     alignstart1 = (left - nindels) + (querylength /*- querystart (0)*/);
     alignend1 = alignstart2 = (left - nindels) + (querylength - indel_pos);
     alignend2 = left /* + (querylength - queryend)*/;
@@ -7527,6 +7550,10 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
 
 
   if (plusp == true) {
+    if (left + nindels + querylength >= chrhigh) {
+      return (T) NULL;
+    }
+
     alignstart1 = left /*+ querystart1 (0)*/;
     alignend1 = left + indel_pos;
     alignstart2 = (left + nindels) + indel_pos;
@@ -7593,6 +7620,10 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
     }
 
   } else {
+    if (left + querylength < (Univcoord_T) (nindels + indel_pos)) {
+      return (T) NULL;
+    }
+
     alignstart1 = left + (querylength /*- querystart (0)*/);
     alignend1 = left + (querylength - indel_pos);
     alignstart2 = (left - nindels) + (querylength - indel_pos);
diff --git a/src/stage3hr.h b/src/stage3hr.h
index b7b6566..5c20725 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 198078 2016-09-21 00:34:48Z twu $ */
+/* $Id: stage3hr.h 199475 2016-10-23 23:21:59Z twu $ */
 #ifndef STAGE3HR_INCLUDED
 #define STAGE3HR_INCLUDED
 
@@ -167,6 +167,8 @@ extern Substring_T
 Stage3end_substring_low (T this, int hardclip_low);
 extern Substring_T
 Stage3end_substring_containing (T this, int querypos);
+extern double
+Stage3end_min_evalue (T this);
 extern struct Pair_T *
 Stage3end_pairarray (T this);
 extern int
diff --git a/src/substring.c b/src/substring.c
index 64a3b29..90b4603 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 198079 2016-09-21 00:35:41Z twu $";
+static char rcsid[] = "$Id: substring.c 199475 2016-10-23 23:21:59Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -4973,6 +4973,26 @@ blast_bitscore (int alignlength, int nmismatches) {
 }
 
 
+double
+Substring_evalue (T substring) {
+  int alignlength_trim;
+
+#if 0
+  /* Doesn't work for ambiguous substrings */
+  if (substring->plusp == true) {
+    alignlength_trim = (int) (substring->alignend_trim - substring->alignstart_trim);
+  } else {
+    alignlength_trim = (int) (substring->alignstart_trim - substring->alignend_trim);
+  }
+  assert(alignlength_trim == substring->queryend - substring->querystart);
+#else
+  alignlength_trim = substring->queryend - substring->querystart;
+#endif
+
+  return blast_evalue(alignlength_trim,substring->nmismatches_bothdiff);
+}
+
+
 void
 Substring_print_m8 (Filestring_T fp, T substring, Shortread_T headerseq, char *acc_suffix,
 		    char *chr, bool invertp) {
diff --git a/src/substring.h b/src/substring.h
index 4f6f570..ce4f283 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 198080 2016-09-21 00:35:55Z twu $ */
+/* $Id: substring.h 199475 2016-10-23 23:21:59Z twu $ */
 #ifndef SUBSTRING_INCLUDED
 #define SUBSTRING_INCLUDED
 
@@ -325,6 +325,9 @@ Substring_sort_siteN_halves (List_T hitlist, bool ascendingp);
 extern Chrpos_T
 Substring_compute_chrpos (T this, int hardclip_low, bool hide_soft_clips_p);
 
+extern double
+Substring_evalue (T substring);
+
 extern void
 Substring_print_m8 (Filestring_T fp, T substring, Shortread_T headerseq, char *acc_suffix,
 		    char *chr, bool invertp);
diff --git a/src/uniqscan.c b/src/uniqscan.c
index d5c3fbe..e6c1d92 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 197391 2016-09-03 00:43:23Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 200234 2016-11-08 00:56:52Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1325,7 +1325,7 @@ main (int argc, char *argv[]) {
 	     /*force_xs_direction_p*/false,/*md_lowercase_variant_p*/false,
 	     /*snps_p*/snps_iit ? true : false,/*print_nsnpdiffs_p*/snps_iit ? true : false,
 	     Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
-	     /*gff3_phase_swap_p*/false);
+	     /*gff3_phase_swap_p*/false,/*cigar_extended_p*/false);
   Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
 	       /*require_splicedir_p*/false,splicing_iit,splicing_divint_crosstable,
 	       donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git



More information about the debian-med-commit mailing list