[med-svn] [gmap] 01/04: New upstream version 2016-11-07
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Wed Nov 9 15:43:16 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit d49f40eb341e3ecf71335183fd5c5df670b3d661
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Wed Nov 9 16:10:11 2016 +0100
New upstream version 2016-11-07
---
ChangeLog | 57 ++++++
VERSION | 2 +-
configure | 24 +--
src/bitpack64-access.c | 38 ++--
src/bitpack64-incr.c | 36 ++--
src/bitpack64-read.c | 247 ++++++++++++++----------
src/bitpack64-read.h | 3 +
src/bitpack64-readtwo.c | 50 +++--
src/bitpack64-write.c | 144 +++++++-------
src/cpuid.c | 26 ++-
src/cpuid.h | 5 +-
src/gmap.c | 10 +-
src/gmap_select.c | 4 +-
src/gmapl_select.c | 4 +-
src/gsnap.c | 4 +-
src/gsnap_select.c | 4 +-
src/gsnapl_select.c | 4 +-
src/indexdb-write.c | 17 +-
src/indexdb.c | 3 +-
src/pair.c | 496 ++++++++++++++++++++++++++++++++++++++++++++++--
src/pair.h | 4 +-
src/sam_sort.c | 28 ++-
src/samprint.c | 482 ++++++++++++++++++++++++++++++++++++++--------
src/stage1hr.c | 5 +-
src/stage3hr.c | 33 +++-
src/stage3hr.h | 4 +-
src/substring.c | 22 ++-
src/substring.h | 5 +-
src/uniqscan.c | 4 +-
29 files changed, 1404 insertions(+), 361 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 9296781..7530342 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,60 @@
+2016-11-08 twu
+
+ * sam_sort.c: Added printing at monitor intervals
+
+ * stage3hr.c: Checking for cases where insertions and deletions extend past
+ genomicpos 0
+
+ * samprint.c: Added preliminary code for printing extended cigar strings
+
+ * pair.c, pair.h: Added code for printing extended cigar strings. Not
+ printing BLAST e-values.
+
+ * indexdb.c: Removed unused statement
+
+ * gsnap.c, uniqscan.c: Using new interface to Pair_setup
+
+ * gmap_select.c, gmapl_select.c, gsnap_select.c, gsnapl_select.c: Using new
+ interface to CPUID_support
+
+ * gmap.c: Added option --sam-cigar-extended
+
+ * cpuid.c, cpuid.h: Added provisions for AVX512
+
+2016-10-24 twu
+
+ * stage1hr.c: Not computing floors if querylength is less than index1part
+
+ * pair.c: Showing blast_evalue function for GMAP
+
+ * samprint.c: Removing assertions and aborts that do not hold for DNA-Seq
+ chimeras
+
+2016-10-23 twu
+
+ * bitpack64-read.c, pair.c, samprint.c, stage3hr.c, stage3hr.h, substring.c,
+ substring.h: Printing BLAST e-values
+
+ * indexdb-write.c: For initializing counters, using packsizes from
+ offsetsmeta, rather than recomputing them from offsetsstrm
+
+ * bitpack64-write.c: Handling problem with ptri overflowing a signed int.
+ Now just advancing a pointer.
+
+ * bitpack64-read.h: Added interface for a function
+
+ * bitpack64-access.c, bitpack64-incr.c, bitpack64-read.c,
+ bitpack64-readtwo.c: Handling the case where nwritten*4 overflows an
+ unsigned int. Casting it first to UINT8.
+
+2016-10-17 twu
+
+ * bitpack64-access.c: Fixed an increment of out in extract_28
+
+2016-09-26 twu
+
+ * archive.html, index.html: Updated for latest version
+
2016-09-24 twu
* stage3.c: In solving dual introns, handling the case where single_gappairs
diff --git a/VERSION b/VERSION
index 6daa5bd..6f55558 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-09-23
\ No newline at end of file
+2016-11-07
\ No newline at end of file
diff --git a/configure b/configure
index ed5376d..e5f319c 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-09-23.
+# Generated by GNU Autoconf 2.69 for gmap 2016-11-07.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-09-23'
-PACKAGE_STRING='gmap 2016-09-23'
+PACKAGE_VERSION='2016-11-07'
+PACKAGE_STRING='gmap 2016-11-07'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
PACKAGE_URL=''
@@ -1372,7 +1372,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2016-09-23 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-11-07 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1443,7 +1443,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2016-09-23:";;
+ short | recursive ) echo "Configuration of gmap 2016-11-07:";;
esac
cat <<\_ACEOF
@@ -1582,7 +1582,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2016-09-23
+gmap configure 2016-11-07
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2188,7 +2188,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2016-09-23, which was
+It was created by gmap $as_me 2016-11-07, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2538,8 +2538,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-09-23" >&5
-$as_echo "2016-09-23" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-11-07" >&5
+$as_echo "2016-11-07" >&6; }
### Read defaults
@@ -4404,7 +4404,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2016-09-23'
+ VERSION='2016-11-07'
cat >>confdefs.h <<_ACEOF
@@ -20109,7 +20109,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2016-09-23, which was
+This file was extended by gmap $as_me 2016-11-07, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20175,7 +20175,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-gmap config.status 2016-09-23
+gmap config.status 2016-11-07
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/src/bitpack64-access.c b/src/bitpack64-access.c
index c21d45c..7b5dfcc 100644
--- a/src/bitpack64-access.c
+++ b/src/bitpack64-access.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-access.c 180341 2015-12-07 18:29:40Z twu $";
+static char rcsid[] = "$Id: bitpack64-access.c 199469 2016-10-23 03:49:07Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2073,7 +2073,8 @@ static Accessor_T accessor_table[272] =
UINT4
Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
- UINT4 *info, start;
+ UINT4 *info, start4;
+ UINT8 start8;
int nwritten, remainder;
UINT4 *bitpack;
int index, row;
@@ -2084,13 +2085,15 @@ Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
info = &(ptrs[oligo/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
#ifdef WORDS_BIGENDIAN
- start = Bigendian_convert_uint(info[0]);
- bitpack = (UINT4 *) &(comp[start*4]);
- nwritten = Bigendian_convert_uint(info[1]) - start; /* In 128-bit registers */
+ start4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ start8 = 4 * (UINT8) start4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(comp[start8]);
+ nwritten = Bigendian_convert_uint(info[1]) - start4; /* In 128-bit registers */
#else
- start = info[0];
- bitpack = (UINT4 *) &(comp[start*4]);
- nwritten = info[1] - start; /* In 128-bit registers */
+ start4 = info[0]; /* In 128-bit registers */
+ start8 = 4 * (UINT8) start4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(comp[start8]);
+ nwritten = info[1] - start4;
#endif
remainder = oligo % BLOCKSIZE;
@@ -2115,7 +2118,8 @@ Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
UINT4
Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
- UINT4 *info, start;
+ UINT4 *info, start4;
+ UINT8 start8;
int nwritten, remainder;
UINT4 *bitpack;
int index, column;
@@ -2126,13 +2130,15 @@ Bitpack64_access (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
info = &(ptrs[oligo/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
#ifdef WORDS_BIGENDIAN
- start = Bigendian_convert_uint(info[0]);
- bitpack = (UINT4 *) &(comp[start*4]);
- nwritten = Bigendian_convert_uint(info[1]) - start; /* In 128-bit registers */
+ start4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ start8 = 4 * (UINT8) start4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(comp[start8]);
+ nwritten = Bigendian_convert_uint(info[1]) - start4;
#else
- start = info[0];
- bitpack = (UINT4 *) &(comp[start*4]);
- nwritten = info[1] - start; /* In 128-bit registers */
+ start4 = info[0]; /* In 128-bit registers */
+ start8 = 4 * (UINT8) start4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(comp[start8]);
+ nwritten = info[1] - start4;
#endif
remainder = oligo % BLOCKSIZE;
@@ -3394,7 +3400,7 @@ extract_30 (UINT4 *out, const UINT4 *in) {
value = ( CONVERT(*in) >> 4 ) % (1U << 30 ) ;
in += WORD_INCR;
value |= (CONVERT(*in) % (1U<< 2 ))<<( 30 - 2 );
- out += WORD_INCR;
+ /* in += WORD_INCR; was out += WORD_INCR, but don't want either */
out[56] = value;
/* 15 */
diff --git a/src/bitpack64-incr.c b/src/bitpack64-incr.c
index ab5276e..ae0a3ea 100644
--- a/src/bitpack64-incr.c
+++ b/src/bitpack64-incr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-incr.c 197549 2016-09-08 01:14:55Z twu $";
+static char rcsid[] = "$Id: bitpack64-incr.c 199469 2016-10-23 03:49:07Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8368,7 +8368,8 @@ static Subtractor_T subtractor_table[272] =
/* Vertical */
UINT4
Bitpack64_incr (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
- UINT4 *info, start;
+ UINT4 *info, start4;
+ UINT8 start8;
int nwritten, remainder;
UINT4 *bitpack;
int index, column;
@@ -8380,13 +8381,15 @@ Bitpack64_incr (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp) {
info = &(ptrs[oligo/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
#ifdef WORDS_BIGENDIAN
- start = Bigendian_convert_uint(info[0]);
- bitpack = (UINT4 *) &(comp[start*4]);
- nwritten = Bigendian_convert_uint(info[1]) - start; /* In 128-bit registers */
+ start4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ start8 = 4 * (UINT8) start4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(comp[start8]);
+ nwritten = Bigendian_convert_uint(info[1]) - start4;
#else
- start = info[0];
- bitpack = (UINT4 *) &(comp[start*4]);
- nwritten = info[1] - start; /* In 128-bit registers */
+ start4 = info[0]; /* In 128-bit registers */
+ start8 = 4 * (UINT8) start4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(comp[start8]);
+ nwritten = info[1] - start4;
#endif
remainder = oligo % BLOCKSIZE;
@@ -8445,7 +8448,8 @@ Bitpack64_incr_bitpack (Oligospace_T oligo, int packsize, UINT4 *bitpack) {
void
Bitpack64_add (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp, UINT4 increment) {
- UINT4 *info, start;
+ UINT4 *info, start4;
+ UINT8 start8;
int nwritten, remainder;
UINT4 *bitpack;
int index, column;
@@ -8457,13 +8461,15 @@ Bitpack64_add (Oligospace_T oligo, UINT4 *ptrs, UINT4 *comp, UINT4 increment) {
info = &(ptrs[oligo/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
#ifdef WORDS_BIGENDIAN
- start = Bigendian_convert_uint(info[0]);
- bitpack = (UINT4 *) &(comp[start*4]);
- nwritten = Bigendian_convert_uint(info[1]) - start; /* In 128-bit registers */
+ start4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ start8 = 4 * (UINT8) start4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(comp[start8]);
+ nwritten = Bigendian_convert_uint(info[1]) - start4;
#else
- start = info[0];
- bitpack = (UINT4 *) &(comp[start*4]);
- nwritten = info[1] - start; /* In 128-bit registers */
+ start4 = info[0]; /* In 128-bit registers */
+ start8 = 4 * (UINT8) start4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(comp[start8]);
+ nwritten = info[1] - start4;
#endif
remainder = oligo % BLOCKSIZE;
diff --git a/src/bitpack64-read.c b/src/bitpack64-read.c
index d6b1025..1e478b6 100644
--- a/src/bitpack64-read.c
+++ b/src/bitpack64-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-read.c 184374 2016-02-16 23:39:06Z twu $";
+static char rcsid[] = "$Id: bitpack64-read.c 199475 2016-10-23 23:21:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -13771,7 +13771,8 @@ static Unpacker_T unpacker_table[17][17] =
UINT4
Bitpack64_offsetptr (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
Oligospace_T bmer;
- UINT4 *info, nwritten, packsize_div2;
+ UINT4 *info, nwritten4, packsize_div2;
+ UINT8 nwritten8;
int delta, remainder0, remainder1, quarter_block_0, quarter_block_1, column, row;
#ifdef HAVE_SSE2
#ifdef BRANCH_FREE_ROW_SUM
@@ -13800,15 +13801,16 @@ Bitpack64_offsetptr (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4
debug(printf("Entered Bitpack64_offsetptr with oligo %u => bmer %u\n",oligo,bmer));
- nwritten = info[0]; /* In 128-bit registers */
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
#ifdef HAVE_SSE2
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+ bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
#else
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
#endif
- /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
- packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+ /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2; */
+ packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
remainder0 = oligo % BLOCKSIZE;
remainder1 = remainder0 + 1;
@@ -14079,7 +14081,8 @@ UINT8
Bitpack64_offsetptr_huge (UINT8 *end0, Oligospace_T oligo,
UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
Oligospace_T bmer;
- UINT4 *info, nwritten;
+ UINT4 *info, nwritten4;
+ UINT8 nwritten8;
UINT8 offset0, offset1;
UINT4 packsize_div2;
int delta, remainder0, remainder1, quarter_block_0, quarter_block_1, column, row;
@@ -14110,11 +14113,12 @@ Bitpack64_offsetptr_huge (UINT8 *end0, Oligospace_T oligo,
debug(printf("Entered Bitpack64_offsetptr_huge with oligo %u => bmer %u\n",oligo,bmer));
- nwritten = info[0];
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
#ifdef HAVE_SSE2
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+ bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
#else
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
#endif
if (bitpackpages == NULL) {
@@ -14138,8 +14142,8 @@ Bitpack64_offsetptr_huge (UINT8 *end0, Oligospace_T oligo,
offset1 += info[DIFFERENTIAL_METAINFO_SIZE+1];
}
- /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
- packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+ /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2; */
+ packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
remainder0 = oligo % BLOCKSIZE;
remainder1 = remainder0 + 1;
@@ -14408,7 +14412,8 @@ UINT4
Bitpack64_offsetptr_paired (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
UINT4 ptr0;
Oligospace_T bmer;
- UINT4 *info, nwritten, packsize_div2;
+ UINT4 *info, nwritten4, packsize_div2;
+ UINT8 nwritten8;
int delta, remainder, quarter_block, column, row;
#ifdef HAVE_SSE2
#ifdef BRANCH_FREE_ROW_SUM
@@ -14437,15 +14442,16 @@ Bitpack64_offsetptr_paired (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs,
debug(printf("Entered Bitpack64_offsetptr_paired with oligo %u => bmer %u\n",oligo,bmer));
- nwritten = info[0]; /* In 128-bit registers */
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
#ifdef HAVE_SSE2
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+ bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
#else
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
#endif
- /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
- packsize_div2 = (info[2] - nwritten);
+ /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2; */
+ packsize_div2 = (info[2] - nwritten4);
remainder = oligo % BLOCKSIZE;
if (remainder == 31) {
@@ -14662,7 +14668,8 @@ Bitpack64_offsetptr_paired (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs,
UINT4
Bitpack64_read_one (Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
Oligospace_T bmer;
- UINT4 *info, nwritten, packsize_div2;
+ UINT4 *info, nwritten4, packsize_div2;
+ UINT8 nwritten8;
int delta, remainder, column, row;
#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
UINT4 ptr;
@@ -14692,20 +14699,23 @@ Bitpack64_read_one (Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp)
debug(printf("Entered Bitpack64_read_one with oligo %u => bmer %u\n",oligo,bmer));
#if defined(WORDS_BIGENDIAN)
- nwritten = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
- packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten);
+ nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+ packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4);
#elif !defined(HAVE_SSE2)
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
- packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+ packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
#else
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
/* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
- packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+ packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
#endif
remainder = oligo % BLOCKSIZE;
@@ -14863,9 +14873,10 @@ Bitpack64_read_one (Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp)
UINT8
Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
- UINT4 *pageptr;
+ /* UINT4 *pageptr; */
Oligospace_T bmer;
- UINT4 *info, nwritten, packsize_div2;
+ UINT4 *info, nwritten4, packsize_div2;
+ UINT8 nwritten8;
int remainder, column, row;
#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
UINT8 ptr;
@@ -14896,20 +14907,23 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
debug(printf("Entered Bitpack64_read_one_huge with oligo %llu => bmer %u\n",oligo,bmer));
#ifdef WORDS_BIGENDIAN
- nwritten = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
- packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten);
+ nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+ packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4);
#elif !defined(HAVE_SSE2)
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
- packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+ packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
#else
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
- /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
- packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
+ /* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2; */
+ packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4);
#endif
@@ -14929,19 +14943,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
#ifdef WORDS_BIGENDIAN
ptr = Bigendian_convert_uint(/*offset0*/info[1]);
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer >= Bigendian_convert_uint(*pageptr)) {
+ /* pageptr = bitpackpages; */
+ while (bmer >= Bigendian_convert_uint(*bitpackpages)) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#else
ptr = /*offset0*/ (UINT8) info[1];
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer >= *pageptr) {
+ /* pageptr = bitpackpages; */
+ while (bmer >= *bitpackpages) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#endif
@@ -14950,19 +14964,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
#ifdef WORDS_BIGENDIAN
ptr = Bigendian_convert_uint(/*offset0*/info[1]);
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer >= Bigendian_convert_uint(*pageptr)) {
+ /* pageptr = bitpackpages; */
+ while (bmer >= Bigendian_convert_uint(*bitpackpages)) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#else
ptr = /*offset0*/ (UINT8) info[1];
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer >= *pageptr) {
+ /* pageptr = bitpackpages; */
+ while (bmer >= *bitpackpages) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#endif
@@ -14982,19 +14996,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
#ifdef WORDS_BIGENDIAN
ptr = Bigendian_convert_uint(/*offset0*/info[1]);
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer >= Bigendian_convert_uint(*pageptr)) {
+ /* pageptr = bitpackpages; */
+ while (bmer >= Bigendian_convert_uint(*bitpackpages)) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#else
ptr = /*offset0*/ (UINT8) info[1];
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer >= *pageptr) {
+ /* pageptr = bitpackpages; */
+ while (bmer >= *bitpackpages) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#endif
@@ -15019,19 +15033,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
#ifdef WORDS_BIGENDIAN
ptr = Bigendian_convert_uint(/*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1]);
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer + 1 >= Bigendian_convert_uint(*pageptr)) {
+ /* pageptr = bitpackpages; */
+ while (bmer + 1 >= Bigendian_convert_uint(*bitpackpages)) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#else
ptr = /*offset1*/ (UINT8) info[DIFFERENTIAL_METAINFO_SIZE+1];
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer + 1 >= *pageptr) {
+ /* pageptr = bitpackpages; */
+ while (bmer + 1 >= *bitpackpages) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#endif
@@ -15056,19 +15070,19 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
#ifdef WORDS_BIGENDIAN
ptr = Bigendian_convert_uint(/*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1]);
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer + 1 >= Bigendian_convert_uint(*pageptr)) {
+ /* pageptr = bitpackpages; */
+ while (bmer + 1 >= Bigendian_convert_uint(*bitpackpages)) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#else
ptr = /*offset1*/(UINT8) info[DIFFERENTIAL_METAINFO_SIZE+1];
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer + 1 >= *pageptr) {
+ /* pageptr = bitpackpages; */
+ while (bmer + 1 >= *bitpackpages) {
ptr += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
#endif
@@ -15097,16 +15111,16 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
offset1 = info[DIFFERENTIAL_METAINFO_SIZE+1];
} else {
offset0 = 0UL;
- pageptr = bitpackpages;
- while (bmer >= *pageptr) {
+ /* pageptr = bitpackpages; */
+ while (bmer >= *bitpackpages) {
offset0 += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
offset1 = offset0;
- if (bmer + 1 >= *pageptr) {
+ if (bmer + 1 >= *bitpackpages) {
offset1 += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
offset0 += info[1];
@@ -15139,10 +15153,10 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
if (quarter_block <= 1) {
offset0 = (UINT8) info[1];
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer >= *pageptr) {
+ /* pageptr = bitpackpages; */
+ while (bmer >= *bitpackpages) {
offset0 += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
@@ -15168,10 +15182,10 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
} else {
offset1 = (UINT8) info[DIFFERENTIAL_METAINFO_SIZE+1];
if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer + 1 >= *pageptr) {
+ /* pageptr = bitpackpages; */
+ while (bmer + 1 >= *bitpackpages) {
offset1 += POSITIONS_PAGE;
- pageptr++;
+ bitpackpages++;
}
}
@@ -15204,7 +15218,8 @@ Bitpack64_read_one_huge (Oligospace_T oligo, UINT4 *bitpackpages,
void
Bitpack64_block_offsets (UINT4 *offsets, Oligospace_T oligo,
UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
- UINT4 *info, nwritten;
+ UINT4 *info, nwritten4;
+ UINT8 nwritten8;
UINT4 offset0, offset1, temp;
int packsize, k;
#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
@@ -15221,31 +15236,34 @@ Bitpack64_block_offsets (UINT4 *offsets, Oligospace_T oligo,
info = &(bitpackptrs[oligo/BLOCKSIZE * DIFFERENTIAL_METAINFO_SIZE]);
#ifdef WORDS_BIGENDIAN
- nwritten = Bigendian_convert_uint(info[0]);
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
offset0 = Bigendian_convert_uint(info[1]);
offset1 = Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE+1]);
- packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten)*2;
+ packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4)*2;
#elif !defined(HAVE_SSE2)
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
offset0 = info[1];
offset1 = info[DIFFERENTIAL_METAINFO_SIZE+1];
- packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
+ packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2;
#else
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
offset0 = info[1];
offset1 = info[DIFFERENTIAL_METAINFO_SIZE+1];
- packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
+ packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2;
#endif
#ifdef DEBUG
printf("oligo: %08X, nwritten %u, offset0 %u, offset1 %u, packsize %d\n",
- oligo,nwritten,offset0,offset1,packsize);
+ oligo,nwritten4,offset0,offset1,packsize);
#endif
@@ -15360,7 +15378,8 @@ void
Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
UINT4 *pageptr;
- UINT4 *info, nwritten;
+ UINT4 *info, nwritten4;
+ UINT8 nwritten8;
Oligospace_T bmer;
UINT8 offset0, offset1, temp;
int packsize, k;
@@ -15380,16 +15399,19 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
info = &(bitpackptrs[bmer * DIFFERENTIAL_METAINFO_SIZE]);
#ifdef WORDS_BIGENDIAN
- nwritten = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
#elif !defined(HAVE_SSE2)
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
#else
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
#endif
#ifdef WORDS_BIGENDIAN
@@ -15422,17 +15444,17 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
#ifdef WORDS_BIGENDIAN
offset0 += Bigendian_convert_uint(info[1]);
offset1 += Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE+1]);
- packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten)*2;
+ packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4)*2;
#else
offset0 += info[1];
offset1 += info[DIFFERENTIAL_METAINFO_SIZE+1];
- packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
+ packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2;
#endif
#ifdef DEBUG
printf("oligo: %08X, nwritten %u, offset0 %u, offset1 %u, packsize %d\n",
- oligo,nwritten,offset0,offset1,packsize);
+ oligo,nwritten4,offset0,offset1,packsize);
#endif
@@ -15540,3 +15562,22 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
return;
}
#endif
+
+
+
+int
+Bitpack64_differential_packsize (Oligospace_T oligo, UINT4 *bitpackptrs) {
+ UINT4 *info, nwritten4;
+
+ info = &(bitpackptrs[oligo/BLOCKSIZE * DIFFERENTIAL_METAINFO_SIZE]);
+
+#ifdef WORDS_BIGENDIAN
+ nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ return (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten4)*2;
+
+#else
+ nwritten4 = info[0]; /* In 128-bit registers */
+ return (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten4)*2;
+#endif
+}
+
diff --git a/src/bitpack64-read.h b/src/bitpack64-read.h
index 2a40624..5b7766e 100644
--- a/src/bitpack64-read.h
+++ b/src/bitpack64-read.h
@@ -27,4 +27,7 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Oligospace_T oligo,
UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp);
#endif
+extern int
+Bitpack64_differential_packsize (Oligospace_T oligo, UINT4 *bitpackptrs);
+
#endif
diff --git a/src/bitpack64-readtwo.c b/src/bitpack64-readtwo.c
index b4ad85b..5fb7fe4 100644
--- a/src/bitpack64-readtwo.c
+++ b/src/bitpack64-readtwo.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-readtwo.c 184456 2016-02-18 00:05:04Z twu $";
+static char rcsid[] = "$Id: bitpack64-readtwo.c 199469 2016-10-23 03:49:07Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -14208,7 +14208,8 @@ static Unpacker_T unpacker_table[17][17] =
UINT4
Bitpack64_read_two (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
Oligospace_T bmer;
- UINT4 *info, nwritten, packsize_div2;
+ UINT4 *info, nwritten4, packsize_div2;
+ UINT8 nwritten8;
int remainder0, column;
#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
UINT4 offset0, offset1;
@@ -14236,20 +14237,23 @@ Bitpack64_read_two (UINT4 *end0, Oligospace_T oligo, UINT4 *bitpackptrs, UINT4 *
debug(printf("Entered Bitpack64_read_two with oligo %llu => bmer %u\n",oligo,bmer));
#ifdef WORDS_BIGENDIAN
- nwritten = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
- packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten);
+ nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+ packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten4);
#elif !defined(HAVE_SSE2)
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
- packsize_div2 = (info[METAINFO_SIZE] - nwritten);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
+ packsize_div2 = (info[METAINFO_SIZE] - nwritten4);
#else
- nwritten = info[0]; /* In 128-bit registers */
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
- /* packsize = (info[METAINFO_SIZE] - nwritten)*2; */
- packsize_div2 = (info[METAINFO_SIZE] - nwritten);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
+ /* packsize = (info[METAINFO_SIZE] - nwritten4)*2; */
+ packsize_div2 = (info[METAINFO_SIZE] - nwritten4);
#endif
remainder0 = oligo % BLOCKSIZE;
@@ -14522,7 +14526,8 @@ UINT8
Bitpack64_read_two_huge (UINT8 *end0, Oligospace_T oligo,
UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
Oligospace_T bmer;
- UINT4 *info, nwritten;
+ UINT4 *info, nwritten4;
+ UINT8 nwritten8;
UINT8 offset0, offset1;
UINT4 packsize_div2;
int remainder0, column;
@@ -14556,20 +14561,23 @@ Bitpack64_read_two_huge (UINT8 *end0, Oligospace_T oligo,
debug(printf("Entered Bitpack64_read_two_huge with oligo %u => bmer %u\n",oligo,bmer));
#ifdef WORDS_BIGENDIAN
- nwritten = Bigendian_convert_uint(info[0]);
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
offset0 = (UINT8) Bigendian_convert_uint(info[1]);
offset1 = (UINT8) Bigendian_convert_uint(info[METAINFO_SIZE+1]);
#elif !defined(HAVE_SSE2)
- nwritten = info[0];
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten8]);
offset0 = (UINT8) info[1];
offset1 = (UINT8) info[METAINFO_SIZE+1];
#else
- nwritten = info[0];
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+ nwritten4 = info[0]; /* In 128-bit registers */
+ nwritten8 = 4 * (UINT8) nwritten4; /* In 32-bit words */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten8]);
offset0 = (UINT8) info[1];
offset1 = (UINT8) info[METAINFO_SIZE+1];
#endif
@@ -14592,7 +14600,7 @@ Bitpack64_read_two_huge (UINT8 *end0, Oligospace_T oligo,
}
}
debug(printf("offsets are %llu, %llu\n",offset0,offset1));
- packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten);
+ packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten4);
#else
if (bitpackpages != NULL) {
@@ -14610,7 +14618,7 @@ Bitpack64_read_two_huge (UINT8 *end0, Oligospace_T oligo,
}
}
debug(printf("offsets are %llu, %llu\n",offset0,offset1));
- packsize_div2 = (info[METAINFO_SIZE] - nwritten);
+ packsize_div2 = (info[METAINFO_SIZE] - nwritten4);
#endif
remainder0 = oligo % BLOCKSIZE;
diff --git a/src/bitpack64-write.c b/src/bitpack64-write.c
index 9f2e7d9..e5f54ab 100644
--- a/src/bitpack64-write.c
+++ b/src/bitpack64-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-write.c 183888 2016-02-05 20:33:29Z twu $";
+static char rcsid[] = "$Id: bitpack64-write.c 199471 2016-10-23 03:54:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -4590,8 +4590,9 @@ compute_q4_diffs_bidir_huge (UINT4 *diffs, UINT8 *values) {
void
Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending, Oligospace_T n) {
FILE *ptrs_fp, *comp_fp;
- UINT4 *ptrs;
- int ptri, i;
+ UINT4 *ptrs, *p;
+ size_t nptrs;
+ int i;
Oligospace_T positioni;
/* Buffer is used to avoid frequent writes to the file */
@@ -4612,8 +4613,7 @@ Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending,
/* 2 metavalues: nwritten (pointer) and cumulative sum for block.
Packsize can be computed from difference between successive
pointers, if only even packsizes are allowed */
- ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
- ptri = 0;
+ p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -4628,10 +4628,10 @@ Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending,
/* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
for (positioni = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE) {
/* Pointer */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Value for start of block */
- ptrs[ptri++] = ascending[positioni];
+ *p++ = ascending[positioni];
/* Pack block of 64 diffs */
packsize = Bitpack64_compute_q4_diffs_bidir(diffs,&(ascending[positioni]));
@@ -4648,10 +4648,10 @@ Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending,
/* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
if (positioni <= n) {
/* Finish last block of 64 */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Value for start of block */
- ptrs[ptri++] = ascending[positioni];
+ *p++ = ascending[positioni];
/* For differential, want <=. For direct, want < */
for (i = 0; i <= (int) (n - positioni); i++) {
@@ -4675,16 +4675,17 @@ Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending,
/* Write the final pointer, which will point after the end of the file */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Value for end of block */
- ptrs[ptri++] = ascending[n];
+ *p++ = ascending[n];
if ((ptrs_fp = FOPEN_WRITE_BINARY(ptrsfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",ptrsfile);
exit(9);
} else {
- FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+ nptrs = p - ptrs;
+ FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
FREE(ptrs);
fclose(ptrs_fp);
}
@@ -4720,9 +4721,9 @@ void
Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *packsizes, UINT4 **bitpacks,
Oligospace_T n) {
FILE *ptrs_fp, *comp_fp;
- UINT4 *ptrs, nregisters;
+ UINT4 *ptrs, *p, nregisters;
UINT4 totalcount;
- UINT4 ptri;
+ size_t nptrs;
int i;
Oligospace_T positioni, bmer;
@@ -4742,8 +4743,7 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
/* 2 metavalues: nwritten (pointer) and cumulative sum for block.
Packsize can be computed from difference between successive
pointers, if only even packsizes are allowed */
- ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
- ptri = 0;
+ p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -4759,10 +4759,10 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
totalcount = 0;
for (positioni = 0, bmer = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE, bmer++) {
/* Pointer */
- ptrs[ptri++] = nregisters; /* In 128-bit registers */
+ *p++ = nregisters; /* In 128-bit registers */
/* Value for start of block */
- ptrs[ptri++] = totalcount;
+ *p++ = totalcount;
/* Pack block of 64 diffs */
Bitpack64_extract_bitpack(counts,packsizes[bmer],bitpacks[bmer]);
@@ -4778,13 +4778,13 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
/* For nucleotides, expect a single final block where positioni == n */
if (positioni <= n) {
/* Finish last block of 64 */
- ptrs[ptri++] = nregisters; /* In 128-bit registers */
+ *p++ = nregisters; /* In 128-bit registers */
/* Value for start of block */
- ptrs[ptri++] = totalcount;
+ *p++ = totalcount;
if (positioni == n) {
- /* Don't have a bitpack at [bmerspace] */
+ /* Don't have a bitpack at [bmerspace]. Just fills counts with zeroes. */
Bitpack64_extract_bitpack(counts,/*packsize*/0,/*bitpack*/NULL);
} else {
Bitpack64_extract_bitpack(counts,packsizes[bmer],bitpacks[bmer]);
@@ -4808,16 +4808,17 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
}
/* Write the final pointer, which will point after the end of the file */
- ptrs[ptri++] = nregisters; /* In 128-bit registers */
+ *p++ = nregisters; /* In 128-bit registers */
/* Value for end of block */
- ptrs[ptri++] = totalcount;
+ *p++ = totalcount;
if ((ptrs_fp = FOPEN_WRITE_BINARY(ptrsfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",ptrsfile);
exit(9);
} else {
- FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+ nptrs = p - ptrs;
+ FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
FREE(ptrs);
fclose(ptrs_fp);
}
@@ -4841,8 +4842,9 @@ Bitpack64_write_differential_bitpacks (char *ptrsfile, char *compfile, char *pac
void
Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *ascending, Oligospace_T n) {
FILE *ptrs_fp, *comp_fp;
- UINT4 *ptrs;
- int ptri, i;
+ UINT4 *ptrs, *p;
+ size_t nptrs;
+ int i;
Oligospace_T positioni;
/* Buffer is used to avoid frequent writes to the file */
@@ -4861,8 +4863,7 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
/* 2 metavalues: nwritten (pointer) and cumulative sum for block.
Packsize can be computed from difference between successive
pointers, if only even packsizes are allowed */
- ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * PAIRED_METAINFO_SIZE,sizeof(UINT4));
- ptri = 0;
+ p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * PAIRED_METAINFO_SIZE,sizeof(UINT4));
if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -4877,10 +4878,10 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
/* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
for (positioni = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE) {
/* Pointer to D4 */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Prefix sum for start of block */
- ptrs[ptri++] = ascending[positioni];
+ *p++ = ascending[positioni];
/* D4: Pack block of 64 diffs */
packsize = Bitpack64_compute_q4_diffs_bidir(diffs,&(ascending[positioni]));
@@ -4893,7 +4894,7 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
#endif
/* Pointer to D1 */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* D1: Pack block of 64 diffs */
packsize = compute_q1_diffs(diffs,&(ascending[positioni]));
@@ -4911,10 +4912,10 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
if (positioni <= n) {
/* Finish last block of 64 */
/* Pointer to D4 */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Prefix sum for start of block */
- ptrs[ptri++] = ascending[positioni];
+ *p++ = ascending[positioni];
/* For differential, want <=. For direct, want < */
for (i = 0; i <= (int) (n - positioni); i++) {
@@ -4936,7 +4937,7 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
#endif
/* Pointer to D1 */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* D1: Pack block of < 64 diffs */
packsize = compute_q1_diffs(diffs,last_block);
@@ -4944,16 +4945,17 @@ Bitpack64_write_differential_paired (char *ptrsfile, char *compfile, UINT4 *asce
}
/* Write the final pointer, which will point after the end of the file */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Prefix sum for end of block */
- ptrs[ptri++] = ascending[n];
+ *p++ = ascending[n];
if ((ptrs_fp = FOPEN_WRITE_BINARY(ptrsfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",ptrsfile);
exit(9);
} else {
- FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+ nptrs = p - ptrs;
+ FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
FREE(ptrs);
fclose(ptrs_fp);
}
@@ -4990,7 +4992,8 @@ Bitpack64_write_fixed10 (char *ptrsfile, char *compfile, UINT4 *ascending, Oligo
#endif
FILE *comp_fp;
UINT4 *ptrs;
- int ptri, i;
+ UINT4 ptri;
+ int i;
Oligospace_T positioni;
/* Buffer is used to avoid frequent writes to the file */
@@ -5127,8 +5130,8 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
UINT8 currpage, nextpage;
FILE *pages_fp, *ptrs_fp, *comp_fp;
UINT4 pages[25]; /* Allows us to handle up to 100 billion positions */
- UINT4 *ptrs;
- int ptri;
+ UINT4 *ptrs, *p;
+ size_t nptrs;
Oligospace_T positioni;
/* Buffer is used to avoid frequent writes to the file */
@@ -5149,8 +5152,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
/* 2 metavalues: nwritten (pointer) and cumulative sum for block.
Packsize can be computed from difference between successive
pointers, if only even packsizes are allowed */
- ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
- ptri = 0;
+ p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -5167,7 +5169,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
/* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
for (positioni = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE) {
/* Pointer */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Value for start of block */
while (ascending[positioni] >= nextpage) {
@@ -5177,7 +5179,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
currpage = nextpage;
nextpage += POSITIONS_PAGE;
}
- ptrs[ptri++] = ascending[positioni] - currpage;
+ *p++ = ascending[positioni] - currpage;
/* Pack block of 64 diffs */
@@ -5195,7 +5197,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
/* Use <= n instead of < n, because we want ascending[n] to be taken care of by unpack_00, not a check for remainder == 0 */
if (positioni <= n) {
/* Finish last block of 64 */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Value for start of block */
while (ascending[positioni] >= nextpage) {
@@ -5205,7 +5207,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
currpage = nextpage;
nextpage += POSITIONS_PAGE;
}
- ptrs[ptri++] = ascending[positioni] - currpage;
+ *p++ = ascending[positioni] - currpage;
/* For differential, want <=. For direct, want < */
for (i = 0; i <= (int) (n - positioni); i++) {
@@ -5229,7 +5231,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
/* Write the final pointer, which will point after the end of the file */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Value for end of block */
if (ascending[n] >= nextpage) {
@@ -5239,7 +5241,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
currpage = nextpage;
/* nextpage += POSITIONS_PAGE; */
}
- ptrs[ptri++] = ascending[n] - currpage;
+ *p++ = ascending[n] - currpage;
/* Write pages */
@@ -5264,7 +5266,8 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
fprintf(stderr,"Can't write to file %s\n",ptrsfile);
exit(9);
} else {
- FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+ nptrs = p - ptrs;
+ FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
FREE(ptrs);
fclose(ptrs_fp);
}
@@ -5286,10 +5289,10 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
char *packsizes, UINT4 **bitpacks, Oligospace_T n) {
UINT8 currpage, nextpage;
FILE *pages_fp, *ptrs_fp, *comp_fp;
- UINT4 pages[25]; /* Allows us to handle up to 100 billion positions */
- UINT4 *ptrs, nregisters;
+ UINT4 pages[25]; /* Allows us to handle up to 100 billion positions. At q3, means 300 billion nt */
+ UINT4 *ptrs, *p, nregisters;
UINT8 totalcount;
- int ptri;
+ size_t nptrs;
Oligospace_T positioni, bmer;
/* Buffer is used to avoid frequent writes to the file */
@@ -5308,8 +5311,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
/* 2 metavalues: nwritten (pointer) and cumulative sum for block.
Packsize can be computed from difference between successive
pointers, if only even packsizes are allowed */
- ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
- ptri = 0;
+ p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE)/BLOCKSIZE + 1) * DIFFERENTIAL_METAINFO_SIZE,sizeof(UINT4));
if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -5327,7 +5329,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
totalcount = 0;
for (positioni = 0, bmer = 0; positioni + BLOCKSIZE <= n; positioni += BLOCKSIZE, bmer++) {
/* Pointer */
- ptrs[ptri++] = nregisters; /* In 128-bit registers */
+ *p++ = nregisters; /* In 128-bit registers */
/* Value for start of block */
while (totalcount >= nextpage) {
@@ -5337,7 +5339,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
currpage = nextpage;
nextpage += POSITIONS_PAGE;
}
- ptrs[ptri++] = totalcount - currpage;
+ *p++ = totalcount - currpage;
/* Pack block of 64 diffs */
@@ -5354,7 +5356,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
/* For nucleotides, expect a single final block where positioni == n */
if (positioni <= n) {
/* Finish last block of 64 */
- ptrs[ptri++] = nregisters; /* In 128-bit registers */
+ *p++ = nregisters; /* In 128-bit registers */
/* Value for start of block */
while (totalcount >= nextpage) {
@@ -5364,7 +5366,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
currpage = nextpage;
nextpage += POSITIONS_PAGE;
}
- ptrs[ptri++] = totalcount - currpage;
+ *p++ = totalcount - currpage;
if (positioni == n) {
/* Don't have a bitpack at [bmerspace] */
@@ -5392,7 +5394,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
/* Write the final pointer, which will point after the end of the file */
- ptrs[ptri++] = nregisters; /* In 128-bit registers */
+ *p++ = nregisters; /* In 128-bit registers */
/* Value for end of block */
if (totalcount >= nextpage) {
@@ -5402,7 +5404,7 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
currpage = nextpage;
/* nextpage += POSITIONS_PAGE; */
}
- ptrs[ptri++] = totalcount - currpage;
+ *p++ = totalcount - currpage;
/* Write pages */
@@ -5427,7 +5429,8 @@ Bitpack64_write_differential_huge_bitpacks (char *pagesfile, char *ptrsfile, cha
fprintf(stderr,"Can't write to file %s\n",ptrsfile);
exit(9);
} else {
- FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+ nptrs = p - ptrs;
+ FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
FREE(ptrs);
fclose(ptrs_fp);
}
@@ -5456,7 +5459,7 @@ Bitpack64_write_fixed10_huge (char *pagesfile, char *ptrsfile, char *compfile,
FILE *pages_fp, *comp_fp;
UINT4 pages[25]; /* Allows us to handle up to 100 billion positions */
UINT4 *ptrs;
- int ptri;
+ UINT4 ptri;
Oligospace_T positioni;
/* Buffer is used to avoid frequent writes to the file */
@@ -5677,8 +5680,9 @@ compute_packsize (UINT4 *values) {
void
Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospace_T n) {
FILE *ptrs_fp, *comp_fp;
- UINT4 *ptrs;
- int ptri, i;
+ UINT4 *ptrs, *p;
+ size_t nptrs;
+ int i;
Oligospace_T positioni;
UINT4 *buffer;
@@ -5696,8 +5700,7 @@ Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospac
/* 1 metavalue: nwritten (pointer). Packsize can be
computed from difference between successive pointers, if only
even packsizes are allowed */
- ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE - 1)/BLOCKSIZE + 1) * DIRECT_METAINFO_SIZE,sizeof(UINT4));
- ptri = 0;
+ p = ptrs = (UINT4 *) CALLOC(((n + BLOCKSIZE - 1)/BLOCKSIZE + 1) * DIRECT_METAINFO_SIZE,sizeof(UINT4));
if ((comp_fp = FOPEN_WRITE_BINARY(compfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",compfile);
@@ -5710,7 +5713,7 @@ Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospac
for (positioni = 0; positioni + BLOCKSIZE < n; positioni += BLOCKSIZE) {
/* Pointer */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
/* Pack block of 64 diffs */
packsize = compute_packsize(&(direct[positioni]));
@@ -5725,7 +5728,7 @@ Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospac
if (positioni < n) {
/* Finish last block of 64 */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
i = 0;
while (positioni < n) {
@@ -5747,13 +5750,14 @@ Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, Oligospac
/* Write the final pointer, which will point after the end of the
file */
- ptrs[ptri++] = nwritten/4; /* In 128-bit registers */
+ *p++ = nwritten/4; /* In 128-bit registers */
if ((ptrs_fp = FOPEN_WRITE_BINARY(ptrsfile)) == NULL) {
fprintf(stderr,"Can't write to file %s\n",ptrsfile);
exit(9);
} else {
- FWRITE_UINTS(ptrs,ptri,ptrs_fp);
+ nptrs = p - ptrs;
+ FWRITE_UINTS(ptrs,nptrs,ptrs_fp);
FREE(ptrs);
fclose(ptrs_fp);
}
diff --git a/src/cpuid.c b/src/cpuid.c
index 45d722e..eadd351 100644
--- a/src/cpuid.c
+++ b/src/cpuid.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: cpuid.c 191640 2016-06-09 22:32:52Z twu $";
+static char rcsid[] = "$Id: cpuid.c 200231 2016-11-08 00:55:17Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -10,12 +10,14 @@ static char rcsid[] = "$Id: cpuid.c 191640 2016-06-09 22:32:52Z twu $";
#if defined(AX_HOST_POWER8)
void
-CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p, bool *avx2_support_p) {
+CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p,
+ bool *avx2_support_p, bool *avx512_support_p) {
*sse2_support_p = false;
*ssse3_support_p = false;
*sse4_1_support_p = false;
*sse4_2_support_p = false;
*avx2_support_p = false;
+ *avx512_support_p = false;
return;
}
@@ -26,12 +28,14 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
#include <immintrin.h>
void
-CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p, bool *avx2_support_p) {
+CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p,
+ bool *avx2_support_p, bool *avx512_support_p) {
*sse2_support_p = _may_i_use_cpu_feature(_FEATURE_SSE2);
*ssse3_support_p = _may_i_use_cpu_feature(_FEATURE_SSSE3);
*sse4_1_support_p = _may_i_use_cpu_feature(_FEATURE_SSE4_1);
*sse4_2_support_p = _may_i_use_cpu_feature(_FEATURE_SSE4_2);
*avx2_support_p = _may_i_use_cpu_feature(_FEATURE_AVX2 | _FEATURE_FMA | _FEATURE_BMI | _FEATURE_LZCNT | _FEATURE_MOVBE);
+ *avx512_support_p = _may_i_use_cpu_feature(_FEATURE_512F);
return;
}
@@ -78,7 +82,8 @@ check_xcr0_ymm () {
}
void
-CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p, bool *avx2_support_p) {
+CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p,
+ bool *avx2_support_p, bool *avx512_support_p) {
uint32_t abcd[4];
uint32_t sse2_mask = (1 << 26); /* edx */
uint32_t ssse3_mask = (1 << 9); /* ecx */
@@ -90,6 +95,7 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
uint32_t fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27)); /* ecx */
uint32_t avx2_bmi12_mask = ((1 << 5) | (1 << 3) | (1 << 8)); /* ebx */
uint32_t lzcnt_mask = (1 << 5); /* ecx */
+ uint32_t avx512_mask = (1 << 16);
run_cpuid(1, 0, abcd);
@@ -105,8 +111,10 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
if ((abcd[ECX] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask) {
*avx2_support_p = false;
+ *avx512_support_p = false;
} else if (!check_xcr0_ymm()) {
*avx2_support_p = false;
+ *avx512_support_p = false;
} else {
run_cpuid(7, 0, abcd);
#ifdef MAIN
@@ -115,6 +123,7 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
if ((abcd[EBX] & avx2_bmi12_mask) != avx2_bmi12_mask) {
*avx2_support_p = false;
+ *avx2_support_p = false;
} else {
run_cpuid(0x80000001, 0, abcd);
#ifdef MAIN
@@ -123,12 +132,16 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
if ((abcd[ECX] & lzcnt_mask) != lzcnt_mask) {
*avx2_support_p = false;
+ *avx512_support_p = false;
} else {
*avx2_support_p = true;
+ *avx512_support_p = ((abcd[ECX] & avx512_mask) == avx512_mask) ? true : false;
}
}
}
+
+
return;
}
@@ -143,14 +156,17 @@ main (int argc, char *argv[]) {
bool sse4_1_support_p;
bool sse4_2_support_p;
bool avx2_support_p;
+ bool avx512_support_p;
- CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+ CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+ &avx2_support_p,&avx512_support_p);
printf("sse2 support: %d\n",sse2_support_p);
printf("ssse3 support: %d\n",ssse3_support_p);
printf("sse4.1 support: %d\n",sse4_1_support_p);
printf("sse4.2 support: %d\n",sse4_2_support_p);
printf("avx2 support: %d\n",avx2_support_p);
+ printf("avx512 support: %d\n",avx512_support_p);
return 0;
}
diff --git a/src/cpuid.h b/src/cpuid.h
index 97048ab..287e91c 100644
--- a/src/cpuid.h
+++ b/src/cpuid.h
@@ -1,10 +1,11 @@
-/* $Id: cpuid.h 171614 2015-08-10 23:27:29Z twu $ */
+/* $Id: cpuid.h 200231 2016-11-08 00:55:17Z twu $ */
#ifndef CPUID_INCLUDED
#define CPUID_INCLUDED
#include "bool.h"
extern void
-CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p, bool *avx2_support_p);
+CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p,
+ bool *avx2_support_p, bool *avx512_support_p);
#endif
diff --git a/src/gmap.c b/src/gmap.c
index 5a0fe5f..26a6c10 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 197391 2016-09-03 00:43:23Z twu $";
+static char rcsid[] = "$Id: gmap.c 200232 2016-11-08 00:55:43Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -395,6 +395,7 @@ static char *sam_read_group_name = NULL;
static char *sam_read_group_library = NULL;
static char *sam_read_group_platform = NULL;
static bool sam_insert_0M_p = false;
+static bool sam_cigar_extended_p = false;
static bool orderedp = false;
static bool failsonlyp = false;
@@ -565,6 +566,7 @@ static struct option long_options[] = {
{"quality-print-shift", required_argument, 0, 'j'}, /* quality_shift */
{"no-sam-headers", no_argument, 0, 0}, /* sam_headers_p */
{"sam-use-0M", no_argument, 0, 0}, /* sam_insert_0M_p */
+ {"sam-extended-cigar", no_argument, 0, 0}, /* sam_cigar_extended_p */
{"read-group-id", required_argument, 0, 0}, /* sam_read_group_id */
{"read-group-name", required_argument, 0, 0}, /* sam_read_group_name */
{"read-group-library", required_argument, 0, 0}, /* sam_read_group_library */
@@ -5326,6 +5328,8 @@ parse_command_line (int argc, char *argv[], int optind) {
sam_headers_p = false;
} else if (!strcmp(long_name,"sam-use-0M")) {
sam_insert_0M_p = true;
+ } else if (!strcmp(long_name,"sam-extended-cigar")) {
+ sam_cigar_extended_p = true;
} else if (!strcmp(long_name,"quality-protocol")) {
if (user_quality_shift == true) {
fprintf(stderr,"Cannot specify both -j (--quality-print-shift) and --quality-protocol\n");
@@ -6589,7 +6593,7 @@ main (int argc, char *argv[]) {
force_xs_direction_p,md_lowercase_variant_p,
/*snps_p*/genomecomp_alt ? true : false,
/*print_nsnpdiffs_p*/genomecomp_alt ? true : false,genomelength,
- gff3_phase_swap_p);
+ gff3_phase_swap_p,sam_cigar_extended_p);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
require_splicedir_p,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
@@ -7209,6 +7213,8 @@ Output options\n\
--no-sam-headers Do not print headers beginning with '@'\n\
--sam-use-0M Insert 0M in CIGAR between adjacent insertions and deletions\n\
Required by Picard, but can cause errors in other tools\n\
+ --sam-extended-cigar Use extended CIGAR format (using X and = symbols instead of M,\n\
+ to indicate matches and mismatches, respectively\n\
--force-xs-dir For RNA-Seq alignments, disallows XS:A:? when the sense direction\n\
is unclear, and replaces this value arbitrarily with XS:A:+.\n\
May be useful for some programs, such as Cufflinks, that cannot\n\
diff --git a/src/gmap_select.c b/src/gmap_select.c
index b7ecfcf..7bbffb9 100644
--- a/src/gmap_select.c
+++ b/src/gmap_select.c
@@ -27,11 +27,13 @@ main (int argc, char *argv[]) {
bool sse4_1_support_p;
bool sse4_2_support_p;
bool avx2_support_p;
+ bool avx512_support_p;
char *dir, **new_argv;
int i;
int rc;
- CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+ CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+ &avx2_support_p,&avx512_support_p);
new_argv = (char **) malloc((argc + 1) * sizeof(char *));
for (i = 1; i < argc; i++) {
diff --git a/src/gmapl_select.c b/src/gmapl_select.c
index 5020d38..816b356 100644
--- a/src/gmapl_select.c
+++ b/src/gmapl_select.c
@@ -27,11 +27,13 @@ main (int argc, char *argv[]) {
bool sse4_1_support_p;
bool sse4_2_support_p;
bool avx2_support_p;
+ bool avx512_support_p;
char *dir, **new_argv;
int i;
int rc;
- CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+ CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+ &avx2_support_p,&avx512_support_p);
new_argv = (char **) malloc((argc + 1) * sizeof(char *));
for (i = 1; i < argc; i++) {
diff --git a/src/gsnap.c b/src/gsnap.c
index 963f3f7..d61e979 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 197391 2016-09-03 00:43:23Z twu $";
+static char rcsid[] = "$Id: gsnap.c 200234 2016-11-08 00:56:52Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3302,7 +3302,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
force_xs_direction_p,md_lowercase_variant_p,
/*snps_p*/snps_iit ? true : false,print_nsnpdiffs_p,
Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
- /*gff3_phase_swap_p*/false);
+ /*gff3_phase_swap_p*/false,/*cigar_extended_p*/false);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
/*require_splicedir_p*/true,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
diff --git a/src/gsnap_select.c b/src/gsnap_select.c
index 001d338..788a29a 100644
--- a/src/gsnap_select.c
+++ b/src/gsnap_select.c
@@ -27,11 +27,13 @@ main (int argc, char *argv[]) {
bool sse4_1_support_p;
bool sse4_2_support_p;
bool avx2_support_p;
+ bool avx512_support_p;
char *dir, **new_argv;
int i;
int rc;
- CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+ CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+ &avx2_support_p,&avx512_support_p);
new_argv = (char **) malloc((argc + 1) * sizeof(char *));
for (i = 1; i < argc; i++) {
diff --git a/src/gsnapl_select.c b/src/gsnapl_select.c
index 1730264..d2d20dc 100644
--- a/src/gsnapl_select.c
+++ b/src/gsnapl_select.c
@@ -27,11 +27,13 @@ main (int argc, char *argv[]) {
bool sse4_1_support_p;
bool sse4_2_support_p;
bool avx2_support_p;
+ bool avx512_support_p;
char *dir, **new_argv;
int i;
int rc;
- CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,&avx2_support_p);
+ CPUID_support(&sse2_support_p,&ssse3_support_p,&sse4_1_support_p,&sse4_2_support_p,
+ &avx2_support_p,&avx512_support_p);
new_argv = (char **) malloc((argc + 1) * sizeof(char *));
for (i = 1; i < argc; i++) {
diff --git a/src/indexdb-write.c b/src/indexdb-write.c
index faf0eb3..a6077db 100644
--- a/src/indexdb-write.c
+++ b/src/indexdb-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb-write.c 184465 2016-02-18 00:09:34Z twu $";
+static char rcsid[] = "$Id: indexdb-write.c 199472 2016-10-23 04:00:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -695,6 +695,7 @@ Indexdb_write_offsets (char *destdir, char interval_char, FILE *sequence_fp, Uni
) {
masked = oligo & mask;
bmer = masked/BLOCKSIZE;
+
if (Bitpack64_access_filledp(masked,(int) packsizes[bmer],bitpacks[bmer]) == true) {
bitpacks[bmer] = Bitpack64_realloc_one((int) packsizes[bmer],bitpacks[bmer]);
packsizes[bmer] += 2;
@@ -1006,6 +1007,7 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
aaindex = Alphabet_get_aa_index(high,low,watsonp,index1part_nt);
#endif
bmer = aaindex/BLOCKSIZE;
+
if (Bitpack64_access_filledp(aaindex,(int) packsizes[bmer],bitpacks[bmer]) == true) {
bitpacks[bmer] = Bitpack64_realloc_one((int) packsizes[bmer],bitpacks[bmer]);
packsizes[bmer] += 2;
@@ -1028,6 +1030,7 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
) {
masked = oligo & mask;
bmer = masked/BLOCKSIZE;
+
if (Bitpack64_access_filledp(masked,(int) packsizes[bmer],bitpacks[bmer]) == true) {
bitpacks[bmer] = Bitpack64_realloc_one((int) packsizes[bmer],bitpacks[bmer]);
packsizes[bmer] += 2;
@@ -1167,6 +1170,7 @@ Indexdb_bitpack_counter (UINT4 **counterstrm, UINT4 *offsetsmeta, UINT4 *offsets
for (oligoi = 0ULL; oligoi < oligospace; oligoi += blocksize) {
countermeta[k++] = nvectors;
+#if 0
Bitpack64_block_offsets(offsets,oligoi,offsetsmeta,offsetsstrm);
maxdiff = 0;
@@ -1190,8 +1194,12 @@ Indexdb_bitpack_counter (UINT4 **counterstrm, UINT4 *offsetsmeta, UINT4 *offsets
packsize = 32 - firstbit;
#endif
}
-
packsize = (packsize + 1) & ~1; /* Converts packsizes to the next multiple of 2 */
+
+#else
+ packsize = Bitpack64_differential_packsize(oligoi,offsetsmeta);
+#endif
+
/* nvectors += (packsize * blocksize)/128; */
nvectors += packsize / 2; /* For blocksize of 64 => *64/128 */
}
@@ -1248,6 +1256,7 @@ Indexdb_bitpack_counter_huge (UINT4 **counterstrm, UINT4 *offsetspages, UINT4 *o
for (oligoi = 0ULL; oligoi < oligospace; oligoi += BLOCKSIZE) {
countermeta[k++] = nvectors;
+#if 0
Bitpack64_block_offsets_huge(offsets,oligoi,offsetspages,offsetsmeta,offsetsstrm);
maxdiff = 0;
@@ -1273,6 +1282,10 @@ Indexdb_bitpack_counter_huge (UINT4 **counterstrm, UINT4 *offsetspages, UINT4 *o
}
packsize = (packsize + 1) & ~1; /* Converts packsizes to the next multiple of 2 */
+#else
+ packsize = Bitpack64_differential_packsize(oligoi,offsetsmeta);
+#endif
+
/* nvectors += (packsize * blocksize)/128; */
nvectors += packsize / 2; /* For blocksize of 64 => *64/128 */
}
diff --git a/src/indexdb.c b/src/indexdb.c
index 699b6e5..47c98a6 100644
--- a/src/indexdb.c
+++ b/src/indexdb.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb.c 197240 2016-09-01 17:07:26Z twu $";
+static char rcsid[] = "$Id: indexdb.c 200235 2016-11-08 00:57:16Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1518,7 +1518,6 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
#endif
#ifdef LARGE_GENOMES
- /*ptr0 =*/ Bitpack64_read_two_huge(&end0,poly_T,new->offsetspages,new->offsetsmeta,new->offsetsstrm);
if ((filesize = Access_filesize(filenames->positions_high_filename)) != end0 * sizeof(unsigned char)) {
fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %zu, but observed %zu.\n",
filenames->positions_high_filename,(size_t) (end0*sizeof(unsigned char)),filesize);
diff --git a/src/pair.c b/src/pair.c
index ae3ddda..8a80e3f 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 198279 2016-09-24 00:54:22Z twu $";
+static char rcsid[] = "$Id: pair.c 200236 2016-11-08 00:58:17Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -150,13 +150,14 @@ static bool print_nsnpdiffs_p;
static double genomelength; /* For BLAST E-value */
static bool gff3_phase_swap_p = true;
+static bool cigar_extended_p = true;
void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
- Univcoord_T genomelength_in, bool gff3_phase_swap_p_in) {
+ Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, bool cigar_extended_p_in) {
trim_mismatch_score = trim_mismatch_score_in;
trim_indel_score = trim_indel_score_in;
gff3_separators_p = gff3_separators_p_in;
@@ -167,6 +168,7 @@ Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
print_nsnpdiffs_p = print_nsnpdiffs_p_in;
genomelength = (double) genomelength_in;
gff3_phase_swap_p = gff3_phase_swap_p_in;
+ cigar_extended_p = cigar_extended_p_in;
return;
}
@@ -4087,7 +4089,6 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
#endif /* PMAP */
-#ifdef GSNAP
/* Taken from NCBI Blast 2.2.29, algo/blast/core/blast_stat.c */
/* Karlin-Altschul formula: m n exp(-lambda * S + log k) = k m n exp(-lambda * S) */
/* Also in substring.c */
@@ -4103,6 +4104,8 @@ blast_evalue (int alignlength, int nmismatches) {
return k * (double) alignlength * genomelength * exp(-lambda * score);
}
+
+#ifdef GSNAP
static double
blast_bitscore (int alignlength, int nmismatches) {
double k = 0.1;
@@ -4307,6 +4310,133 @@ Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool inver
#endif
+#if 0
+double
+Pair_min_evalue (struct T *pairarray, int npairs) {
+ double min_evalue = 1000.0, evalue;
+ bool in_exon = true;
+ struct T *ptr, *ptr0, *this = NULL;
+ int alignlength_trim, exon_querystart = -1, exon_queryend;
+ int nmismatches_bothdiff, i;
+ int last_querypos = -1;
+
+
+ ptr = pairarray;
+ exon_querystart = ptr->querypos + 1;
+ nmismatches_bothdiff = 0;
+
+ i = 0;
+ while (i < npairs) {
+ this = ptr++;
+ i++;
+
+ if (this->gapp) {
+ if (in_exon == true) {
+ /* SPLICE START */
+ ptr0 = ptr;
+ while (ptr0->gapp) {
+ ptr0++;
+ }
+ exon_queryend = last_querypos + 1;
+
+ alignlength_trim = exon_queryend - exon_querystart;
+ assert(alignlength_trim >= 0);
+ if ((evalue = blast_evalue(alignlength_trim,nmismatches_bothdiff)) < min_evalue) {
+ min_evalue = evalue;
+ }
+
+ nmismatches_bothdiff = 0;
+
+ in_exon = false;
+ }
+ } else if (this->comp == INTRONGAP_COMP) {
+ /* May want to print dinucleotides */
+
+ } else {
+ /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP,
+ SHORTGAP_COMP, or MISMATCH_COMP */
+ if (in_exon == false) {
+ /* SPLICE CONTINUATION */
+ exon_querystart = this->querypos + 1;
+
+ in_exon = true;
+ }
+ if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
+ if (this->genome == ' ') {
+ /* INSERTION */
+ exon_queryend = last_querypos + 1;
+
+ /* indel_pos = this->querypos; */
+ while (i < npairs && this->gapp == false && this->genome == ' ') {
+ this = ptr++;
+ i++;
+ }
+ ptr--;
+ i--;
+
+ this = ptr;
+ exon_querystart = this->querypos + 1;
+ nmismatches_bothdiff = 0;
+
+ } else if (this->cdna == ' ') {
+ /* DELETION */
+ exon_queryend = last_querypos + 1;
+
+ /* indel_pos = this->querypos; */
+ while (i < npairs && this->gapp == false && this->cdna == ' ') {
+ this = ptr++;
+ i++;
+ }
+ ptr--;
+ i--;
+
+ /* Finish rest of this line */
+ alignlength_trim = exon_queryend - exon_querystart;
+ assert(alignlength_trim >= 0);
+ if ((evalue = blast_evalue(alignlength_trim,nmismatches_bothdiff)) < min_evalue) {
+ min_evalue = evalue;
+ }
+
+ this = ptr;
+ exon_querystart = this->querypos + 1;
+ nmismatches_bothdiff = 0;
+
+ } else {
+ fprintf(stderr,"Error at %c%c%c\n",this->genome,this->comp,this->cdna);
+ exit(9);
+ }
+
+ } else {
+ /* c = this->genome; */
+ if (this->genome == this->cdna) {
+ /* nmatches++; */
+ } else if (this->genomealt == this->cdna) {
+ /* nmismatches_refdiff++; */
+ } else {
+ nmismatches_bothdiff++;
+ /* nmismatches_refdiff++; */
+ }
+ }
+ }
+
+ if (this->cdna != ' ') {
+ last_querypos = this->querypos;
+ }
+ }
+
+ exon_queryend = last_querypos + 1;
+
+ alignlength_trim = exon_queryend - exon_querystart;
+ assert(alignlength_trim >= 0);
+ if ((evalue = blast_evalue(alignlength_trim,nmismatches_bothdiff)) < min_evalue) {
+ min_evalue = evalue;
+ }
+
+ return min_evalue;
+}
+#endif
+
+
/* Modified from print_endtypes */
static void
splice_site_probs (double *sense_prob, double *antisense_prob,
@@ -4698,7 +4828,7 @@ Pair_tokens_cigarlength (List_T tokens) {
token = (char *) List_head(p);
type = token[strlen(token)-1];
/* Should include 'H', but that gets added according to hardclip_low and hardclip_high */
- if (type == 'S' || type == 'I' || type == 'M') {
+ if (type == 'S' || type == 'I' || type == 'M' || type == 'X' || type == '=') {
sscanf(token,"%d",&tokenlength);
length += tokenlength;
}
@@ -4923,7 +5053,7 @@ print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, ch
#else
int mapq_score,
#endif
- char *sam_read_group_id, bool merged_overlap_p, bool sarrayp) {
+ double min_evalue, char *sam_read_group_id, bool merged_overlap_p, bool sarrayp) {
#if 0
/* Should already be checked when Stage3_T or Stage3end_T object was created */
@@ -5149,6 +5279,11 @@ print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, ch
FPRINTF(fp,"\tXG:Z:M");
}
+#if 0
+ /* 12. TAGS: XE (BLAST E-value) */
+ FPRINTF(fp,"\tXE:f:%.2g",min_evalue);
+#endif
+
FPRINTF(fp,"\n");
return;
@@ -5345,9 +5480,9 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
}
-List_T
-Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
- bool watsonp, int sensedir, int chimera_part) {
+static List_T
+compute_cigar_standard (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
+ bool watsonp, int sensedir, int chimera_part) {
List_T tokens = NULL;
char token[11];
int Mlength = 0, Ilength = 0, Dlength = 0;
@@ -5621,6 +5756,340 @@ Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struc
}
+static List_T
+compute_cigar_extended (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
+ bool watsonp, int sensedir, int chimera_part) {
+ List_T tokens = NULL;
+ char token[11];
+ int Elength = 0, Xlength = 0, Ilength = 0, Dlength = 0;
+ bool in_exon = false, deletionp;
+ struct T *ptr, *prev, *this = NULL;
+ int exon_queryend = -1;
+ Chrpos_T exon_genomestart = -1;
+ Chrpos_T exon_genomeend, genome_gap;
+ int query_gap;
+ int last_querypos = -1;
+ Chrpos_T last_genomepos = -1U;
+ int i;
+
+ /* *chimera_hardclip_start = *chimera_hardclip_high = 0; */
+ *intronp = false;
+
+ ptr = pairs;
+
+ if (chimera_part == +1) {
+ if (ptr->querypos > *hardclip_start) {
+ if (ptr->querypos > 0) {
+ /* Clip to beginning */
+ *hardclip_start = ptr->querypos;
+ sprintf(token,"%dH",*hardclip_start);
+ tokens = push_token(tokens,token);
+ }
+ } else {
+ if (*hardclip_start > 0) {
+ /* Clip to hard clip boundary */
+ sprintf(token,"%dH",*hardclip_start);
+ tokens = push_token(tokens,token);
+ }
+ }
+ } else {
+ if (*hardclip_start > 0) {
+ sprintf(token,"%dH",*hardclip_start);
+ tokens = push_token(tokens,token);
+ }
+ if (ptr->querypos > (*hardclip_start)) {
+ sprintf(token,"%dS",ptr->querypos - (*hardclip_start));
+ tokens = push_token(tokens,token);
+ }
+ }
+
+ this = (T) NULL;
+ for (i = 0; i < npairs; i++) {
+ prev = this;
+ this = ptr++;
+
+#if 0
+ /* print_tokens_sam(stdout,tokens); */
+ Pair_dump_one(this,true);
+ printf("\n");
+#endif
+
+ if (this->gapp) {
+ if (in_exon == true) {
+ exon_queryend = last_querypos + 1;
+ exon_genomeend = last_genomepos + 1;
+#if 0
+ if (watsonp) {
+ intron_start = exon_genomeend + 1;
+ } else {
+ intron_start = exon_genomeend - 1;
+ }
+#endif
+
+ if (Elength > 0) {
+ sprintf(token,"%d=",Elength);
+ tokens = push_token(tokens,token);
+ } else if (Xlength > 0) {
+ sprintf(token,"%dX",Xlength);
+ tokens = push_token(tokens,token);
+ } else if (Ilength > 0) {
+ sprintf(token,"%dI",Ilength);
+ tokens = push_token(tokens,token);
+ } else if (Dlength > 0) {
+ sprintf(token,"%dD",Dlength);
+ tokens = push_token(tokens,token);
+ }
+
+ Elength = Xlength = Ilength = Dlength = 0;
+
+ in_exon = false;
+ }
+
+ } else if (this->comp == INTRONGAP_COMP) {
+ /* Do nothing */
+
+ } else {
+ /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP,
+ SHORTGAP_COMP, or MISMATCH_COMP */
+ if (in_exon == false) {
+ /* exon_querystart = this->querypos + 1; */
+ exon_genomestart = this->genomepos + 1;
+
+ if (prev != NULL) {
+ /* Gap */
+ /* abs() gives a large value when flag -m64 is specified */
+ /* genome_gap = abs(intron_end - intron_start) + 1; */
+ if (watsonp) {
+ /* intron_end = exon_genomestart - 1; */
+ /* genome_gap = (intron_end - intron_start) + 1; */
+ genome_gap = exon_genomestart - exon_genomeend - 1;
+ } else {
+ /* intron_end = exon_genomestart + 1; */
+ /* genome_gap = (intron_start - intron_end) + 1; */
+ genome_gap = exon_genomeend - exon_genomestart - 1;
+ }
+
+ deletionp = false;
+#ifdef CONVERT_INTRONS_TO_DELETIONS
+ if (sensedir == SENSE_FORWARD) {
+ if (prev->comp == FWD_CANONICAL_INTRON_COMP ||
+ prev->comp == FWD_GCAG_INTRON_COMP ||
+ prev->comp == FWD_ATAC_INTRON_COMP) {
+ sprintf(token,"%uN",genome_gap);
+ *intronp = true;
+ } else if (cigar_noncanonical_splices_p == true && genome_gap >= MIN_INTRONLEN) {
+ sprintf(token,"%uN",genome_gap);
+ *intronp = true;
+ } else {
+ sprintf(token,"%uD",genome_gap);
+ deletionp = true;
+ }
+ } else if (sensedir == SENSE_ANTI) {
+ if (prev->comp == REV_CANONICAL_INTRON_COMP ||
+ prev->comp == REV_GCAG_INTRON_COMP ||
+ prev->comp == REV_ATAC_INTRON_COMP) {
+ sprintf(token,"%uN",genome_gap);
+ *intronp = true;
+ } else if (cigar_noncanonical_splices_p == true && genome_gap >= MIN_INTRONLEN) {
+ sprintf(token,"%uN",genome_gap);
+ *intronp = true;
+ } else {
+ sprintf(token,"%uD",genome_gap);
+ deletionp = true;
+ }
+ } else if (cigar_noncanonical_splices_p == true && genome_gap >= MIN_INTRONLEN){
+ sprintf(token,"%uN",genome_gap);
+ *intronp = true;
+ } else {
+ sprintf(token,"%uD",genome_gap);
+ deletionp = true;
+ }
+#else
+ sprintf(token,"%uN",genome_gap);
+ *intronp = true;
+#endif
+ tokens = push_token(tokens,token);
+
+ /* Check for dual gap. Doesn't work for hard clipping. */
+ /* assert(exon_queryend >= 0); */
+
+ query_gap = this->querypos - exon_queryend;
+ assert(query_gap >= 0);
+ if (query_gap > 0) {
+ if (deletionp == true && sam_insert_0M_p == true) {
+ /* Put zero matches between deletion and insertion, since some programs will complain */
+ sprintf(token,"0M");
+ tokens = push_token(tokens,token);
+ }
+
+ sprintf(token,"%uI",query_gap);
+ tokens = push_token(tokens,token);
+ }
+ }
+
+ in_exon = true;
+ }
+
+ if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
+ /* Gap in upper or lower sequence */
+ if (this->genome == ' ') {
+ /* Insertion relative to genome */
+ if (Elength > 0) {
+ sprintf(token,"%d=",Elength);
+ tokens = push_token(tokens,token);
+ Elength = 0;
+ } else if (Xlength > 0) {
+ sprintf(token,"%dX",Xlength);
+ tokens = push_token(tokens,token);
+ Xlength = 0;
+ } else if (Dlength > 0) {
+ /* unlikely */
+ sprintf(token,"%dD",Dlength);
+ tokens = push_token(tokens,token);
+ Dlength = 0;
+ }
+ Ilength++;
+ } else if (this->cdna == ' ') {
+ /* Deletion relative to genome */
+ if (Elength > 0) {
+ sprintf(token,"%d=",Elength);
+ tokens = push_token(tokens,token);
+ Elength = 0;
+ } else if (Xlength > 0) {
+ sprintf(token,"%dX",Xlength);
+ tokens = push_token(tokens,token);
+ Xlength = 0;
+ } else if (Ilength > 0) {
+ sprintf(token,"%dI",Ilength);
+ tokens = push_token(tokens,token);
+ Ilength = 0;
+ }
+ Dlength++;
+ } else {
+ fprintf(stderr,"Error at %c%c%c\n",this->genome,this->comp,this->cdna);
+ exit(9);
+ }
+
+ } else {
+ /* Count even if unknown base */
+
+ if (Ilength > 0) {
+ sprintf(token,"%dI",Ilength);
+ tokens = push_token(tokens,token);
+ Ilength = 0;
+ } else if (Dlength > 0) {
+ sprintf(token,"%dD",Dlength);
+ tokens = push_token(tokens,token);
+ Dlength = 0;
+ }
+
+ if (prev == NULL || prev->gapp || prev->comp == INDEL_COMP || prev->comp == SHORTGAP_COMP) {
+ if (this->cdna == this->genome) {
+ Elength++;
+ } else {
+ Xlength++;
+ }
+
+ } else if (prev->cdna == prev->genome) {
+ if (this->cdna == this->genome) {
+ Elength++;
+ } else {
+ if (Elength > 0) {
+ sprintf(token,"%d=",Elength);
+ tokens = push_token(tokens,token);
+ Elength = 0;
+ }
+ Xlength++;
+ }
+
+ } else {
+ if (this->cdna != this->genome) {
+ Xlength++;
+ } else {
+ if (Xlength > 0) {
+ sprintf(token,"%dX",Xlength);
+ tokens = push_token(tokens,token);
+ Xlength = 0;
+ }
+ Elength++;
+ }
+ }
+ }
+ }
+
+ if (this != NULL) {
+ if (this->cdna != ' ') {
+ last_querypos = this->querypos;
+ }
+ if (this->genome != ' ') {
+ last_genomepos = this->genomepos;
+ }
+ }
+ }
+
+ /* prev = this; */
+ /* exon_queryend = last_querypos + 1; */
+ /* exon_genomeend = last_genomepos + 1; */
+
+ if (Elength > 0) {
+ sprintf(token,"%d=",Elength);
+ tokens = push_token(tokens,token);
+ } else if (Xlength > 0) {
+ sprintf(token,"%dX",Xlength);
+ tokens = push_token(tokens,token);
+ } else if (Ilength > 0) {
+ sprintf(token,"%dI",Ilength);
+ tokens = push_token(tokens,token);
+ } else if (Dlength > 0) {
+ sprintf(token,"%dD",Dlength);
+ tokens = push_token(tokens,token);
+ }
+
+
+ /* Terminal clipping */
+ if (chimera_part == -1) {
+ if (last_querypos < querylength_given - 1 - (*hardclip_end)) {
+ if (last_querypos < querylength_given - 1) {
+ /* Clip to end */
+ *hardclip_end = querylength_given - 1 - last_querypos;
+ sprintf(token,"%dH",*hardclip_end);
+ tokens = push_token(tokens,token);
+ }
+ } else {
+ if (*hardclip_end > 0) {
+ /* Clip to hard clip boundary */
+ sprintf(token,"%dH",*hardclip_end);
+ tokens = push_token(tokens,token);
+ }
+ }
+ } else {
+ if (last_querypos < querylength_given - 1 - (*hardclip_end)) {
+ sprintf(token,"%dS",querylength_given - 1 - (*hardclip_end) - last_querypos);
+ tokens = push_token(tokens,token);
+ }
+ if (*hardclip_end > 0) {
+ sprintf(token,"%dH",*hardclip_end);
+ tokens = push_token(tokens,token);
+ }
+ }
+
+ return Pair_clean_cigar(tokens,watsonp);
+}
+
+
+List_T
+Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
+ bool watsonp, int sensedir, int chimera_part) {
+ if (cigar_extended_p == true) {
+ return compute_cigar_extended(&(*intronp),&(*hardclip_start),&(*hardclip_end),pairs,npairs,querylength_given,
+ watsonp,sensedir,chimera_part);
+ } else {
+ return compute_cigar_standard(&(*intronp),&(*hardclip_start),&(*hardclip_end),pairs,npairs,querylength_given,
+ watsonp,sensedir,chimera_part);
+ }
+}
+
+
#if 0
/* Copied from samprint.c */
static bool
@@ -6176,7 +6645,7 @@ compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nin
} else if (type == 'S') {
/* k += length; */
- } else if (type == 'M') {
+ } else if (type == 'M' || type == 'X' || type == '=') {
for (i = 0; i < length; i++, k++) {
this = &(pairs[k]);
debug4(printf("M %d/%d comp %c\n",i,length,this->comp));
@@ -6271,7 +6740,7 @@ compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nin
} else if (type == 'S') {
/* k += length; */
- } else if (type == 'M') {
+ } else if (type == 'M' || type == 'X' || type == '=') {
if (state == IN_DELETION) {
md_tokens = push_token(md_tokens,"^");
}
@@ -6415,6 +6884,7 @@ Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairarray, int npairs,
struct T *clipped_pairarray;
int clipped_npairs;
bool cigar_tokens_alloc;
+ double min_evalue, max_bitscore;
if (chrnum == 0) {
@@ -6484,6 +6954,10 @@ Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairarray, int npairs,
md_tokens = compute_md_string(&nmismatches_refdiff,&nmismatches_bothdiff,&nindels,
clipped_pairarray,clipped_npairs,watsonp,cigar_tokens);
+#if 0
+ min_evalue = Pair_min_evalue(clipped_pairarray,clipped_npairs);
+#endif
+
print_sam_line(fp,abbrev,first_read_p,acc1,acc2,chrstring,
watsonp,sensedir,cigar_tokens,md_tokens,
nmismatches_refdiff,nmismatches_bothdiff,nindels,
@@ -6496,7 +6970,7 @@ Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairarray, int npairs,
#else
mapq_score,
#endif
- sam_read_group_id,merged_overlap_p,sarrayp);
+ min_evalue,sam_read_group_id,merged_overlap_p,sarrayp);
/* Print procedures free the character strings */
Pair_tokens_free(&md_tokens);
diff --git a/src/pair.h b/src/pair.h
index 3123737..f461b2f 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 196403 2016-08-16 14:33:56Z twu $ */
+/* $Id: pair.h 200236 2016-11-08 00:58:17Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -32,7 +32,7 @@ extern void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
- Univcoord_T genomelength_in, bool gff3_phase_swap_p_in);
+ Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, bool cigar_extended_p_in);
extern int
Pair_querypos (T this);
extern Chrpos_T
diff --git a/src/sam_sort.c b/src/sam_sort.c
index adfeb5f..bcf37e9 100644
--- a/src/sam_sort.c
+++ b/src/sam_sort.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sam_sort.c 191131 2016-06-03 17:23:29Z twu $";
+static char rcsid[] = "$Id: sam_sort.c 200239 2016-11-08 01:00:30Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -27,6 +27,9 @@ static char rcsid[] = "$Id: sam_sort.c 191131 2016-06-03 17:23:29Z twu $";
#include "getopt.h"
+#define MONITOR_INTERVAL 100000
+
+
/************************************************************************
*
* Check for correctness:
@@ -613,7 +616,7 @@ process_without_dups (FILE **sam_inputs, int *headerlengths, int *ncells, int ni
int n_mappers = 0, n_nomappers = 0;
Intlist_T l;
struct T *cells_allocated, *ptr;
- int i, j, k;
+ int b, i, j, k;
size_t fileposition;
int linelen;
@@ -664,7 +667,16 @@ process_without_dups (FILE **sam_inputs, int *headerlengths, int *ncells, int ni
Stopwatch_start(stopwatch);
fprintf(stderr,"Printing entries...");
- for (k = 0; k < ncells_total; k++) {
+ for (b = 0; b + MONITOR_INTERVAL < ncells_total; b += MONITOR_INTERVAL) {
+ fprintf(stderr,"Printed %d entries\n",b);
+ for (i = 0; i < MONITOR_INTERVAL; i++) {
+ k = b + i;
+ debug(printf("%u\t%u\t%d\n",cells[k]->genomicpos,cells[k]->linestart,cells[k]->linelen));
+ fp_sam = sam_inputs[cells[k]->filei];
+ Cell_print_fromfile(fp_sam,cells[k],headers);
+ }
+ }
+ for (k = b; k < ncells_total; k++) {
debug(printf("%u\t%u\t%d\n",cells[k]->genomicpos,cells[k]->linestart,cells[k]->linelen));
fp_sam = sam_inputs[cells[k]->filei];
Cell_print_fromfile(fp_sam,cells[k],headers);
@@ -679,7 +691,15 @@ process_without_dups (FILE **sam_inputs, int *headerlengths, int *ncells, int ni
Stopwatch_start(stopwatch);
fprintf(stderr,"Printing entries...");
- for (k = 0; k < ncells_total; k++) {
+ for (b = 0; b + MONITOR_INTERVAL < ncells_total; b += MONITOR_INTERVAL) {
+ fprintf(stderr,"Printed %d entries\n",b);
+ for (i = 0; i < MONITOR_INTERVAL; i++) {
+ k = b + i;
+ fp_sam = sam_inputs[cells[k]->filei];
+ Cell_print_fromfile(fp_sam,cells[k],headers);
+ }
+ }
+ for (k = b; k < ncells_total; k++) {
fp_sam = sam_inputs[cells[k]->filei];
Cell_print_fromfile(fp_sam,cells[k],headers);
}
diff --git a/src/samprint.c b/src/samprint.c
index 1fe4f71..a377944 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 197889 2016-09-15 23:19:09Z twu $";
+static char rcsid[] = "$Id: samprint.c 200237 2016-11-08 00:58:55Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -67,6 +67,7 @@ static int maxpaths_report;
static char *failedinput_root;
static bool fastq_format_p;
static bool hide_soft_clips_p;
+static bool cigar_extended_p = false;
static bool clip_overlap_p;
static bool merge_overlap_p;
@@ -1143,6 +1144,257 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
}
+/* Based on print_md_string */
+static void
+print_extended_cigar (Filestring_T fp, char *genomicfwd_refdiff,
+ int stringlength, int querypos, int querylength,
+ int hardclip_low, int hardclip_high, bool plusp, bool lastp) {
+ int nmatches = 0, nmismatches = 0;
+ int starti, endi, i;
+ bool hardclip_end_p = false;
+ int cliplength, endpos;
+
+ if (plusp == true) {
+ debug2(printf("\nEntering print_extended_cigar with querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus: %s ref, %s both\n",
+ querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
+ if (hardclip_low == 0) {
+ starti = 0;
+ hardclip_end_p = true;
+ } else if (hardclip_low > querypos) {
+ /* startpos = hardclip_low; */
+ starti = hardclip_low - querypos;
+ hardclip_end_p = true;
+ debug2(printf(" Setting starti %d = hardclip_low %d - querypos %d\n",
+ starti,hardclip_low,querypos));
+ } else {
+ /* startpos = querypos; */
+ starti = 0;
+ }
+
+ if (querylength - hardclip_high < querypos + stringlength) {
+ endpos = querylength - hardclip_high;
+ endi = (querylength - hardclip_high) - querypos;
+ debug2(printf(" Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
+ endi,querylength,hardclip_high,querypos));
+ } else {
+ endpos = querypos + stringlength;
+ endi = stringlength;
+ }
+
+ debug2(printf(" Counting matches from %d to %d\n",starti,endi));
+
+ if (genomicfwd_refdiff == NULL) {
+ if (endi > starti) {
+ nmatches += (endi - starti);
+ }
+
+ } else if (md_lowercase_variant_p == false) {
+ for (i = starti; i < endi; i++) {
+ if (isupper(genomicfwd_refdiff[i])) {
+ if (nmismatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%dX",nmismatches);
+ nmismatches = 0;
+ hardclip_end_p = false;
+ }
+ nmatches++;
+
+ } else {
+ /* A true mismatch against both variants */
+ if (nmatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%d=",nmatches);
+ nmatches = 0;
+ hardclip_end_p = false;
+ }
+ nmismatches++;
+ }
+ }
+
+ } else {
+ for (i = starti; i < endi; i++) {
+ if (isupper(genomicfwd_refdiff[i])) {
+ if (nmismatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%dX",nmismatches);
+ nmismatches = 0;
+ hardclip_end_p = false;
+ }
+ nmatches++;
+
+#if 0
+ } else if (isupper(genomicfwd_bothdiff[i])) {
+ /* A mismatch against the reference only => alternate variant */
+ if (nmatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%d=",nmatches);
+ nmatches = 0;
+ hardclip_end_p = false;
+ }
+ nmismatches++;
+#endif
+
+ } else {
+ /* A true mismatch against both variants */
+ if (nmatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%d=",nmatches);
+ nmatches = 0;
+ hardclip_end_p = false;
+ }
+ nmismatches++;
+ }
+ }
+ }
+
+ if (nmatches > 0) {
+ FPRINTF(fp,"%d=",nmatches);
+ } else if (nmismatches > 0) {
+ FPRINTF(fp,"%dX",nmismatches);
+ }
+
+ if (lastp == true) {
+ /* cliplength = querypos + stringlength - endpos; */
+ cliplength = querylength - endpos;
+ if (cliplength > 0) {
+ debug1(printf(" Pushing final %dH\n",cliplength));
+ FPRINTF(fp,"%dH",cliplength);
+ }
+ }
+
+ } else {
+ debug2(printf("\nEntering print_extended_cigar with querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus: %s ref, %s both\n",
+ querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
+ querypos = querylength - querypos - stringlength;
+ debug2(printf(" Revising querypos to be %d\n",querypos));
+
+ if (hardclip_low == 0) {
+ starti = 0;
+ hardclip_end_p = true;
+ } else if (hardclip_low > querypos) {
+ /* startpos = hardclip_low; */
+ starti = hardclip_low - querypos;
+ hardclip_end_p = true;
+ debug2(printf(" Setting starti %d = hardclip_low %d - querypos %d\n",
+ starti,hardclip_low,querypos));
+ } else {
+ /* startpos = querypos; */
+ starti = 0;
+ }
+
+ if (querylength - hardclip_high < querypos + stringlength) {
+ endpos = querylength - hardclip_high;
+ endi = (querylength - hardclip_high) - querypos;
+ debug2(printf(" Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
+ endi,querylength,hardclip_high,querypos));
+ } else {
+ endpos = querypos + stringlength;
+ endi = stringlength;
+ }
+
+ debug2(printf(" Counting matches from %d to %d\n",starti,endi));
+
+ if (genomicfwd_refdiff == NULL) {
+ if (endi > starti) {
+ nmatches += (endi - starti);
+ }
+
+ } else if (md_lowercase_variant_p == false) {
+ for (i = starti; i < endi; i++) {
+ if (isupper(genomicfwd_refdiff[i])) {
+ if (nmismatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%dX",nmismatches);
+ nmismatches = 0;
+ hardclip_end_p = false;
+ }
+ nmatches++;
+
+ } else {
+ /* A true mismatch against both variants */
+ if (nmatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%d=",nmatches);
+ nmatches = 0;
+ hardclip_end_p = false;
+ }
+ nmismatches++;
+ }
+ }
+
+ } else {
+ for (i = starti; i < endi; i++) {
+ if (isupper(genomicfwd_refdiff[i])) {
+ if (nmismatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%dX",nmismatches);
+ nmismatches = 0;
+ hardclip_end_p = false;
+ }
+ nmatches++;
+
+#if 0
+ } else if (isupper(genomicfwd_bothdiff[i])) {
+ /* A mismatch against the reference only => alternate variant */
+ if (nmatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%d=",nmatches);
+ nmatches = 0;
+ hardclip_end_p = false;
+ }
+ nmismatches++;
+#endif
+
+ } else {
+ /* A true mismatch against both variants */
+ if (nmatches > 0 || hardclip_end_p == true) {
+ FPRINTF(fp,"%d=",nmatches);
+ nmatches = 0;
+ hardclip_end_p = false;
+ }
+ nmismatches++;
+ }
+ }
+ }
+
+ if (nmatches > 0) {
+ FPRINTF(fp,"%d=",nmatches);
+ } else if (nmismatches > 0) {
+ FPRINTF(fp,"%dX",nmismatches);
+ }
+
+ if (lastp == true) {
+ cliplength = endpos;
+ if (cliplength > 0) {
+ debug1(printf(" Pushing final %dH\n",cliplength));
+ FPRINTF(fp,"%dH",cliplength);
+ }
+ }
+ }
+
+ return;
+}
+
+
+static void
+print_cigar_M (Filestring_T fp, Substring_T substring, int substring_length, int substring_start,
+ int stringlength, int querypos, int querylength,
+ int hardclip_low, int hardclip_high, bool plusp, bool lastp, int trimlength) {
+ char *genomicfwd_refdiff, *genomicdir_refdiff;
+
+ if (cigar_extended_p == false) {
+ print_cigar(fp,/*type*/'M',stringlength,querypos,querylength,
+ hardclip_low,hardclip_high,plusp,lastp,trimlength);
+ } else if ((genomicdir_refdiff = Substring_genomic_refdiff(substring)) == NULL) {
+ print_extended_cigar(fp,/*genomicfwd_refdiff*/NULL,/*stringlength*/substring_length,
+ /*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,plusp,lastp);
+ } else if (plusp == true) {
+ print_extended_cigar(fp,&(genomicdir_refdiff[substring_start]),/*stringlength*/substring_length,
+ /*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,plusp,lastp);
+ } else {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ print_extended_cigar(fp,genomicfwd_refdiff,/*stringlength*/substring_length,
+ /*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,plusp,lastp);
+ FREEA(genomicfwd_refdiff);
+ }
+}
+
+
#if 0
/* Copy also in pair.c for GMAP */
static bool
@@ -1281,17 +1533,22 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
Substring_queryend((Substring_T) List_head(p))));
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_querystart(substring) + Substring_match_length(substring) +
- (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_querystart(substring) + Substring_match_length(substring) +
+ (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
} else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
print_cigar(fp,/*type*/'S',Substring_querystart(substring),
/*querypos*/0,querylength,hardclip_low,hardclip_high,
/*plusp*/true,/*lastp*/false,/*trimlength*/0);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
/*querypos*/Substring_queryend(substring),querylength,
hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
@@ -1307,18 +1564,23 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
post_junction = (Junction_T) List_head(q);
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_querystart(substring) +
- Substring_match_length(substring),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_querystart(substring) +
+ Substring_match_length(substring),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/0);
} else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
print_cigar(fp,/*type*/'S',Substring_querystart(substring),
/*querypos*/0,querylength,hardclip_low,hardclip_high,
/*plusp*/true,/*lastp*/false,/*trimlength*/0);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
}
p = List_next(p);
@@ -1345,15 +1607,19 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
Substring_queryend((Substring_T) List_head(p))));
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring) +
- (querylength - Substring_queryend(substring)),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring) +
+ (querylength - Substring_queryend(substring)),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
} else {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
/*querypos*/Substring_queryend(substring),querylength,
hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
@@ -1365,10 +1631,13 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
/* Middle substring, plus */
debug(printf("Middle substring, plus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
Substring_queryend((Substring_T) List_head(p))));
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
}
p = List_next(p);
}
@@ -1389,15 +1658,20 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
debug(printf("Last substring, plus, hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
Substring_queryend((Substring_T) List_head(p))));
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring) +
- (querylength - Substring_queryend(substring)),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring) +
+ (querylength - Substring_queryend(substring)),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
} else {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
/*querypos*/Substring_queryend(substring),querylength,
hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
@@ -1426,18 +1700,23 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
Substring_queryend((Substring_T) List_head(p))));
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- (querylength - Substring_queryend(substring)) +
- Substring_match_length(substring) + Substring_querystart(substring),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true,/*trimlength*/0);
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ (querylength - Substring_queryend(substring)) +
+ Substring_match_length(substring) + Substring_querystart(substring),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true,/*trimlength*/0);
} else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
/*querypos*/querylength,querylength,
hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
print_cigar(fp,/*type*/'S',Substring_querystart(substring),
/*querypos*/Substring_querystart(substring),querylength,
hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
@@ -1453,18 +1732,23 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
post_junction = (Junction_T) List_head(q);
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- (querylength - Substring_queryend(substring)) +
- Substring_match_length(substring),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ (querylength - Substring_queryend(substring)) +
+ Substring_match_length(substring),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false,/*trimlength*/0);
} else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
/*querypos*/querylength,querylength,
hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
}
p = List_next(p);
@@ -1491,15 +1775,20 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
Substring_queryend((Substring_T) List_head(p))));
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring) +
- Substring_querystart(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring) +
+ Substring_querystart(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
} else {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
print_cigar(fp,/*type*/'S',Substring_querystart(substring),
/*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
/*plusp*/false,/*lastp*/true,/*trimlength*/0);
@@ -1511,10 +1800,13 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
/* Middle substring, minus */
debug(printf("Middle substring, minus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
Substring_queryend((Substring_T) List_head(p))));
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
}
p = List_next(p);
}
@@ -1536,15 +1828,20 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
Substring_queryend((Substring_T) List_head(p))));
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring) +
- Substring_querystart(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring) +
+ Substring_querystart(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
} else {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ print_cigar_M(fp,substring,substring_length,substring_start,
+ Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
print_cigar(fp,/*type*/'S',Substring_querystart(substring),
/*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
/*plusp*/false,/*lastp*/true,/*trimlength*/0);
@@ -2136,6 +2433,11 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
FPRINTF(fp,"\tXG:Z:A");
}
+#if 0
+ /* 12. TAGS: XE (BLAST E-value) */
+ FPRINTF(fp,"\tXE:f:%.2g",Stage3end_min_evalue(stage3end));
+#endif
+
FPRINTF(fp,"\n");
return;
}
@@ -2269,7 +2571,8 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
if (sensep == true) {
- assert(Substring_siteD_pos(donor) == Substring_queryend(donor));
+ /* Doesn't hold for DNA-Seq chimeras */
+ /* assert(Substring_siteD_pos(donor) == Substring_queryend(donor)); */
if (plusp == true) {
/* sensep true, plusp true */
/* FPRINTF(fp,"donor sensep true, plusp true\n"); */
@@ -2328,7 +2631,8 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
}
} else {
- assert(Substring_siteD_pos(donor) == Substring_querystart(donor));
+ /* Doesn't hold for DNA-Seq chimeras */
+ /* assert(Substring_siteD_pos(donor) == Substring_querystart(donor)); */
if (plusp == true) {
/* sensep false, plusp true */
/* FPRINTF(fp,"donor sensep false, plusp true\n"); */
@@ -2588,8 +2892,11 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
FPRINTF(fp,"\tXO:Z:%s",abbrev);
/* 12. TAGS: XS */
- assert(donor_sensedir != SENSE_NULL);
- FPRINTF(fp,"\tXS:A:%c",donor_strand);
+ /* Doesn't hold for DNA-Seq chimeras */
+ /* assert(donor_sensedir != SENSE_NULL); */
+ if (donor_sensedir != SENSE_NULL) {
+ FPRINTF(fp,"\tXS:A:%c",donor_strand);
+ }
/* 12. TAGS: XA */
if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
@@ -2783,7 +3090,8 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
if (sensep == true) {
- assert(Substring_siteA_pos(acceptor) == Substring_querystart(acceptor));
+ /* Doesn't hold for DNA-Seq chimeras */
+ /* assert(Substring_siteA_pos(acceptor) == Substring_querystart(acceptor)); */
if (plusp == true) {
/* sensep true, plusp true */
/* FPRINTF(fp,"acceptor sensep true, plusp true\n"); */
@@ -2837,7 +3145,8 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
} else {
/* sensep false, plusp true */
- assert(Substring_siteA_pos(acceptor) == Substring_queryend(acceptor));
+ /* Doesn't hold for DNA-Seq chimeras */
+ /* assert(Substring_siteA_pos(acceptor) == Substring_queryend(acceptor)); */
if (plusp == true) {
/* FPRINTF(fp,"acceptor sensep false, plusp true\n"); */
if (hide_soft_clips_p == true) {
@@ -3094,8 +3403,11 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
FPRINTF(fp,"\tXO:Z:%s",abbrev);
/* 12. TAGS: XS */
- assert(acceptor_sensedir != SENSE_NULL);
- FPRINTF(fp,"\tXS:A:%c",acceptor_strand);
+ /* Doesn't hold for DNA-Seq chimeras */
+ /* assert(acceptor_sensedir != SENSE_NULL); */
+ if (acceptor_sensedir != SENSE_NULL) {
+ FPRINTF(fp,"\tXS:A:%c",acceptor_strand);
+ }
/* 12. TAGS: XA */
if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
@@ -3257,7 +3569,12 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor_strand = '+';
}
} else {
- abort();
+ /* For DNA-Seq chimeras, SENSE_NULL is possible */
+ if (Substring_plusp(donor) == true) {
+ donor_strand = '+';
+ } else {
+ donor_strand = '-';
+ }
}
if ((acceptor_sensedir = Substring_sensedir(acceptor)) == SENSE_FORWARD) {
@@ -3273,7 +3590,12 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
acceptor_strand = '+';
}
} else {
- abort();
+ /* For DNA-Seq chimeras, SENSE_NULL is possible */
+ if (Substring_plusp(acceptor) == true) {
+ acceptor_strand = '+';
+ } else {
+ acceptor_strand = '-';
+ }
}
if (sensedir == SENSE_FORWARD) {
diff --git a/src/stage1hr.c b/src/stage1hr.c
index d5019a3..7178440 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 198074 2016-09-21 00:25:02Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 199517 2016-10-24 23:55:08Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -14301,6 +14301,9 @@ compute_floors (bool *any_omitted_p, bool *alloc_floors_p, Floors_T *floors_arra
} else if (keep_floors_p == false) {
floors = Floors_new_standard(querylength,max_end_insertions,/*keep_floors_p*/false);
*alloc_floors_p = true;
+ } else if (querylength <= index1part) {
+ *alloc_floors_p = false;
+ return (Floors_T) NULL;
} else {
if (floors_array[querylength] == NULL) {
floors_array[querylength] = Floors_new_standard(querylength,max_end_insertions,/*keep_floors_p*/true);
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 3b36519..f0e8170 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 198077 2016-09-21 00:34:35Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 200238 2016-11-08 00:59:56Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1173,6 +1173,21 @@ Stage3end_substring_containing (T this, int querypos) {
}
+double
+Stage3end_min_evalue (T this) {
+ double min_evalue = 1000.0, evalue;
+ Substring_T substring;
+ List_T p;
+
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if ((evalue = Substring_evalue(substring)) < min_evalue) {
+ min_evalue = evalue;
+ }
+ }
+
+ return min_evalue;
+}
struct Pair_T *
Stage3end_pairarray (T this) {
@@ -7223,6 +7238,10 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
queryend2 = querylength;
if (plusp == true) {
+ if (left + querylength < (Univcoord_T) nindels) {
+ return (T) NULL;
+ }
+
alignstart1 = left /*+ querystart1 (0)*/;
alignend1 = alignstart2 = left + /*queryend1*/indel_pos;
alignend2 = (left - nindels) + /*queryend2*/querylength;
@@ -7282,6 +7301,10 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
}
} else {
+ if (left + querylength < (Univcoord_T) (nindels + indel_pos)) {
+ return (T) NULL;
+ }
+
alignstart1 = (left - nindels) + (querylength /*- querystart (0)*/);
alignend1 = alignstart2 = (left - nindels) + (querylength - indel_pos);
alignend2 = left /* + (querylength - queryend)*/;
@@ -7527,6 +7550,10 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
if (plusp == true) {
+ if (left + nindels + querylength >= chrhigh) {
+ return (T) NULL;
+ }
+
alignstart1 = left /*+ querystart1 (0)*/;
alignend1 = left + indel_pos;
alignstart2 = (left + nindels) + indel_pos;
@@ -7593,6 +7620,10 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
}
} else {
+ if (left + querylength < (Univcoord_T) (nindels + indel_pos)) {
+ return (T) NULL;
+ }
+
alignstart1 = left + (querylength /*- querystart (0)*/);
alignend1 = left + (querylength - indel_pos);
alignstart2 = (left - nindels) + (querylength - indel_pos);
diff --git a/src/stage3hr.h b/src/stage3hr.h
index b7b6566..5c20725 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 198078 2016-09-21 00:34:48Z twu $ */
+/* $Id: stage3hr.h 199475 2016-10-23 23:21:59Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -167,6 +167,8 @@ extern Substring_T
Stage3end_substring_low (T this, int hardclip_low);
extern Substring_T
Stage3end_substring_containing (T this, int querypos);
+extern double
+Stage3end_min_evalue (T this);
extern struct Pair_T *
Stage3end_pairarray (T this);
extern int
diff --git a/src/substring.c b/src/substring.c
index 64a3b29..90b4603 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 198079 2016-09-21 00:35:41Z twu $";
+static char rcsid[] = "$Id: substring.c 199475 2016-10-23 23:21:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -4973,6 +4973,26 @@ blast_bitscore (int alignlength, int nmismatches) {
}
+double
+Substring_evalue (T substring) {
+ int alignlength_trim;
+
+#if 0
+ /* Doesn't work for ambiguous substrings */
+ if (substring->plusp == true) {
+ alignlength_trim = (int) (substring->alignend_trim - substring->alignstart_trim);
+ } else {
+ alignlength_trim = (int) (substring->alignstart_trim - substring->alignend_trim);
+ }
+ assert(alignlength_trim == substring->queryend - substring->querystart);
+#else
+ alignlength_trim = substring->queryend - substring->querystart;
+#endif
+
+ return blast_evalue(alignlength_trim,substring->nmismatches_bothdiff);
+}
+
+
void
Substring_print_m8 (Filestring_T fp, T substring, Shortread_T headerseq, char *acc_suffix,
char *chr, bool invertp) {
diff --git a/src/substring.h b/src/substring.h
index 4f6f570..ce4f283 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 198080 2016-09-21 00:35:55Z twu $ */
+/* $Id: substring.h 199475 2016-10-23 23:21:59Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -325,6 +325,9 @@ Substring_sort_siteN_halves (List_T hitlist, bool ascendingp);
extern Chrpos_T
Substring_compute_chrpos (T this, int hardclip_low, bool hide_soft_clips_p);
+extern double
+Substring_evalue (T substring);
+
extern void
Substring_print_m8 (Filestring_T fp, T substring, Shortread_T headerseq, char *acc_suffix,
char *chr, bool invertp);
diff --git a/src/uniqscan.c b/src/uniqscan.c
index d5c3fbe..e6c1d92 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 197391 2016-09-03 00:43:23Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 200234 2016-11-08 00:56:52Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1325,7 +1325,7 @@ main (int argc, char *argv[]) {
/*force_xs_direction_p*/false,/*md_lowercase_variant_p*/false,
/*snps_p*/snps_iit ? true : false,/*print_nsnpdiffs_p*/snps_iit ? true : false,
Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
- /*gff3_phase_swap_p*/false);
+ /*gff3_phase_swap_p*/false,/*cigar_extended_p*/false);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
/*require_splicedir_p*/false,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list