[med-svn] [gmap] 01/08: Imported Upstream version 2014-11-25
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Fri Mar 13 09:51:49 UTC 2015
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 61cf1e5569567ac6a1eb07d1ad0601a4fac3b0fa
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Wed Nov 26 10:11:33 2014 +0100
Imported Upstream version 2014-11-25
---
ChangeLog | 141 +++
Makefile.in | 3 +
README | 25 +-
VERSION | 2 +-
acinclude.m4 | 3 +
config/ax_ext.m4 | 4 +-
config/ax_mpi.m4 | 176 ++++
configure | 493 ++++++++++-
configure.ac | 51 +-
src/Makefile.in | 3 +
src/access.c | 42 +-
src/bitpack64-read.c | 30 +-
src/bitpack64-readtwo.c | 6 +-
src/bitpack64-write.c | 14 +-
src/bytecoding.c | 69 +-
src/compress-write.c | 13 +-
src/config.h.in | 3 +
src/genome-write.c | 131 +--
src/genome.c | 32 +-
src/genome_sites.c | 170 ++--
src/get-genome.c | 28 +-
src/gmap.c | 24 +-
src/gmapindex.c | 58 +-
src/gsnap.c | 32 +-
src/iit-read-univ.c | 107 ++-
src/iit-read.c | 200 +++--
src/iit-write-univ.c | 14 +-
src/iit_get.c | 6 +-
src/iit_store.c | 27 +-
src/indel.c | 10 +-
src/indexdb-write.c | 146 ++--
src/indexdb.c | 30 +-
src/indexdb_hr.c | 28 +-
src/mem.c | 62 +-
src/oligoindex_hr.c | 8 +-
src/outbuffer.c | 59 +-
src/pair.c | 706 ++++++++++-----
src/pair.h | 22 +-
src/sam_sort.c | 289 ++++++-
src/samflags.h | 27 +-
src/samheader.c | 4 +-
src/samprint.c | 2214 ++++++++++++++++++++++++++++++++++-------------
src/samprint.h | 9 +-
src/samread.c | 220 ++++-
src/samread.h | 13 +-
src/sarray-read.c | 830 ++++++++++++------
src/snpindex.c | 22 +-
src/splice.c | 38 +-
src/stage1hr.c | 821 ++++++++++--------
src/stage2.c | 22 +-
src/stage3.c | 17 +-
src/stage3hr.c | 1752 +++++++++++++++++++------------------
src/stage3hr.h | 15 +-
src/substring.c | 129 +--
src/substring.h | 14 +-
src/table.c | 20 +-
src/tableuint8.c | 21 +-
src/uniqscan.c | 9 +-
src/univinterval.c | 13 +-
tests/Makefile.in | 3 +
util/Makefile.in | 3 +
util/gmap_build.pl.in | 4 +-
62 files changed, 6326 insertions(+), 3161 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 178c15e..d56db72 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,144 @@
+2014-11-26 twu
+
+ * README: Added comment about sam_sort and --split-output
+
+2014-11-25 twu
+
+ * sam_sort.c, samflags.h, samread.c, samread.h: Added --split-output and
+ --append-output options
+
+ * outbuffer.c: Changed abbrev NM in comment
+
+ * stage1hr.c: Changed calculation of amb_nmatches to amb_length
+
+ * stage3hr.c: Swapping ilength_low and ilength_high for GMAP when alignment
+ is minus
+
+ * stage1hr.c: Turning off debugging
+
+ * bootstrap.gsnaptoo: Running automake to add missing files
+
+ * trunk, util: Merged revisions 153682 to 154020 from
+ branches/2014-11-20-redo-overlap to compute overlap better using ilength53
+ and ilength35 and a common shift
+
+ * outbuffer.c, pair.c, pair.h, samprint.c, samprint.h, src, stage3hr.c,
+ stage3hr.h, substring.c, substring.h: Merged revisions 153682 to 154020
+ from branches/2014-11-20-redo-overlap to compute overlap better using
+ ilength53 and ilength35 and a common shift
+
+ * sarray-read.c: Merged revisions 153682 to 154020 to handle ambiguous
+ splicing better
+
+2014-11-24 twu
+
+ * INSTALL, VERSION, config.site.rescomp.prd, index.html, trunk: No longer
+ keeping track of INSTALL
+
+ * config.guess, config.sub, ltmain.sh: No longer keeping track of
+ config.guess, config.sub, or ltmain.sh
+
+ * gmap_build.pl.in: Added comment about meaning of -D flag
+
+ * acinclude.m4, configure.ac: Adding check for MPI
+
+ * ax_mpi.m4: Added code for MPI
+
+ * access.c, bitpack64-read.c, bitpack64-readtwo.c, bitpack64-write.c,
+ compress-write.c, genome-write.c, genome.c, genome128-write.c,
+ genome128.c, genome_sites.c, get-genome.c, gmapindex.c, iit-read-univ.c,
+ iit-read.c, iit_get.c, iit_store.c, indel.c, indexdb-write.c, indexdb.c,
+ indexdb_hr.c, mem.c, oligoindex_hr.c, sam_sort.c, samheader.c, snpindex.c,
+ src, stage1hr.c, stage3.c, table.c, tableuint8.c, uniqscan.c,
+ univinterval.c: Merged revisions 153114 to 153944 from
+ branches/2014-11-12-make-check-i386 to make tests work on i386 computers
+
+ * stage3hr.c: Not using ambiguous splices to update found_score
+
+ * stage2.c: Removed adjacentp as an unused variable
+
+ * samprint.c, samprint.h: For circular alignments, checking for sole HS
+ pattern. Also checking for chrpos > chrlength, and subtracting chrlength
+ if necessary.
+
+ * pair.c, pair.h: Added Cigar_action_T. Added Pair_check_cigar. Removed
+ prev as an unused variable.
+
+ * iit-write-univ.c: Handling the case if total_nintervals is 0
+
+ * gmap.c, gsnap.c: Added --action-if-cigar-error
+
+2014-11-18 twu
+
+ * bytecoding.c: Removed unused variable
+
+ * gmapindex.c: Printing genome length to stderr
+
+ * bytecoding.c: Using a buffer of 10,000,000 block-sizes, and writing
+ iteratively, rather than a single buffer and single write.
+
+2014-11-13 twu
+
+ * gmapindex.c: Made some changes in casting. Fixed printf format to use
+ %llu.
+
+ * stage3hr.c: Renamed amb_nmatches to amb_length.
+
+ * stage3hr.h: Renamed amb_nmatches to amb_length.
+
+ * samprint.c: In adjust_hardclips, not changing hardclips if shift downward
+ fails. Renamed amb_nmatches to amb_length.
+
+ * splice.c: Renamed amb_nmatches to amb_length. Providing
+ Substring_match_length_orig to amb_length in Stage3end_new_splice and
+ Stage3end_new_shortexon
+
+ * stage1hr.c: Renamed amb_nmatches to amb_length. Providing
+ Substring_match_length_orig to amb_length in Stage3end_new_splice and
+ Stage3end_new_shortexon
+
+2014-10-31 twu
+
+ * iit-read-univ.c, iit-read.c: Using %llu and casting to (long long int) for
+ printing offset and filesize
+
+ * gmap.c, gsnap.c: Using %zu for printing results of sizeof().
+
+2014-10-29 twu
+
+ * stage3hr.c: Restoring revision of SAM insertlength for ends involving GMAP
+ when method is successful
+
+ * stage3hr.c: Fixed SAM output of insert length of 0 when no overlap is
+ found in a GMAP alignment
+
+ * stage3.c: Added debugging statements
+
+ * gmap.c, gmapindex.c, gsnap.c: Added output statement at end of checking
+ compiler assumptions
+
+ * README: Added comment about change from PG: to XG:
+
+ * ax_ext.m4, configure.ac: Added option to enable or disable sse4.2
+
+ * samprint.c: Fixed typo in adjust_hardclips. Also, when querypos increase
+ fails, trying querypos decrease.
+
+ * samprint.c: Fixed infinite loop in adjust_hardclips
+
+2014-10-28 twu
+
+ * stage3hr.c: Fixed bug with uninitialized variables
+
+ * outbuffer.c: Fixing potential data race as noted by valgrind for
+ this->ntotal between input and output threads, although not problematic
+ before, because this->ntotal increases monotonically
+
+2014-10-27 twu
+
+ * stage3hr.c: Fixed computation of overlap between GMAP and non-GMAP
+ alignments
+
2014-10-22 twu
* gregion.c: Checking size before deciding to use alloca or malloc.
diff --git a/Makefile.in b/Makefile.in
index 3e7cc85..02f38ca 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -51,6 +51,7 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/mmap-flags.m4 \
$(top_srcdir)/config/acx_mmap_fixed.m4 \
$(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/ax_mpi.m4 \
$(top_srcdir)/config/acx_pthread.m4 \
$(top_srcdir)/config/builtin-popcount.m4 \
$(top_srcdir)/config/struct-stat64.m4 \
@@ -169,6 +170,8 @@ LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MAX_READLENGTH = @MAX_READLENGTH@
MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+MPILIBS = @MPILIBS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
diff --git a/README b/README
index 56880f2..98274ba 100644
--- a/README
+++ b/README
@@ -697,8 +697,13 @@ given read. If there is only a single alignment, this value is 0.
XO: Output type. GSNAP categorizes its alignments into output types,
as follows. Note that the --split-output option will create separate
-output files for each output type, and the filename suffixes are shown
-below for each output type:
+output files for each output type. Alternatively, if you use
+sam_sort, you should provide --split-output to that program instead
+to achieve the same functionality. (The reason for this is that
+there may be situations where GSNAP assigns different output types to
+the first and second ends of the reads and sam_sort needs to see
+alignments from both ends together.) In either case, the output types
+have the following meanings and filename suffixes:
NM (nomapping) (filename suffix ".nomapping"): The entire read
(single-end or paired-end) could not be aligned. If the
@@ -856,7 +861,9 @@ NM.
XG: Indicates which method within GSNAP generated the alignment. A:
suffix array method, T: terminal alignment, M: GMAP method, O: merging
of overlaps. Absence of XG flag indicates the standard GSNAP hash
-table method.
+table method. (Note: older versions of GSNAP used "PG:", but some
+downstream software required all PG methods to be listed in the header
+section, so we changed the field name to "XG:")
@@ -1416,3 +1423,15 @@ stranded flag is for laboratory protocols that allow only the 5'-to-3'
RNA, or sense, reads, and the non-stranded flag is for laboratory
protocols that allow both sense and antisense reads.
+
+16. Post-processing of SAM output
+==================================
+
+This package includes a program called sam_sort, which can efficiently
+sort the SAM output and mark duplicates in it. (More
+documentation to follow.)
+
+
+
+
+
diff --git a/VERSION b/VERSION
index 0fab9d5..7cc4453 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2014-10-22
\ No newline at end of file
+2014-11-25
\ No newline at end of file
diff --git a/acinclude.m4 b/acinclude.m4
index a3fe8b2..3bbc4c4 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -7,7 +7,10 @@ m4_include([config/madvise-flags.m4])
m4_include([config/mmap-flags.m4])
m4_include([config/acx_mmap_fixed.m4])
m4_include([config/acx_mmap_variable.m4])
+
+m4_include([config/ax_mpi.m4])
m4_include([config/acx_pthread.m4])
+
m4_include([config/builtin-popcount.m4])
m4_include([config/struct-stat64.m4])
m4_include([config/expand.m4])
diff --git a/config/ax_ext.m4 b/config/ax_ext.m4
index 170a8c7..5a0988b 100644
--- a/config/ax_ext.m4
+++ b/config/ax_ext.m4
@@ -136,8 +136,10 @@ AC_DEFUN([AX_EXT],
[
ax_cv_cpu_have_sse42_ext=no
if test "$((0x$ecx>>20&0x01))" = 1; then
- ax_cv_cpu_have_sse42_ext=yes
ax_cv_cpu_features="$ax_cv_cpu_features sse4.2"
+ if test "$ax_cv_want_sse42_ext" = yes; then
+ ax_cv_cpu_have_sse42_ext=yes
+ fi
fi
])
diff --git a/config/ax_mpi.m4 b/config/ax_mpi.m4
new file mode 100755
index 0000000..5b2322c
--- /dev/null
+++ b/config/ax_mpi.m4
@@ -0,0 +1,176 @@
+# ===========================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_mpi.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_MPI([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+#
+# DESCRIPTION
+#
+# This macro tries to find out how to compile programs that use MPI
+# (Message Passing Interface), a standard API for parallel process
+# communication (see http://www-unix.mcs.anl.gov/mpi/)
+#
+# On success, it sets the MPICC, MPICXX, MPIF77, or MPIFC output variable
+# to the name of the MPI compiler, depending upon the current language.
+# (This may just be $CC/$CXX/$F77/$FC, but is more often something like
+# mpicc/mpiCC/mpif77/mpif90.) It also sets MPILIBS to any libraries that
+# are needed for linking MPI (e.g. -lmpi or -lfmpi, if a special
+# MPICC/MPICXX/MPIF77/MPIFC was not found).
+#
+# Note that this macro should be used only if you just have a few source
+# files that need to be compiled using MPI. In particular, you should
+# neither overwrite CC/CXX/F77/FC with the values of
+# MPICC/MPICXX/MPIF77/MPIFC, nor assume that you can use the same flags
+# etc. as the standard compilers. If you want to compile a whole program
+# using the MPI compiler commands, use one of the macros
+# AX_PROG_{CC,CXX,FC}_MPI.
+#
+# ACTION-IF-FOUND is a list of shell commands to run if an MPI library is
+# found, and ACTION-IF-NOT-FOUND is a list of commands to run if it is not
+# found. If ACTION-IF-FOUND is not specified, the default action will
+# define HAVE_MPI.
+#
+# LICENSE
+#
+# Copyright (c) 2008 Steven G. Johnson <stevenj at alum.mit.edu>
+# Copyright (c) 2008 Julian C. Cummings <cummings at cacr.caltech.edu>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception, the respective Autoconf Macro's copyright owner
+# gives unlimited permission to copy, distribute and modify the configure
+# scripts that are the output of Autoconf when processing the Macro. You
+# need not follow the terms of the GNU General Public License when using
+# or distributing such scripts, even though portions of the text of the
+# Macro appear in them. The GNU General Public License (GPL) does govern
+# all other use of the material that constitutes the Autoconf Macro.
+#
+# This special exception to the GPL applies to versions of the Autoconf
+# Macro released by the Autoconf Archive. When you make and distribute a
+# modified version of the Autoconf Macro, you may extend this special
+# exception to the GPL to apply to your modified version as well.
+
+#serial 8
+
+AU_ALIAS([ACX_MPI], [AX_MPI])
+AC_DEFUN([AX_MPI], [
+AC_PREREQ(2.50) dnl for AC_LANG_CASE
+
+AC_LANG_CASE([C], [
+ AC_REQUIRE([AC_PROG_CC])
+ AC_ARG_VAR(MPICC,[MPI C compiler command])
+ AC_CHECK_PROGS(MPICC, mpicc hcc mpxlc_r mpxlc mpcc cmpicc, $CC)
+ ax_mpi_save_CC="$CC"
+ CC="$MPICC"
+ AC_SUBST(MPICC)
+],
+[C++], [
+ AC_REQUIRE([AC_PROG_CXX])
+ AC_ARG_VAR(MPICXX,[MPI C++ compiler command])
+ AC_CHECK_PROGS(MPICXX, mpic++ mpicxx mpiCC hcp mpxlC_r mpxlC mpCC cmpic++, $CXX)
+ ax_mpi_save_CXX="$CXX"
+ CXX="$MPICXX"
+ AC_SUBST(MPICXX)
+],
+[Fortran 77], [
+ AC_REQUIRE([AC_PROG_F77])
+ AC_ARG_VAR(MPIF77,[MPI Fortran 77 compiler command])
+ AC_CHECK_PROGS(MPIF77, mpif77 hf77 mpxlf_r mpxlf mpf77 cmpifc, $F77)
+ ax_mpi_save_F77="$F77"
+ F77="$MPIF77"
+ AC_SUBST(MPIF77)
+],
+[Fortran], [
+ AC_REQUIRE([AC_PROG_FC])
+ AC_ARG_VAR(MPIFC,[MPI Fortran compiler command])
+ AC_CHECK_PROGS(MPIFC, mpif90 mpxlf95_r mpxlf90_r mpxlf95 mpxlf90 mpf90 cmpif90c, $FC)
+ ax_mpi_save_FC="$FC"
+ FC="$MPIFC"
+ AC_SUBST(MPIFC)
+])
+
+if test x = x"$MPILIBS"; then
+ AC_LANG_CASE([C], [AC_CHECK_FUNC(MPI_Init, [MPILIBS=" "])],
+ [C++], [AC_CHECK_FUNC(MPI_Init, [MPILIBS=" "])],
+ [Fortran 77], [AC_MSG_CHECKING([for MPI_Init])
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([],[ call MPI_Init])],[MPILIBS=" "
+ AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no)])],
+ [Fortran], [AC_MSG_CHECKING([for MPI_Init])
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([],[ call MPI_Init])],[MPILIBS=" "
+ AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no)])])
+fi
+AC_LANG_CASE([Fortran 77], [
+ if test x = x"$MPILIBS"; then
+ AC_CHECK_LIB(fmpi, MPI_Init, [MPILIBS="-lfmpi"])
+ fi
+ if test x = x"$MPILIBS"; then
+ AC_CHECK_LIB(fmpich, MPI_Init, [MPILIBS="-lfmpich"])
+ fi
+],
+[Fortran], [
+ if test x = x"$MPILIBS"; then
+ AC_CHECK_LIB(fmpi, MPI_Init, [MPILIBS="-lfmpi"])
+ fi
+ if test x = x"$MPILIBS"; then
+ AC_CHECK_LIB(mpichf90, MPI_Init, [MPILIBS="-lmpichf90"])
+ fi
+])
+if test x = x"$MPILIBS"; then
+ AC_CHECK_LIB(mpi, MPI_Init, [MPILIBS="-lmpi"])
+fi
+if test x = x"$MPILIBS"; then
+ AC_CHECK_LIB(mpich, MPI_Init, [MPILIBS="-lmpich"])
+fi
+
+dnl We have to use AC_TRY_COMPILE and not AC_CHECK_HEADER because the
+dnl latter uses $CPP, not $CC (which may be mpicc).
+AC_LANG_CASE([C], [if test x != x"$MPILIBS"; then
+ AC_MSG_CHECKING([for mpi.h])
+ AC_TRY_COMPILE([#include <mpi.h>],[],[AC_MSG_RESULT(yes)], [MPILIBS=""
+ AC_MSG_RESULT(no)])
+fi],
+[C++], [if test x != x"$MPILIBS"; then
+ AC_MSG_CHECKING([for mpi.h])
+ AC_TRY_COMPILE([#include <mpi.h>],[],[AC_MSG_RESULT(yes)], [MPILIBS=""
+ AC_MSG_RESULT(no)])
+fi],
+[Fortran 77], [if test x != x"$MPILIBS"; then
+ AC_MSG_CHECKING([for mpif.h])
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[ include 'mpif.h'])],[AC_MSG_RESULT(yes)], [MPILIBS=""
+ AC_MSG_RESULT(no)])
+fi],
+[Fortran], [if test x != x"$MPILIBS"; then
+ AC_MSG_CHECKING([for mpif.h])
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[ include 'mpif.h'])],[AC_MSG_RESULT(yes)], [MPILIBS=""
+ AC_MSG_RESULT(no)])
+fi])
+
+AC_LANG_CASE([C], [CC="$ax_mpi_save_CC"],
+ [C++], [CXX="$ax_mpi_save_CXX"],
+ [Fortran 77], [F77="$ax_mpi_save_F77"],
+ [Fortran], [FC="$ax_mpi_save_FC"])
+
+AC_SUBST(MPILIBS)
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x = x"$MPILIBS"; then
+ $2
+ :
+else
+ ifelse([$1],,[AC_DEFINE(HAVE_MPI,1,[Define if you have the MPI library.])],[$1])
+ :
+fi
+])dnl AX_MPI
diff --git a/configure b/configure
index 290e333..ace2d5f 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2014-10-22.
+# Generated by GNU Autoconf 2.63 for gmap 2014-11-25.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2014-10-22'
-PACKAGE_STRING='gmap 2014-10-22'
+PACKAGE_VERSION='2014-11-25'
+PACKAGE_STRING='gmap 2014-11-25'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -824,6 +824,8 @@ EGREP
GREP
SED
LIBTOOL
+MPILIBS
+MPICC
PERL
BINDIR
MAINTAINER_FALSE
@@ -938,6 +940,7 @@ enable_builtin_popcount
enable_sse2
enable_ssse3
enable_sse4_1
+enable_sse4_2
enable_avx
enable_avx2
enable_simd
@@ -954,6 +957,7 @@ CC
LDFLAGS
LIBS
CPPFLAGS
+MPICC
CPP
MAX_READLENGTH'
@@ -1508,7 +1512,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2014-10-22 to adapt to many kinds of systems.
+\`configure' configures gmap 2014-11-25 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1579,7 +1583,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2014-10-22:";;
+ short | recursive ) echo "Configuration of gmap 2014-11-25:";;
esac
cat <<\_ACEOF
@@ -1612,8 +1616,10 @@ Optional Features:
(default=yes). Requires that sse2 be enabled.
--enable-sse4.1 Enable sse4.1 simd commands if they compile and run
(default=yes). Requires that ssse3 be enabled.
- --enable-avx Enable avx simd commands if they compile and run
+ --enable-sse4.2 Enable sse4.2 simd commands if they compile and run
(default=yes). Requires that sse4.1 be enabled.
+ --enable-avx Enable avx simd commands if they compile and run
+ (default=yes). Requires that sse4.2 be enabled.
--enable-avx2 Enable avx2 simd commands if they compile and run
(default=yes). Requires that avx be enabled.
--enable-simd Enable simd commands in general if they compile and
@@ -1641,6 +1647,7 @@ Some influential environment variables:
LIBS libraries to pass to the linker, e.g. -l<library>
CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
you have headers in a nonstandard directory <include dir>
+ MPICC MPI C compiler command
CPP C preprocessor
MAX_READLENGTH
Maximum read length for GSNAP (default 300)
@@ -1711,7 +1718,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2014-10-22
+gmap configure 2014-11-25
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1725,7 +1732,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2014-10-22, which was
+It was created by gmap $as_me 2014-11-25, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2095,8 +2102,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2014-10-22" >&5
-$as_echo "2014-10-22" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2014-11-25" >&5
+$as_echo "2014-11-25" >&6; }
### Read defaults
@@ -4147,7 +4154,7 @@ fi
# Define the identity of the package.
PACKAGE=gmap
- VERSION=2014-10-22
+ VERSION=2014-11-25
cat >>confdefs.h <<_ACEOF
@@ -4452,6 +4459,7 @@ $as_echo "not found" >&6; }
+
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -5267,6 +5275,362 @@ if test "$am_t" != yes; then
fi
+
+
+
+
+
+ for ac_prog in mpicc hcc mpxlc_r mpxlc mpcc cmpicc
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_MPICC+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$MPICC"; then
+ ac_cv_prog_MPICC="$MPICC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_MPICC="$ac_prog"
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+MPICC=$ac_cv_prog_MPICC
+if test -n "$MPICC"; then
+ { $as_echo "$as_me:$LINENO: result: $MPICC" >&5
+$as_echo "$MPICC" >&6; }
+else
+ { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$MPICC" && break
+done
+test -n "$MPICC" || MPICC="$CC"
+
+ ax_mpi_save_CC="$CC"
+ CC="$MPICC"
+
+
+
+if test x = x"$MPILIBS"; then
+ { $as_echo "$as_me:$LINENO: checking for MPI_Init" >&5
+$as_echo_n "checking for MPI_Init... " >&6; }
+if test "${ac_cv_func_MPI_Init+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define MPI_Init to an innocuous variant, in case <limits.h> declares MPI_Init.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define MPI_Init innocuous_MPI_Init
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char MPI_Init (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef MPI_Init
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char MPI_Init ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_MPI_Init || defined __stub___MPI_Init
+choke me
+#endif
+
+int
+main ()
+{
+return MPI_Init ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
+ ac_cv_func_MPI_Init=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_MPI_Init=no
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_func_MPI_Init" >&5
+$as_echo "$ac_cv_func_MPI_Init" >&6; }
+if test "x$ac_cv_func_MPI_Init" = x""yes; then
+ MPILIBS=" "
+fi
+
+fi
+
+if test x = x"$MPILIBS"; then
+ { $as_echo "$as_me:$LINENO: checking for MPI_Init in -lmpi" >&5
+$as_echo_n "checking for MPI_Init in -lmpi... " >&6; }
+if test "${ac_cv_lib_mpi_MPI_Init+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lmpi $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char MPI_Init ();
+int
+main ()
+{
+return MPI_Init ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
+ ac_cv_lib_mpi_MPI_Init=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_mpi_MPI_Init=no
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_mpi_MPI_Init" >&5
+$as_echo "$ac_cv_lib_mpi_MPI_Init" >&6; }
+if test "x$ac_cv_lib_mpi_MPI_Init" = x""yes; then
+ MPILIBS="-lmpi"
+fi
+
+fi
+if test x = x"$MPILIBS"; then
+ { $as_echo "$as_me:$LINENO: checking for MPI_Init in -lmpich" >&5
+$as_echo_n "checking for MPI_Init in -lmpich... " >&6; }
+if test "${ac_cv_lib_mpich_MPI_Init+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lmpich $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char MPI_Init ();
+int
+main ()
+{
+return MPI_Init ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
+ ac_cv_lib_mpich_MPI_Init=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_mpich_MPI_Init=no
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_mpich_MPI_Init" >&5
+$as_echo "$ac_cv_lib_mpich_MPI_Init" >&6; }
+if test "x$ac_cv_lib_mpich_MPI_Init" = x""yes; then
+ MPILIBS="-lmpich"
+fi
+
+fi
+
+if test x != x"$MPILIBS"; then
+ { $as_echo "$as_me:$LINENO: checking for mpi.h" >&5
+$as_echo_n "checking for mpi.h... " >&6; }
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <mpi.h>
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ MPILIBS=""
+ { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+CC="$ax_mpi_save_CC"
+
+
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x = x"$MPILIBS"; then
+
+ :
+else
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_MPI 1
+_ACEOF
+
+ :
+fi
+ # Sets MPICC to use for isolated source files that need it
+
case `pwd` in
*\ * | *\ *)
{ $as_echo "$as_me:$LINENO: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5
@@ -5886,13 +6250,13 @@ if test "${lt_cv_nm_interface+set}" = set; then
else
lt_cv_nm_interface="BSD nm"
echo "int some_variable = 0;" > conftest.$ac_ext
- (eval echo "\"\$as_me:5889: $ac_compile\"" >&5)
+ (eval echo "\"\$as_me:6253: $ac_compile\"" >&5)
(eval "$ac_compile" 2>conftest.err)
cat conftest.err >&5
- (eval echo "\"\$as_me:5892: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
+ (eval echo "\"\$as_me:6256: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
(eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
cat conftest.err >&5
- (eval echo "\"\$as_me:5895: output\"" >&5)
+ (eval echo "\"\$as_me:6259: output\"" >&5)
cat conftest.out >&5
if $GREP 'External.*some_variable' conftest.out > /dev/null; then
lt_cv_nm_interface="MS dumpbin"
@@ -7097,7 +7461,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 7100 "configure"' > conftest.$ac_ext
+ echo '#line 7464 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -8954,11 +9318,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:8957: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:9321: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:8961: \$? = $ac_status" >&5
+ echo "$as_me:9325: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -9293,11 +9657,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:9296: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:9660: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:9300: \$? = $ac_status" >&5
+ echo "$as_me:9664: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -9398,11 +9762,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:9401: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:9765: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:9405: \$? = $ac_status" >&5
+ echo "$as_me:9769: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -9453,11 +9817,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:9456: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:9820: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:9460: \$? = $ac_status" >&5
+ echo "$as_me:9824: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -12256,7 +12620,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 12259 "configure"
+#line 12623 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -12352,7 +12716,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 12355 "configure"
+#line 12719 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -19545,8 +19909,8 @@ $as_echo "enabled" >&6; }
;;
xno)
- { $as_echo "$as_me:$LINENO: result: disabled" >&5
-$as_echo "disabled" >&6; }
+ { $as_echo "$as_me:$LINENO: result: disabled by user" >&5
+$as_echo "disabled by user" >&6; }
ax_cv_want_sse2_ext=no
;;
@@ -19661,8 +20025,8 @@ $as_echo "enabled" >&6; }
;;
xno)
- { $as_echo "$as_me:$LINENO: result: disabled" >&5
-$as_echo "disabled" >&6; }
+ { $as_echo "$as_me:$LINENO: result: disabled by user" >&5
+$as_echo "disabled by user" >&6; }
ax_cv_want_ssse3_ext=no
;;
@@ -19698,8 +20062,8 @@ $as_echo "enabled" >&6; }
;;
xno)
- { $as_echo "$as_me:$LINENO: result: disabled" >&5
-$as_echo "disabled" >&6; }
+ { $as_echo "$as_me:$LINENO: result: disabled by user" >&5
+$as_echo "disabled by user" >&6; }
ax_cv_want_sse41_ext=no
;;
@@ -19711,6 +20075,43 @@ $as_echo "not specified so enabled by default" >&6; }
esac
fi
+
+{ $as_echo "$as_me:$LINENO: checking whether sse4.2 is enabled" >&5
+$as_echo_n "checking whether sse4.2 is enabled... " >&6; }
+# Check whether --enable-sse4.2 was given.
+if test "${enable_sse4_2+set}" = set; then
+ enableval=$enable_sse4_2; answer="$enableval"
+else
+ answer=""
+fi
+
+if test "$ax_cv_want_sse41_ext" = no; then
+ { $as_echo "$as_me:$LINENO: result: disabled because the user disabled sse4.1" >&5
+$as_echo "disabled because the user disabled sse4.1" >&6; }
+ ax_cv_want_sse42_ext=no
+else
+ case x"$answer" in
+ xyes)
+ { $as_echo "$as_me:$LINENO: result: enabled" >&5
+$as_echo "enabled" >&6; }
+ ax_cv_want_sse42_ext=yes
+ ;;
+
+ xno)
+ { $as_echo "$as_me:$LINENO: result: disabled by user" >&5
+$as_echo "disabled by user" >&6; }
+ ax_cv_want_sse42_ext=no
+ ;;
+
+ x)
+ { $as_echo "$as_me:$LINENO: result: not specified so enabled by default" >&5
+$as_echo "not specified so enabled by default" >&6; }
+ ax_cv_want_sse42_ext=yes
+ ;;
+ esac
+fi
+
+
{ $as_echo "$as_me:$LINENO: checking whether avx is enabled" >&5
$as_echo_n "checking whether avx is enabled... " >&6; }
# Check whether --enable-avx was given.
@@ -19720,9 +20121,9 @@ else
answer=""
fi
-if test "$ax_cv_want_sse4.1_ext" = no; then
- { $as_echo "$as_me:$LINENO: result: disabled because the user disabled sse4.1" >&5
-$as_echo "disabled because the user disabled sse4.1" >&6; }
+if test "$ax_cv_want_sse42_ext" = no; then
+ { $as_echo "$as_me:$LINENO: result: disabled because the user disabled sse4.2" >&5
+$as_echo "disabled because the user disabled sse4.2" >&6; }
ax_cv_want_avx_ext=no
else
case x"$answer" in
@@ -19733,8 +20134,8 @@ $as_echo "enabled" >&6; }
;;
xno)
- { $as_echo "$as_me:$LINENO: result: disabled" >&5
-$as_echo "disabled" >&6; }
+ { $as_echo "$as_me:$LINENO: result: disabled by user" >&5
+$as_echo "disabled by user" >&6; }
ax_cv_want_avx_ext=no
;;
@@ -19769,8 +20170,8 @@ $as_echo "enabled" >&6; }
;;
xno)
- { $as_echo "$as_me:$LINENO: result: disabled" >&5
-$as_echo "disabled" >&6; }
+ { $as_echo "$as_me:$LINENO: result: disabled by user" >&5
+$as_echo "disabled by user" >&6; }
ax_cv_want_avx2_ext=no
;;
@@ -20273,8 +20674,10 @@ else
ax_cv_cpu_have_sse42_ext=no
if test "$((0x$ecx>>20&0x01))" = 1; then
- ax_cv_cpu_have_sse42_ext=yes
ax_cv_cpu_features="$ax_cv_cpu_features sse4.2"
+ if test "$ax_cv_want_sse42_ext" = yes; then
+ ax_cv_cpu_have_sse42_ext=yes
+ fi
fi
fi
@@ -22435,8 +22838,10 @@ else
ax_cv_cpu_have_sse42_ext=no
if test "$((0x$ecx>>20&0x01))" = 1; then
- ax_cv_cpu_have_sse42_ext=yes
ax_cv_cpu_features="$ax_cv_cpu_features sse4.2"
+ if test "$ax_cv_want_sse42_ext" = yes; then
+ ax_cv_cpu_have_sse42_ext=yes
+ fi
fi
fi
@@ -25945,7 +26350,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2014-10-22, which was
+This file was extended by gmap $as_me 2014-11-25, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -26008,7 +26413,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2014-10-22
+gmap config.status 2014-11-25
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
@@ -28015,6 +28420,10 @@ rebuild your compiler and linker.
" >&2;}
fi
+{ $as_echo "$as_me:$LINENO: checking compiler and flags to be used" >&5
+$as_echo_n "checking compiler and flags to be used... " >&6; }
+{ $as_echo "$as_me:$LINENO: result: $CC $CFLAGS" >&5
+$as_echo "$CC $CFLAGS" >&6; }
{ $as_echo "$as_me:$LINENO: checking pthread compiler flags to be used" >&5
$as_echo_n "checking pthread compiler flags to be used... " >&6; }
{ $as_echo "$as_me:$LINENO: result: $PTHREAD_CFLAGS" >&5
diff --git a/configure.ac b/configure.ac
index 12da643..3ac6615 100644
--- a/configure.ac
+++ b/configure.ac
@@ -133,8 +133,11 @@ AC_MSG_RESULT($BINDIR)
# Checks for programs.
#AC_PATH_PROG([PERL],[perl])
ACX_PATH_PERL
+
AC_PROG_CC
AM_PROG_CC_C_O
+AX_MPI # Sets MPICC to use for isolated source files that need it
+
AC_PROG_LIBTOOL
# Checks for libraries.
@@ -310,7 +313,7 @@ case x"$answer" in
;;
xno)
- AC_MSG_RESULT(disabled)
+ AC_MSG_RESULT(disabled by user)
ax_cv_want_sse2_ext=no
;;
@@ -343,7 +346,7 @@ else
;;
xno)
- AC_MSG_RESULT(disabled)
+ AC_MSG_RESULT(disabled by user)
ax_cv_want_ssse3_ext=no
;;
@@ -373,7 +376,7 @@ else
;;
xno)
- AC_MSG_RESULT(disabled)
+ AC_MSG_RESULT(disabled by user)
ax_cv_want_sse41_ext=no
;;
@@ -384,14 +387,44 @@ else
esac
fi
+
+AC_MSG_CHECKING(whether sse4.2 is enabled)
+AC_ARG_ENABLE([sse4.2],
+ AC_HELP_STRING([--enable-sse4.2],
+ [Enable sse4.2 simd commands if they compile and run (default=yes). Requires that sse4.1 be enabled.]),
+ [answer="$enableval"],
+ [answer=""])
+if test "$ax_cv_want_sse41_ext" = no; then
+ AC_MSG_RESULT([disabled because the user disabled sse4.1])
+ ax_cv_want_sse42_ext=no
+else
+ case x"$answer" in
+ xyes)
+ AC_MSG_RESULT(enabled)
+ ax_cv_want_sse42_ext=yes
+ ;;
+
+ xno)
+ AC_MSG_RESULT(disabled by user)
+ ax_cv_want_sse42_ext=no
+ ;;
+
+ x)
+ AC_MSG_RESULT([not specified so enabled by default])
+ ax_cv_want_sse42_ext=yes
+ ;;
+ esac
+fi
+
+
AC_MSG_CHECKING(whether avx is enabled)
AC_ARG_ENABLE([avx],
AC_HELP_STRING([--enable-avx],
- [Enable avx simd commands if they compile and run (default=yes). Requires that sse4.1 be enabled.]),
+ [Enable avx simd commands if they compile and run (default=yes). Requires that sse4.2 be enabled.]),
[answer="$enableval"],
[answer=""])
-if test "$ax_cv_want_sse4.1_ext" = no; then
- AC_MSG_RESULT([disabled because the user disabled sse4.1])
+if test "$ax_cv_want_sse42_ext" = no; then
+ AC_MSG_RESULT([disabled because the user disabled sse4.2])
ax_cv_want_avx_ext=no
else
case x"$answer" in
@@ -401,7 +434,7 @@ else
;;
xno)
- AC_MSG_RESULT(disabled)
+ AC_MSG_RESULT(disabled by user)
ax_cv_want_avx_ext=no
;;
@@ -430,7 +463,7 @@ else
;;
xno)
- AC_MSG_RESULT(disabled)
+ AC_MSG_RESULT(disabled by user)
ax_cv_want_avx2_ext=no
;;
@@ -710,6 +743,8 @@ rebuild your compiler and linker.
])
fi
+AC_MSG_CHECKING(compiler and flags to be used)
+AC_MSG_RESULT($CC $CFLAGS)
AC_MSG_CHECKING(pthread compiler flags to be used)
AC_MSG_RESULT($PTHREAD_CFLAGS)
AC_MSG_CHECKING(popcnt compiler flags to be used)
diff --git a/src/Makefile.in b/src/Makefile.in
index 8d5bc43..ba57020 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -54,6 +54,7 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/mmap-flags.m4 \
$(top_srcdir)/config/acx_mmap_fixed.m4 \
$(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/ax_mpi.m4 \
$(top_srcdir)/config/acx_pthread.m4 \
$(top_srcdir)/config/builtin-popcount.m4 \
$(top_srcdir)/config/struct-stat64.m4 \
@@ -648,6 +649,8 @@ LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MAX_READLENGTH = @MAX_READLENGTH@
MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+MPILIBS = @MPILIBS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
diff --git a/src/access.c b/src/access.c
index aaf0d72..31e4cf5 100644
--- a/src/access.c
+++ b/src/access.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: access.c 140509 2014-07-03 01:47:47Z twu $";
+static char rcsid[] = "$Id: access.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -86,7 +86,7 @@ Access_filesize (char *filename) {
#else
stat(filename,&sb);
#endif
- debug(printf("filesize is %lu\n",sb.st_size));
+ debug(printf("filesize is %zu\n",sb.st_size));
return sb.st_size;
}
@@ -440,19 +440,19 @@ Access_mmap (int *fd, size_t *len, char *filename, size_t eltsize, bool randomp)
#endif
,*fd,0);
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on len %ju from length %ju. Error %d: %s\n",
+ fprintf(stderr,"Got mmap failure on len %jd from length %jd. Error %d: %s\n",
length,length,errno,strerror(errno));
- debug(printf("Got MAP_FAILED on len %lu from length %lu\n",length,length));
+ debug(printf("Got MAP_FAILED on len %jd from length %jd\n",length,length));
memory = NULL;
} else if (randomp == true) {
- debug(printf("Got mmap of %lu bytes at %p to %p\n",length,memory,memory+length-1));
+ debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
#ifdef HAVE_MADVISE_MADV_RANDOM
madvise(memory,*len,MADV_RANDOM);
#endif
#endif
} else {
- debug(printf("Got mmap of %lu bytes at %p to %p\n",length,memory,memory+length-1));
+ debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
#ifdef HAVE_MADVISE_MADV_DONTNEED
madvise(memory,*len,MADV_DONTNEED);
@@ -505,19 +505,19 @@ Access_mmap_offset (int *remainder, int fd, off_t offset, size_t length, size_t
#endif
,fd,offset);
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on fd %d, offset %ju, length %ju. Error %d: %s\n",
+ fprintf(stderr,"Got mmap failure on fd %d, offset %jd, length %jd. Error %d: %s\n",
fd,offset,length,errno,strerror(errno));
- debug(printf("Got MAP_FAILED on fd %d, offset %lu, length %lu\n",fd,offset,length));
+ debug(printf("Got MAP_FAILED on fd %d, offset %jd, length %zu\n",fd,offset,length));
memory = NULL;
} else if (randomp == true) {
- debug(printf("Got mmap of %lu bytes at %p to %p\n",length,memory,memory+length-1));
+ debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
#ifdef HAVE_MADVISE_MADV_RANDOM
madvise(memory,length,MADV_RANDOM);
#endif
#endif
} else {
- debug(printf("Got mmap of %lu bytes at %p to %p\n",length,memory,memory+length-1));
+ debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
#ifdef HAVE_MADVISE_MADV_DONTNEED
madvise(memory,length,MADV_DONTNEED);
@@ -575,19 +575,19 @@ Access_mmap_rw (int *fd, size_t *len, char *filename, size_t eltsize, bool rando
#endif
,*fd,0);
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on len %ju from length %ju. Error %d: %s\n",
+ fprintf(stderr,"Got mmap failure on len %jd from length %jd. Error %d: %s\n",
*len,length,errno,strerror(errno));
- debug(printf("Got MAP_FAILED on len %lu from length %lu\n",*len,length));
+ debug(printf("Got MAP_FAILED on len %zu from length %jd\n",*len,length));
memory = NULL;
} else if (randomp == true) {
- debug(printf("Got mmap of %lu bytes at %p to %p\n",length,memory,memory+length-1));
+ debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
#ifdef HAVE_MADVISE_MADV_RANDOM
madvise(memory,*len,MADV_RANDOM);
#endif
#endif
} else {
- debug(printf("Got mmap of %lu bytes at %p to %p\n",length,memory,memory+length-1));
+ debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
#ifdef HAVE_MADVISE_MADV_DONTNEED
madvise(memory,*len,MADV_DONTNEED);
@@ -638,19 +638,19 @@ Access_mmap_offset_rw (int *remainder, int fd, off_t offset, size_t length, size
#endif
,fd,offset);
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on offset %ju, length %ju. Error %d: %s\n",
+ fprintf(stderr,"Got mmap failure on offset %jd, length %jd. Error %d: %s\n",
offset,length,errno,strerror(errno));
- debug(printf("Got MAP_FAILED on offset %lu, length %lu\n",offset,length));
+ debug(printf("Got MAP_FAILED on offset %jd, length %zu\n",offset,length));
memory = NULL;
} else if (randomp == true) {
- debug(printf("Got mmap of %lu bytes at %p to %p\n",length,memory,memory+length-1));
+ debug(printf("Got mmap of %zu bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
#ifdef HAVE_MADVISE_MADV_RANDOM
madvise(memory,length,MADV_RANDOM);
#endif
#endif
} else {
- debug(printf("Got mmap of %lu bytes at %p to %p\n",length,memory,memory+length-1));
+ debug(printf("Got mmap of %zu bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
#ifdef HAVE_MADVISE_MADV_DONTNEED
madvise(memory,length,MADV_DONTNEED);
@@ -723,15 +723,15 @@ Access_mmap_and_preload (int *fd, size_t *len, int *npages, double *seconds, cha
#endif
,*fd,0);
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on len %ju from length %ju. Error %d: %s\n",
+ fprintf(stderr,"Got mmap failure on len %jd from length %jd. Error %d: %s\n",
*len,length,errno,strerror(errno));
- debug(printf("Got MAP_FAILED on len %lu from length %lu\n",*len,length));
+ debug(printf("Got MAP_FAILED on len %jd from length %zu\n",*len,length));
memory = NULL;
Stopwatch_stop(stopwatch);
Stopwatch_free(&stopwatch);
} else {
/* Touch all pages */
- debug(printf("Got mmap of %lu bytes at %p to %p\n",length,memory,memory+length-1));
+ debug(printf("Got mmap of %zu bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
#ifdef HAVE_MADVISE_MADV_WILLNEED
madvise(memory,*len,MADV_WILLNEED);
diff --git a/src/bitpack64-read.c b/src/bitpack64-read.c
index cb37d91..306697e 100644
--- a/src/bitpack64-read.c
+++ b/src/bitpack64-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-read.c 132467 2014-04-06 01:31:26Z twu $";
+static char rcsid[] = "$Id: bitpack64-read.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -13038,7 +13038,7 @@ Bitpack64_offsetptr (UINT4 *end0, Storedoligomer_T oligo, UINT4 *bitpackptrs, UI
(unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
#ifdef DEBUG
- printf("oligo: %08X, remainder %d, offset0 %lu, offset1 %lu\n",
+ printf("oligo: %08X, remainder %d, offset0 %u, offset1 %u\n",
oligo,oligo % BLOCKSIZE,info[1],info[DIFFERENTIAL_METAINFO_SIZE+1]);
printf("bitpack:\n");
@@ -13364,7 +13364,7 @@ Bitpack64_offsetptr_huge (UINT8 *end0, Storedoligomer_T oligo,
(unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
#ifdef DEBUG
- printf("oligo: %08X, remainder %d, offset0 %lu, offset1 %lu\n",
+ printf("oligo: %08X, remainder %d, offset0 %u, offset1 %u\n",
oligo,oligo % BLOCKSIZE,info[1],info[DIFFERENTIAL_METAINFO_SIZE+1]);
printf("bitpack:\n");
@@ -13623,7 +13623,7 @@ Bitpack64_offsetptr_paired (UINT4 *end0, Storedoligomer_T oligo, UINT4 *bitpackp
(unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
#ifdef DEBUG
- printf("oligo: %08X, remainder %d, offset0 %lu, offset1 %lu\n",
+ printf("oligo: %08X, remainder %d, offset0 %u, offset1 %u\n",
oligo,oligo % BLOCKSIZE,info[1],info[DIFFERENTIAL_METAINFO_SIZE+1]);
printf("bitpack:\n");
@@ -14024,7 +14024,7 @@ Bitpack64_read_one_huge (Storedoligomer_T oligo, UINT4 *bitpackpages,
offset0 += info[1];
offset1 += info[DIFFERENTIAL_METAINFO_SIZE+1];
}
- debug(printf("offset0 = %lu, offset1 = %lu\n",offset0,offset1));
+ debug(printf("offset0 = %u, offset1 = %u\n",offset0,offset1));
psums[0] = psums[1] = offset0;
psums[2] = psums[3] = offset1;
@@ -14042,7 +14042,7 @@ Bitpack64_read_one_huge (Storedoligomer_T oligo, UINT4 *bitpackpages,
printf("%d %d %d %d\n",_diffs[12],_diffs[13],_diffs[14],_diffs[15]);
#endif
- debug(printf("Returning %lu + %d + %d + %d + %d\n",
+ debug(printf("Returning %u + %d + %d + %d + %d\n",
psums[quarter_block],_diffs[row+1],_diffs[row+2],_diffs[row+3],_diffs[row+4]));
return psums[quarter_block] + (INT4) (_diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4]);
@@ -14243,7 +14243,7 @@ Bitpack64_block_offsets (UINT4 *offsets, Storedoligomer_T oligo,
packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
#ifdef DEBUG
- printf("oligo: %08X, nwritten %u, offset0 %lu, offset1 %lu, packsize %d\n",
+ printf("oligo: %08X, nwritten %u, offset0 %u, offset1 %u, packsize %d\n",
oligo,nwritten,offset0,offset1,packsize);
#endif
@@ -14405,7 +14405,7 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Storedoligomer_T oligo,
#ifdef DEBUG
- printf("oligo: %08X, nwritten %u, offset0 %lu, offset1 %lu, packsize %d\n",
+ printf("oligo: %08X, nwritten %u, offset0 %u, offset1 %u, packsize %d\n",
oligo,nwritten,offset0,offset1,packsize);
#endif
@@ -14429,9 +14429,9 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Storedoligomer_T oligo,
vertical_order_huge_rev(&(offsets[33]),_diffs);
#ifdef DEBUG
- printf("%lu\n",offsets[i]);
+ printf("%u\n",offsets[i]);
for (i = 1; i <= 64; i += 4) {
- printf("%lu %lu %lu %lu\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
+ printf("%u %u %u %u\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
}
printf("end of diffs vertical\n");
#endif
@@ -14466,9 +14466,9 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Storedoligomer_T oligo,
vertical_order_huge(&(offsets[1]),columnar);
#ifdef DEBUG
- printf("%lu\n",offset0);
+ printf("%u\n",offset0);
for (i = 1; i <= 64; i += 4) {
- printf("%lu %lu %lu %lu\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
+ printf("%u %u %u %u\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
}
printf("end of diffs vertical\n");
#endif
@@ -14500,13 +14500,13 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Storedoligomer_T oligo,
#ifdef DEBUG
- printf("%lu\n",offsets[0]);
+ printf("%u\n",offsets[0]);
for (i = 1; i <= 32; i += 4) {
- printf("%lu %lu %lu %lu\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
+ printf("%u %u %u %u\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
}
printf("\n");
for (i = 33; i <= 64; i += 4) {
- printf("%lu %lu %lu %lu\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
+ printf("%u %u %u %u\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
}
printf("end of offsets\n");
#endif
diff --git a/src/bitpack64-readtwo.c b/src/bitpack64-readtwo.c
index 8f942e6..afc5e72 100644
--- a/src/bitpack64-readtwo.c
+++ b/src/bitpack64-readtwo.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-readtwo.c 132472 2014-04-06 02:02:52Z twu $";
+static char rcsid[] = "$Id: bitpack64-readtwo.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -13763,7 +13763,7 @@ Bitpack64_read_two (UINT4 *end0, Storedoligomer_T oligo, UINT4 *bitpackptrs, UIN
(unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
#ifdef DEBUG
- printf("oligo: %08X, remainder %d, offset0 %lu, offset1 %lu\n",
+ printf("oligo: %08X, remainder %d, offset0 %u, offset1 %u\n",
oligo,oligo % BLOCKSIZE,info[1],info[METAINFO_SIZE+1]);
printf("bitpack:\n");
@@ -14087,7 +14087,7 @@ Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
(unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
#ifdef DEBUG
- printf("oligo: %08X, remainder %d, offset0 %lu, offset1 %lu\n",
+ printf("oligo: %08X, remainder %d, offset0 %u, offset1 %u\n",
oligo,oligo % BLOCKSIZE,info[1],info[METAINFO_SIZE+1]);
printf("bitpack:\n");
diff --git a/src/bitpack64-write.c b/src/bitpack64-write.c
index 6a98ec1..6314470 100644
--- a/src/bitpack64-write.c
+++ b/src/bitpack64-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-write.c 151045 2014-10-16 19:08:17Z twu $";
+static char rcsid[] = "$Id: bitpack64-write.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -5017,7 +5017,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
/* Value for start of block */
while (ascending[positioni] >= nextpage) {
- fprintf(stderr,"\nAt position %u (block %u), ascending %lu >= nextpage %lu",
+ fprintf(stderr,"\nAt position %u (block %u), ascending %llu >= nextpage %llu",
positioni,positioni/BLOCKSIZE,ascending[positioni],nextpage);
pages[pagei++] = positioni/BLOCKSIZE;
currpage = nextpage;
@@ -5045,7 +5045,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
/* Value for start of block */
while (ascending[positioni] >= nextpage) {
- fprintf(stderr,"\nAt position %u (block %u), ascending %lu >= nextpage %lu",
+ fprintf(stderr,"\nAt position %u (block %u), ascending %llu >= nextpage %llu",
positioni,positioni/BLOCKSIZE,ascending[positioni],nextpage);
pages[pagei++] = positioni/BLOCKSIZE;
currpage = nextpage;
@@ -5079,7 +5079,7 @@ Bitpack64_write_differential_huge (char *pagesfile, char *ptrsfile, char *compfi
/* Value for end of block */
if (ascending[n] >= nextpage) {
- fprintf(stderr,"\nAt final oligo %u (block %u), ascending %lu >= nextpage %lu",
+ fprintf(stderr,"\nAt final oligo %u (block %u), ascending %llu >= nextpage %llu",
n,n/BLOCKSIZE,ascending[n],nextpage);
pages[pagei++] = n/BLOCKSIZE;
currpage = nextpage;
@@ -5187,7 +5187,7 @@ Bitpack64_write_fixed10_huge (char *pagesfile, char *ptrsfile, char *compfile,
/* Value for start of block */
while (ascending[positioni] >= nextpage) {
- fprintf(stderr,"\nAt position %u (block %u), ascending %lu >= nextpage %lu",
+ fprintf(stderr,"\nAt position %u (block %u), ascending %llu >= nextpage %llu",
positioni,positioni/BLOCKSIZE,ascending[positioni],nextpage);
pages[pagei++] = positioni/BLOCKSIZE;
currpage = nextpage;
@@ -5219,7 +5219,7 @@ Bitpack64_write_fixed10_huge (char *pagesfile, char *ptrsfile, char *compfile,
/* Value for start of block */
while (ascending[positioni] >= nextpage) {
- fprintf(stderr,"\nAt position %u (block %u), ascending %lu >= nextpage %lu",
+ fprintf(stderr,"\nAt position %u (block %u), ascending %llu >= nextpage %llu",
positioni,positioni/BLOCKSIZE,ascending[positioni],nextpage);
pages[pagei++] = positioni/BLOCKSIZE;
currpage = nextpage;
@@ -5258,7 +5258,7 @@ Bitpack64_write_fixed10_huge (char *pagesfile, char *ptrsfile, char *compfile,
/* Value for end of block */
if (ascending[n] >= nextpage) {
- fprintf(stderr,"\nAt final oligo %u (block %u), ascending %lu >= nextpage %lu",
+ fprintf(stderr,"\nAt final oligo %u (block %u), ascending %llu >= nextpage %llu",
n,n/BLOCKSIZE,ascending[n],nextpage);
pages[pagei++] = n/BLOCKSIZE;
currpage = nextpage;
diff --git a/src/bytecoding.c b/src/bytecoding.c
index e9d2dc9..5c7bbcf 100644
--- a/src/bytecoding.c
+++ b/src/bytecoding.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bytecoding.c 136085 2014-05-13 23:00:04Z twu $";
+static char rcsid[] = "$Id: bytecoding.c 153444 2014-11-18 01:24:55Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -140,6 +140,7 @@ Bytecoding_write_exceptions_only (char *excfile, char *guidefile, UINT4 *values,
#define LCPCHILDDC_BLOCKSIZE 5
+#define BUFFER_NBLOCKS 10000000
/* Interleaved byte array with lcp info, child info, and
discriminating chars. Each lcp and child element takes 1 byte, and
@@ -156,31 +157,37 @@ Bytecoding_write_lcpchilddc (char *bytesfile, char *excfile, char *guidefile, UI
unsigned char *discrim_chars, unsigned char *lcpbytes,
UINT4 genomelength, int guide_interval) {
FILE *fp_bytes, *fp_guide, *fp_exceptions;
- unsigned char *bytes, *bytes_orig;
+ unsigned char *bytes_buffer, *bytes_ptr;
UINT4 nexceptions = 0;
UINT4 n = genomelength, i;
- size_t nblocks;
+ /* size_t nblocks; */
+ int b;
UINT4 guide_value = 0;
- nblocks = ((n + 1) + 1)/2;
+ /* nblocks = ((n + 1) + 1)/2; */
+
+ bytes_buffer = (unsigned char *) MALLOC(BUFFER_NBLOCKS * LCPCHILDDC_BLOCKSIZE * sizeof(unsigned char));
- bytes_orig = bytes = (unsigned char *) MALLOC(nblocks * LCPCHILDDC_BLOCKSIZE * sizeof(unsigned char));
fp_exceptions = FOPEN_WRITE_BINARY(excfile);
fp_guide = FOPEN_WRITE_BINARY(guidefile);
+ fp_bytes = FOPEN_WRITE_BINARY(bytesfile);
i = 0;
+ bytes_ptr = &(bytes_buffer[0]);
+ b = 0;
+ fprintf(stderr,"Writing file %s",bytesfile);
while (i + 1 <= n) {
- *bytes++ = lcpbytes[i];
- *bytes++ = lcpbytes[i+1];
+ *bytes_ptr++ = lcpbytes[i]; /* Byte 0 */
+ *bytes_ptr++ = lcpbytes[i+1]; /* Byte 1 */
- *bytes++ = *discrim_chars++;
+ *bytes_ptr++ = *discrim_chars++; /* Byte 2 */
if (child[i] < 255) {
- *bytes++ = (unsigned char) child[i];
+ *bytes_ptr++ = (unsigned char) child[i]; /* Byte 3 */
} else {
- *bytes++ = (unsigned char) 255; /* Indicates an exception */
+ *bytes_ptr++ = (unsigned char) 255; /* Byte 3. Indicates an exception */
while (i >= guide_value) {
FWRITE_UINT(nexceptions,fp_guide);
@@ -194,9 +201,9 @@ Bytecoding_write_lcpchilddc (char *bytesfile, char *excfile, char *guidefile, UI
i++;
if (child[i] < 255) {
- *bytes++ = (unsigned char) child[i];
+ *bytes_ptr++ = (unsigned char) child[i]; /* Byte 4 */
} else {
- *bytes++ = (unsigned char) 255; /* Indicates an exception */
+ *bytes_ptr++ = (unsigned char) 255; /* Byte 4. Indicates an exception */
while (i >= guide_value) {
FWRITE_UINT(nexceptions,fp_guide);
@@ -209,18 +216,24 @@ Bytecoding_write_lcpchilddc (char *bytesfile, char *excfile, char *guidefile, UI
}
i++;
+ if (++b >= BUFFER_NBLOCKS) {
+ fwrite(bytes_buffer,sizeof(unsigned char),BUFFER_NBLOCKS*LCPCHILDDC_BLOCKSIZE,fp_bytes);
+ bytes_ptr = &(bytes_buffer[0]);
+ b = 0;
+ fprintf(stderr,".");
+ }
}
if (i <= n) {
- *bytes++ = lcpbytes[i];
- *bytes++ = 0;
+ *bytes_ptr++ = lcpbytes[i]; /* Byte 0 */
+ *bytes_ptr++ = 0; /* Byte 1 */
- *bytes++ = *discrim_chars++;
+ *bytes_ptr++ = *discrim_chars++; /* Byte 2 */
if (child[i] < 255) {
- *bytes++ = (unsigned char) child[i];
+ *bytes_ptr++ = (unsigned char) child[i]; /* Byte 3 */
} else {
- *bytes++ = (unsigned char) 255; /* Indicates an exception */
+ *bytes_ptr++ = (unsigned char) 255; /* Byte 3. Indicates an exception */
while (i >= guide_value) {
FWRITE_UINT(nexceptions,fp_guide);
@@ -232,7 +245,14 @@ Bytecoding_write_lcpchilddc (char *bytesfile, char *excfile, char *guidefile, UI
nexceptions++;
}
- *bytes++ = 0x00;
+ *bytes_ptr++ = 0x00; /* Byte 4 */
+
+ if (++b >= BUFFER_NBLOCKS) {
+ fwrite(bytes_buffer,sizeof(unsigned char),BUFFER_NBLOCKS*LCPCHILDDC_BLOCKSIZE,fp_bytes);
+ bytes_ptr = &(bytes_buffer[0]);
+ b = 0;
+ fprintf(stderr,".");
+ }
}
@@ -248,18 +268,19 @@ Bytecoding_write_lcpchilddc (char *bytesfile, char *excfile, char *guidefile, UI
#endif
+ if (b > 0) {
+ fwrite(bytes_buffer,sizeof(unsigned char),b*LCPCHILDDC_BLOCKSIZE,fp_bytes);
+ }
+ fprintf(stderr,"done\n");
+
+ fclose(fp_bytes);
fclose(fp_exceptions);
fclose(fp_guide);
fprintf(stderr,"Byte-coding: %u values < 255, %u exceptions >= 255 (%.1f%%)\n",
(n+1)-nexceptions,nexceptions,100*(double) nexceptions/(double) (n+1));
- fprintf(stderr,"Writing bytes file...");
- fp_bytes = FOPEN_WRITE_BINARY(bytesfile);
- fwrite(bytes_orig,sizeof(unsigned char),nblocks*LCPCHILDDC_BLOCKSIZE,fp_bytes);
- fclose(fp_bytes);
- fprintf(stderr,"done\n");
- FREE(bytes_orig);
+ FREE(bytes_buffer);
return;
}
diff --git a/src/compress-write.c b/src/compress-write.c
index 1c00e0a..c726331 100644
--- a/src/compress-write.c
+++ b/src/compress-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: compress-write.c 132144 2014-04-02 16:02:28Z twu $";
+static char rcsid[] = "$Id: compress-write.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -70,7 +70,8 @@ fill_buffer (int *Buffer, Genomecomp_T high, Genomecomp_T low, Genomecomp_T flag
} else if (Buffer[i] == 'T') {
Buffer[i] = 'X';
} else {
- printf("Parsing error; saw non-ACGT flag plus %c at position %lu\n",Buffer[i],position+i);
+ printf("Parsing error; saw non-ACGT flag plus %c at position %llu\n",
+ Buffer[i],(unsigned long long) position+i);
exit(9);
}
}
@@ -248,8 +249,8 @@ write_compressed_one (FILE *fp, int *nbadchars, char Buffer[], Univcoord_T posit
default:
(*nbadchars) += 1;
if (*nbadchars < MAX_BADCHAR_MESSAGES) {
- fprintf(stderr,"Don't recognize character %c at position %lu. Using N instead\n",
- Buffer[i],position+i);
+ fprintf(stderr,"Don't recognize character %c at position %llu. Using N instead\n",
+ Buffer[i],(unsigned long long) position+i);
} else if (*nbadchars == MAX_BADCHAR_MESSAGES) {
fprintf(stderr,"Too many non-recognizable characters. Not reporting each individual occurrence anymore.\n");
} else if ((*nbadchars) % BADCHAR_INTERVAL == 0) {
@@ -296,8 +297,8 @@ put_compressed_one (Genomecomp_T *sectioncomp, int *nbadchars, char Buffer[], Un
default:
(*nbadchars) += 1;
if (*nbadchars < MAX_BADCHAR_MESSAGES) {
- fprintf(stderr,"Don't recognize character %c at position %lu. Using N instead\n",
- Buffer[i],position+i);
+ fprintf(stderr,"Don't recognize character %c at position %llu. Using N instead\n",
+ Buffer[i],(unsigned long long) position+i);
} else if (*nbadchars == MAX_BADCHAR_MESSAGES) {
fprintf(stderr,"Too many non-recognizable characters. Not reporting each individual occurrence anymore.\n");
} else if ((*nbadchars) % BADCHAR_INTERVAL == 0) {
diff --git a/src/config.h.in b/src/config.h.in
index 92de003..9f7c832 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -145,6 +145,9 @@
*/
#undef HAVE_MM_POPCNT
+/* Define if you have the MPI library. */
+#undef HAVE_MPI
+
/* Define to 1 if you have the `munmap' function. */
#undef HAVE_MUNMAP
diff --git a/src/genome-write.c b/src/genome-write.c
index 448019f..6fae146 100644
--- a/src/genome-write.c
+++ b/src/genome-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome-write.c 132144 2014-04-02 16:02:28Z twu $";
+static char rcsid[] = "$Id: genome-write.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -90,35 +90,33 @@ find_positions (bool *revcompp, Univcoord_T *leftposition, Univcoord_T *rightpos
*truelength = Univinterval_length(interval);
*contigtype = Univinterval_type(interval);
- debug(printf("revcompp = %d, leftposition = %lu, rightposition = %lu, startposition = %lu, endposition = %lu\n",
- *revcompp,*leftposition,*rightposition,*startposition,*endposition));
+ debug(fprintf(stderr,"revcompp = %d, leftposition = %llu, rightposition = %llu, startposition = %llu, endposition = %llu\n",
+ *revcompp,(unsigned long long) *leftposition,(unsigned long long) *rightposition,
+ (unsigned long long) *startposition,(unsigned long long) *endposition));
+
return;
}
}
-#ifdef HAVE_FSEEKO
-
static void
-move_absolute (FILE *fp, off_t offset) {
- if (fseeko(fp,offset,SEEK_SET) < 0) {
- perror("Error in gmapindex, seek");
+move_absolute (FILE *fp, Univcoord_T offset) {
+
+#ifdef HAVE_FSEEKO
+ if (fseeko(fp,(off_t) offset,SEEK_SET) < 0) {
+ perror("Error in gmapindex, fseeko");
exit(9);
}
- return;
-}
-
#else
-
-static void
-move_absolute (FILE *fp, long int offset) {
- if (fseek(fp,offset,SEEK_SET) < 0) {
- perror("Error in gmapindex, seek");
+ if (fseek(fp,(long) offset,SEEK_SET) < 0) {
+ perror("Error in gmapindex, fseek");
exit(9);
}
+#endif
+
return;
}
-#endif
+
#define WRITEBLOCK 1024
static char Empty[WRITEBLOCK];
@@ -249,11 +247,11 @@ genome_write_file (FILE *refgenome_fp, FILE *input,
&truelength,&contigtype,accession,contig_iit);
if (++ncontigs < nmessages) {
if (revcompp == true) {
- fprintf(stderr,"Writing contig %s to universal coordinates %lu..%lu in genome %s\n",
- accession,startposition,endposition,fileroot);
+ fprintf(stderr,"Writing contig %s to universal coordinates %llu..%llu in genome %s\n",
+ accession,(unsigned long long) startposition,(unsigned long long) endposition,fileroot);
} else {
- fprintf(stderr,"Writing contig %s to universal coordinates %lu..%lu in genome %s\n",
- accession,startposition+1U,endposition+1U,fileroot);
+ fprintf(stderr,"Writing contig %s to universal coordinates %llu..%llu in genome %s\n",
+ accession,(unsigned long long) startposition+1U,(unsigned long long) endposition+1U,fileroot);
}
} else if (ncontigs == nmessages) {
fprintf(stderr,"More than %d contigs. Will stop printing messages\n",nmessages);
@@ -276,7 +274,7 @@ genome_write_file (FILE *refgenome_fp, FILE *input,
} else {
altstrain_offset = 0;
}
- debug(printf("Setting altstrain_offset to be %d\n",altstrain_offset));
+ debug(fprintf(stderr,"Setting altstrain_offset to be %d\n",altstrain_offset));
#endif
}
@@ -298,7 +296,8 @@ genome_write_file (FILE *refgenome_fp, FILE *input,
with sufficient X's. */
if (startposition > maxposition) {
/* Start beyond end of file */
- debug(printf("Filling with X's from %lu to %lu-1\n",maxposition,startposition));
+ debug(fprintf(stderr,"Filling with X's from %llu to %llu-1\n",
+ (unsigned long long) maxposition,(unsigned long long) startposition));
fill_x(refgenome_fp,maxposition,startposition,uncompressedp,index1part);
if (contigtype > 0) {
@@ -316,13 +315,14 @@ genome_write_file (FILE *refgenome_fp, FILE *input,
if (contigtype > 0) {
#ifdef ALTSTRAIN
if (rightposition + 1 > maxposition) {
- debug(printf("Filling with X's from %u to %u-1\n",maxposition,rightposition+1));
+ debug(fprintf(stderr,"Filling with X's from %llu to %llu-1\n",
+ (unsigned long long) maxposition,(unsigned long long) rightposition+1));
fill_x(refgenome_fp,maxposition,rightposition + 1,uncompressedp,index1part);
maxposition = currposition = rightposition + 1;
}
#endif
} else {
- debug(printf("Moving to %lu\n",startposition));
+ debug(fprintf(stderr,"Moving to %llu\n",(unsigned long long) startposition));
if (uncompressedp == true) {
move_absolute(refgenome_fp,startposition);
}
@@ -350,10 +350,10 @@ genome_write_file (FILE *refgenome_fp, FILE *input,
/* Write alternate strain */
if (revcompp == true) {
altstrain_offset -= strlen(segment);
- debug(printf("Writing alternate strain at %u\n",altstrain_offset));
+ debug(fprintf(stderr,"Writing alternate strain at %d\n",altstrain_offset));
IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
} else {
- debug(printf("Writing alternate strain at %u\n",altstrain_offset));
+ debug(fprintf(stderr,"Writing alternate strain at %d\n",altstrain_offset));
IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
altstrain_offset += strlen(segment);
}
@@ -361,23 +361,25 @@ genome_write_file (FILE *refgenome_fp, FILE *input,
} else {
/* Write reference strain */
if (revcompp == true) {
- debug(printf("Filling with sequence from %lu-1 to %lu\n",currposition,currposition-strlen(segment)));
- currposition -= strlen(segment);
+ debug(fprintf(stderr,"Filling with sequence from %llu-1 to %llu\n",
+ (unsigned long long) currposition,(unsigned long long) (currposition-strlen(segment))));
+ currposition -= (Univcoord_T) strlen(segment);
if (uncompressedp == true) {
fwrite(segment,sizeof(char),strlen(segment),refgenome_fp);
} else {
- nbadchars = Compress_update_file(nbadchars,refgenome_fp,segment,currposition,currposition+strlen(segment),
+ nbadchars = Compress_update_file(nbadchars,refgenome_fp,segment,currposition,currposition + (Univcoord_T) strlen(segment),
index1part);
}
} else {
- debug(printf("Filling with sequence from %lu to %lu-1\n",currposition,currposition+strlen(segment)));
+ debug(fprintf(stderr,"Filling with sequence from %llu to %llu-1\n",
+ (unsigned long long) currposition,(unsigned long long) (currposition+strlen(segment))));
if (uncompressedp == true) {
fwrite(segment,sizeof(char),strlen(segment),refgenome_fp);
} else {
- nbadchars = Compress_update_file(nbadchars,refgenome_fp,segment,currposition,currposition+strlen(segment),
+ nbadchars = Compress_update_file(nbadchars,refgenome_fp,segment,currposition,currposition + (Univcoord_T) strlen(segment),
index1part);
}
- currposition += strlen(segment);
+ currposition += (Univcoord_T) strlen(segment);
if (currposition > maxposition) {
maxposition = currposition;
}
@@ -426,11 +428,11 @@ genome_writeraw_file (FILE *refgenome_fp, FILE *input,
&truelength,&contigtype,accession,contig_iit);
if (++ncontigs < nmessages) {
if (revcompp == true) {
- fprintf(stderr,"Writing contig %s to universal coordinates %lu..%lu in genome %s\n",
- accession,startposition,endposition,fileroot);
+ fprintf(stderr,"Writing contig %s to universal coordinates %llu..%llu in genome %s\n",
+ accession,(unsigned long long) startposition,(unsigned long long) endposition,fileroot);
} else {
- fprintf(stderr,"Writing contig %s to universal coordinates %lu..%lu in genome %s\n",
- accession,startposition+1U,endposition+1U,fileroot);
+ fprintf(stderr,"Writing contig %s to universal coordinates %llu..%llu in genome %s\n",
+ accession,(unsigned long long) startposition+1U,(unsigned long long) endposition+1U,fileroot);
}
} else if (ncontigs == nmessages) {
fprintf(stderr,"More than %d contigs. Will stop printing messages\n",nmessages);
@@ -452,7 +454,7 @@ genome_writeraw_file (FILE *refgenome_fp, FILE *input,
} else {
altstrain_offset = 0;
}
- debug(printf("Setting altstrain_offset to be %d\n",altstrain_offset));
+ debug(fprintf(stderr,"Setting altstrain_offset to be %d\n",altstrain_offset));
#endif
}
@@ -474,7 +476,8 @@ genome_writeraw_file (FILE *refgenome_fp, FILE *input,
with sufficient X's. */
if (startposition > maxposition) {
/* Start beyond end of file */
- debug(printf("Filling with zeroes from %lu to %lu-1\n",maxposition,startposition));
+ debug(fprintf(stderr,"Filling with zeroes from %llu to %llu-1\n",
+ (unsigned long long) maxposition,(unsigned long long) startposition));
fill_zero(refgenome_fp,maxposition,startposition,/*uncompressedp*/true,index1part);
if (contigtype > 0) {
@@ -492,13 +495,13 @@ genome_writeraw_file (FILE *refgenome_fp, FILE *input,
if (contigtype > 0) {
#ifdef ALTSTRAIN
if (rightposition + 1 > maxposition) {
- debug(printf("Filling with zeroes from %u to %u-1\n",maxposition,rightposition+1));
+ debug(fprintf(stderr,"Filling with zeroes from %u to %u-1\n",maxposition,rightposition+1));
fill_zero(refgenome_fp,maxposition,rightposition + 1,/*uncompressedp*/true,index1part);
maxposition = currposition = rightposition + 1;
}
#endif
} else {
- debug(printf("Moving to %lu\n",startposition));
+ debug(fprintf(stderr,"Moving to %llu\n",(unsigned long long) startposition));
move_absolute(refgenome_fp,startposition);
currposition = startposition;
}
@@ -534,10 +537,10 @@ genome_writeraw_file (FILE *refgenome_fp, FILE *input,
will fail because they depend on \0 to terminate the segment. */
if (revcompp == true) {
altstrain_offset -= strlength;
- debug(printf("Writing alternate strain at %u\n",altstrain_offset));
+ debug(fprintf(stderr,"Writing alternate strain at %d\n",altstrain_offset));
IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
} else {
- debug(printf("Writing alternate strain at %u\n",altstrain_offset));
+ debug(fprintf(stderr,"Writing alternate strain at %d\n",altstrain_offset));
IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
altstrain_offset += strlength;
}
@@ -545,11 +548,13 @@ genome_writeraw_file (FILE *refgenome_fp, FILE *input,
} else {
/* Write reference strain */
if (revcompp == true) {
- debug(printf("Filling with sequence from %lu-1 to %lu\n",currposition,currposition-strlength));
+ debug(fprintf(stderr,"Filling with sequence from %llu-1 to %llu\n",
+ (unsigned long long) currposition,(unsigned long long) (currposition-strlength)));
currposition -= strlength;
fwrite(segment,sizeof(char),strlength,refgenome_fp);
} else {
- debug(printf("Filling with sequence from %lu to %lu-1\n",currposition,currposition+strlength));
+ debug(fprintf(stderr,"Filling with sequence from %llu to %llu-1\n",
+ (unsigned long long) currposition,(unsigned long long) (currposition+strlength)));
fwrite(segment,sizeof(char),strlength,refgenome_fp);
currposition += strlength;
if (currposition > maxposition) {
@@ -592,7 +597,9 @@ fill_circular_chromosomes (UINT4 *genomecomp, Univ_IIT_T chromosome_iit, int cir
chr = Univ_IIT_label(chromosome_iit,indx,&allocp);
/* Add 1U to report 1-based coordinates */
- fprintf(stderr,"Chromosome %s is circular. Copying %lu..%lu to %lu..%lu\n",chr,orig_startpos+1U,orig_endpos+1U,alias_startpos+1U,alias_endpos+1U);
+ fprintf(stderr,"Chromosome %s is circular. Copying %llu..%llu to %llu..%llu\n",
+ chr,(unsigned long long) orig_startpos+1U,(unsigned long long) orig_endpos+1U,
+ (unsigned long long) alias_startpos+1U,(unsigned long long) alias_endpos+1U);
if (allocp) {
FREE(chr);
}
@@ -639,11 +646,11 @@ genome_write_memory (FILE *refgenome_fp, FILE *input,
&truelength,&contigtype,accession,contig_iit);
if (++ncontigs < nmessages) {
if (revcompp == true) {
- fprintf(stderr,"Writing contig %s to universal coordinates %lu..%lu in genome %s\n",
- accession,startposition,endposition,fileroot);
+ fprintf(stderr,"Writing contig %s to universal coordinates %llu..%llu\n",
+ accession,(unsigned long long) startposition,(unsigned long long) endposition);
} else {
- fprintf(stderr,"Writing contig %s to universal coordinates %lu..%lu in genome %s\n",
- accession,startposition+1U,endposition+1U,fileroot);
+ fprintf(stderr,"Writing contig %s to universal coordinates %llu..%llu\n",
+ accession,(unsigned long long) startposition+1U,(unsigned long long) endposition+1U);
}
} else if (ncontigs == nmessages) {
fprintf(stderr,"More than %d contigs. Will stop printing messages\n",nmessages);
@@ -665,7 +672,7 @@ genome_write_memory (FILE *refgenome_fp, FILE *input,
} else {
altstrain_offset = 0;
}
- debug(printf("Setting altstrain_offset to be %d\n",altstrain_offset));
+ debug(fprintf(stderr,"Setting altstrain_offset to be %d\n",altstrain_offset));
#endif
}
@@ -687,7 +694,8 @@ genome_write_memory (FILE *refgenome_fp, FILE *input,
with sufficient X's. */
if (startposition > maxposition) {
/* Start beyond end of file */
- debug(printf("Filling with X's from %lu to %lu-1\n",maxposition,startposition));
+ debug(fprintf(stderr,"Filling with X's from %llu to %llu-1\n",
+ (unsigned long long) maxposition,(unsigned long long) startposition));
fill_x_memory(genomecomp,maxposition,startposition);
if (contigtype > 0) {
@@ -705,13 +713,13 @@ genome_write_memory (FILE *refgenome_fp, FILE *input,
if (contigtype > 0) {
#ifdef ALTSTRAIN
if (rightposition + 1 > maxposition) {
- debug(printf("Filling with X's from %u to %u-1\n",maxposition,rightposition+1));
+ debug(fprintf(stderr,"Filling with X's from %u to %u-1\n",maxposition,rightposition+1));
fill_x_memory(genomecomp,maxposition,rightposition + 1);
maxposition = currposition = rightposition + 1;
}
#endif
} else {
- debug(printf("Moving to %lu\n",startposition));
+ debug(fprintf(stderr,"Moving to %llu\n",(unsigned long long) startposition));
currposition = startposition;
}
}
@@ -736,10 +744,10 @@ genome_write_memory (FILE *refgenome_fp, FILE *input,
/* Write alternate strain */
if (revcompp == true) {
altstrain_offset -= strlen(segment);
- debug(printf("Writing alternate strain at %u\n",altstrain_offset));
+ debug(fprintf(stderr,"Writing alternate strain at %d\n",altstrain_offset));
IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
} else {
- debug(printf("Writing alternate strain at %u\n",altstrain_offset));
+ debug(fprintf(stderr,"Writing alternate strain at %d\n",altstrain_offset));
IIT_backfill_sequence(altstrain_iit,altstrain_index,altstrain_offset,segment);
altstrain_offset += strlen(segment);
}
@@ -747,11 +755,13 @@ genome_write_memory (FILE *refgenome_fp, FILE *input,
} else {
/* Write reference strain */
if (revcompp == true) {
- debug(printf("Filling with sequence from %lu-1 to %lu\n",currposition,currposition-strlen(segment)));
+ debug(fprintf(stderr,"Filling with sequence from %llu-1 to %llu\n",
+ (unsigned long long) currposition,(unsigned long long) currposition-strlen(segment)));
currposition -= strlen(segment);
nbadchars = Compress_update_memory(nbadchars,genomecomp,segment,currposition,currposition+strlen(segment));
} else {
- debug(printf("Filling with sequence from %lu to %lu-1\n",currposition,currposition+strlen(segment)));
+ debug(fprintf(stderr,"Filling with sequence from %llu to %llu-1\n",
+ (unsigned long long) currposition,(unsigned long long) currposition+strlen(segment)));
nbadchars = Compress_update_memory(nbadchars,genomecomp,segment,currposition,currposition+strlen(segment));
currposition += strlen(segment);
if (currposition > maxposition) {
@@ -784,7 +794,7 @@ Genome_write_comp32 (char *genomesubdir, char *fileroot, FILE *input,
Genomecomp_T *genomecomp;
int circular_typeint;
- fprintf(stderr,"Genome length is %lu nt\n",genomelength);
+ fprintf(stderr,"Genome length is %llu nt\n",(unsigned long long) genomelength);
if (uncompressedp == true) {
filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
strlen(fileroot)+strlen(".genome")+1,sizeof(char));
@@ -823,7 +833,8 @@ Genome_write_comp32 (char *genomesubdir, char *fileroot, FILE *input,
sprintf(filename,"%s/%s.genomecomp",genomesubdir,fileroot);
nuint4 = ((genomelength + 31)/32U)*3;
- fprintf(stderr,"Trying to allocate %lu*%lu bytes of memory...",nuint4,sizeof(Genomecomp_T));
+ fprintf(stderr,"Trying to allocate %llu*%d bytes of memory...",
+ (unsigned long long) nuint4,(int) sizeof(Genomecomp_T));
genomecomp = (Genomecomp_T *) CALLOC_NO_EXCEPTION(nuint4,sizeof(Genomecomp_T));
if (genomecomp == NULL) {
fprintf(stderr,"failed. Building genome in file.\n");
diff --git a/src/genome.c b/src/genome.c
index e0d0fe2..94746e2 100644
--- a/src/genome.c
+++ b/src/genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome.c 145990 2014-08-25 21:47:32Z twu $";
+static char rcsid[] = "$Id: genome.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -10406,7 +10406,7 @@ fill_buffer (Chrnum_T *chrnum, int *nunknowns, T this, Univcoord_T left, Chrpos_
}
gbuffer1[length] = '\0';
- debug(printf("Got sequence at %lu with length %u, forward\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, forward\n",(unsigned long long) left,length));
/* Fix out of bounds resulting from crossing chromosomes */
if (chromosome_iit == NULL) {
@@ -10449,7 +10449,7 @@ fill_buffer (Chrnum_T *chrnum, int *nunknowns, T this, Univcoord_T left, Chrpos_
inbounds_high = high - left;
}
}
- debug(printf("in-bounds at %lu..%lu\n",inbounds_low,inbounds_high));
+ debug(printf("in-bounds at %llu..%llu\n",(unsigned long long) inbounds_low,(unsigned long long) inbounds_high));
for (pos = 0; pos < inbounds_low; pos++) {
gbuffer1[pos] = OUTOFBOUNDS;
*nunknowns += 1;
@@ -10484,11 +10484,7 @@ Genome_fill_buffer_simple (T this, Univcoord_T left, Chrpos_T length, unsigned c
/* Fix out of bounds resulting from negative numbers */
if (left + length < left) {
-#ifdef HAVE_64_BIT
- fprintf(stderr,"left %lu + length %u < left %lu\n",left,length,left);
-#else
- fprintf(stderr,"left %u + length %u < left %u\n",left,length,left);
-#endif
+ fprintf(stderr,"left %llu + length %u < left %llu\n",(unsigned long long) left,length,(unsigned long long) left);
delta = -left;
length -= delta;
for (i = 0; i < delta; i++) {
@@ -10541,7 +10537,7 @@ Genome_fill_buffer_simple (T this, Univcoord_T left, Chrpos_T length, unsigned c
}
gbuffer1[length] = '\0';
- debug(printf("Got sequence at %lu with length %u, forward\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, forward\n",(unsigned long long) left,length));
return;
}
@@ -10608,7 +10604,7 @@ Genome_fill_buffer_simple_alt (T genome, T genomealt, Univcoord_T left, Chrpos_T
/* Fix out of bounds resulting from negative numbers */
if (left + length < left) {
- fprintf(stderr,"left %lu + length %u < left %lu\n",left,length,left);
+ fprintf(stderr,"left %llu + length %u < left %llu\n",(unsigned long long) left,length,(unsigned long long) left);
delta = -left;
length -= delta;
for (i = 0; i < delta; i++) {
@@ -10657,7 +10653,7 @@ Genome_fill_buffer_simple_alt (T genome, T genomealt, Univcoord_T left, Chrpos_T
}
gbuffer1[length] = '\0';
- debug(printf("Got sequence at %lu with length %u, forward\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, forward\n",(unsigned long long) left,length));
return;
}
@@ -11004,11 +11000,11 @@ Genome_get_segment (T this, Univcoord_T left, Chrpos_T length, Univ_IIT_T chromo
if (revcomp == true) {
/* make_complement_buffered(gbuffer2,gbuffer1,length);*/
make_complement_inplace(gbuffer,length);
- debug(printf("Got sequence at %lu with length %u, revcomp\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, revcomp\n",(unsigned long long) left,length));
debug1(Sequence_print(stdout,Sequence_genomic_new(gbuffer,length,/*copyp*/false),false,60,true));
return Sequence_genomic_new(gbuffer,length,/*copyp*/false);
} else {
- debug(printf("Got sequence at %lu with length %u, forward\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, forward\n",(unsigned long long) left,length));
debug1(Sequence_print(stdout,Sequence_genomic_new(gbuffer,length,/*copyp*/false),false,60,true));
return Sequence_genomic_new(gbuffer,length,/*copyp*/false);
}
@@ -11031,11 +11027,11 @@ Genome_get_segment_alt (T this, Univcoord_T left, Chrpos_T length, Univ_IIT_T ch
if (revcomp == true) {
/* make_complement_buffered(gbuffer2,gbuffer1,length); */
make_complement_inplace(gbuffer,length);
- debug(printf("Got sequence at %lu with length %u, revcomp\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, revcomp\n",(unsigned long long) left,length));
debug1(Sequence_print(stdout,Sequence_genomic_new(gbuffer,length,/*copyp*/false),false,60,true));
return Sequence_genomic_new(gbuffer,length,/*copyp*/false);
} else {
- debug(printf("Got sequence at %lu with length %u, forward\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, forward\n",(unsigned long long) left,length));
debug1(Sequence_print(stdout,Sequence_genomic_new(gbuffer1,length,/*copyp*/false),false,60,true));
return Sequence_genomic_new(gbuffer,length,/*copyp*/false);
}
@@ -11056,11 +11052,11 @@ Genome_get_segment_snp (T this, Univcoord_T left, Chrpos_T length, Univ_IIT_T ch
if (revcomp == true) {
/* make_complement_buffered(gbuffer2,gbuffer1,length); */
make_complement_inplace(gbuffer,length);
- debug(printf("Got sequence at %lu with length %u, revcomp\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, revcomp\n",(unsigned long long) left,length));
debug1(Sequence_print(stdout,Sequence_genomic_new(gbuffer,length,/*copyp*/false),false,60,true));
return Sequence_genomic_new(gbuffer,length,/*copyp*/false);
} else {
- debug(printf("Got sequence at %lu with length %u, forward\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, forward\n",(unsigned long long) left,length));
debug1(Sequence_print(stdout,Sequence_genomic_new(gbuffer,length,/*copyp*/false),false,60,true));
return Sequence_genomic_new(gbuffer,length,/*copyp*/false);
}
@@ -11144,7 +11140,7 @@ Genome_ntcounts (Univcoord_T *na, Univcoord_T *nc, Univcoord_T *ng, Univcoord_T
}
}
- debug(printf("Got sequence at %lu with length %u, forward\n",left,length));
+ debug(printf("Got sequence at %llu with length %u, forward\n",(unsigned long long) left,length));
return (*na) + (*nc) + (*ng) + (*nt);
}
diff --git a/src/genome_sites.c b/src/genome_sites.c
index 582fc4c..708e3b9 100644
--- a/src/genome_sites.c
+++ b/src/genome_sites.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome_sites.c 145990 2014-08-25 21:47:32Z twu $";
+static char rcsid[] = "$Id: genome_sites.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -24726,7 +24726,7 @@ Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T end
/* high: 9F61B62A low: 6D68A157 flags: 00000000 */
printf(" \t");
- printf("%u\t",startblock/3*32U);
+ printf("%llu\t",(unsigned long long) startblock/3*32U);
for (i = 0; i < startdiscard; i++) {
printf("*");
}
@@ -24741,14 +24741,14 @@ Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T end
high = blocks[ptr]; low = blocks[ptr+1]; flags = blocks[ptr+2];
#endif
printf("high: %08X low: %08X flags: %08X\t",high,low,flags);
- printf("%u\t",ptr/3*32U);
+ printf("%llu\t",(unsigned long long) ptr/3*32U);
write_chars_comp(high,low,flags);
printf("\n");
}
/* high: 9F61B62A low: 6D68A157 flags: 00000000 */
printf(" \t");
- printf("%u\t",(endblock+3)/3*32U);
+ printf("%llu\t",(unsigned long long) (endblock+3)/3*32U);
for (i = 0; i < enddiscard; i++) {
printf(" ");
}
@@ -24960,9 +24960,9 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
offset = -startdiscard + pos5 + splicepos_offset;
- debug2(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
- left,pos5,pos3,startblocki,endblocki));
- debug2(printf("startdiscard = %u, enddiscard = %u\n",startdiscard,enddiscard));
+ debug2(printf("left = %llu, pos5 = %d, pos3 = %d, startblocki = %llu, endblocki = %llu\n",
+ (unsigned long long) left,pos5,pos3,(unsigned long long) startblocki,(unsigned long long) endblocki));
+ debug2(printf("startdiscard = %d, enddiscard = %d\n",startdiscard,enddiscard));
if (endblocki == startblocki) {
/* Advance knownpos past pos5 */
@@ -25013,7 +25013,7 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
}
found -= lowbit;
#endif
- debug2(printf("found is %08X => offset %u + relpos %d\n",found,offset,relpos));
+ debug2(printf("found is %08X => offset %llu + relpos %d\n",found,(unsigned long long) offset,relpos));
}
/* Add knownpos to pos3 */
@@ -25074,7 +25074,7 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
}
found -= lowbit;
#endif
- debug2(printf("found is %08X => offset %u + relpos %d\n",found,offset,relpos));
+ debug2(printf("found is %08X => offset %llu + relpos %d\n",found,(unsigned long long) offset,relpos));
}
ptr = &(ref_blocks[startblocki+3]);
@@ -25090,7 +25090,7 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
}
if (low_halfsite & prev_high_halfsite) {
- debug2(printf("low_halfsite & prev_high_halfsite => offset %u - 1\n",offset));
+ debug2(printf("low_halfsite & prev_high_halfsite => offset %llu - 1\n",(unsigned long long) offset));
pos = offset - 1; /* verified that this should be offset - 1 */
while (*knownpos < pos) {
site_knowni[nfound] = *knowni++;
@@ -25136,7 +25136,7 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
}
found -= lowbit;
#endif
- debug2(printf("found is %08X => offset %u + relpos %d\n",found,offset,relpos));
+ debug2(printf("found is %08X => offset %llu + relpos %d\n",found,(unsigned long long) offset,relpos));
}
ptr += 3;
@@ -25155,7 +25155,7 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
debug2(printf("adding end mask %08x\n",clear_end_mask(enddiscard)));
if (low_halfsite & prev_high_halfsite) {
- debug2(printf("low_halfsite & prev_high_halfsite => offset %u - 1\n",offset));
+ debug2(printf("low_halfsite & prev_high_halfsite => offset %llu - 1\n",(unsigned long long) offset));
pos = offset - 1; /* verified that this should be offset - 1 */
while (*knownpos < pos) {
site_knowni[nfound] = *knowni++;
@@ -25201,7 +25201,7 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
}
found -= lowbit;
#endif
- debug2(printf("found is %08X => offset %u + relpos %d\n",found,offset,relpos));
+ debug2(printf("found is %08X => offset %llu + relpos %d\n",found,(unsigned long long) offset,relpos));
}
/* Add knownpos to pos3 */
@@ -25271,8 +25271,8 @@ prev_dinucleotide_position (Univcoord_T pos, Univcoord_T prevpos,
debug3a(
printf("\n\n");
- printf("Genome (in prev_dinucleotide_position): chroffset %lu, pos %u, prevpos %u\n",
- chroffset,pos-chroffset,prevpos-chroffset);
+ printf("Genome (in prev_dinucleotide_position): chroffset %llu, pos %llu, prevpos %llu\n",
+ (unsigned long long) chroffset,(unsigned long long) (pos-chroffset),(unsigned long long) (prevpos-chroffset));
Genome_print_blocks(ref_blocks,prevpos-chroffset,pos-chroffset);
printf("\n");
);
@@ -25296,8 +25296,9 @@ prev_dinucleotide_position (Univcoord_T pos, Univcoord_T prevpos,
#else
foundpos = offset - (relpos = (top = found >> 16) ? clz_table[top] : 16 + clz_table[found]);
#endif
- debug3a(printf("oneblock: found is %08X => offset %u - relpos %d (%lu) => returning %u\n",
- found,offset,relpos,foundpos-chroffset,foundpos));
+ debug3a(printf("oneblock: found is %08X => offset %llu - relpos %d (%llu) => returning %llu\n",
+ found,(unsigned long long) offset,relpos,(unsigned long long) (foundpos-chroffset),
+ (unsigned long long) foundpos));
return foundpos;
} else {
debug3a(printf("oneblock: not found\n"));
@@ -25320,8 +25321,9 @@ prev_dinucleotide_position (Univcoord_T pos, Univcoord_T prevpos,
#else
foundpos = offset - (relpos = (top = found >> 16) ? clz_table[top] : 16 + clz_table[found]);
#endif
- debug3a(printf("endblock: found is %08X => offset %u - relpos %d (%lu) => returning %u\n",
- found,offset,relpos,foundpos-chroffset,foundpos));
+ debug3a(printf("endblock: found is %08X => offset %llu - relpos %d (%llu) => returning %llu\n",
+ found,(unsigned long long) offset,relpos,(unsigned long long) (foundpos-chroffset),
+ (unsigned long long) foundpos));
return foundpos;
}
@@ -25340,8 +25342,8 @@ prev_dinucleotide_position (Univcoord_T pos, Univcoord_T prevpos,
}
if (high_halfsite & prev_low_halfsite) {
- debug3a(printf("high_halfsite & prev_low_halfsite => offset %u - 1 (%lu)\n",
- offset,offset));
+ debug3a(printf("high_halfsite & prev_low_halfsite => offset %llu - 1 (%llu)\n",
+ (unsigned long long) offset,(unsigned long long) offset));
return offset;
} else if (found != 0U) {
#ifdef HAVE_BUILTIN_CLZ
@@ -25349,8 +25351,9 @@ prev_dinucleotide_position (Univcoord_T pos, Univcoord_T prevpos,
#else
foundpos = offset - (relpos = (top = found >> 16) ? clz_table[top] : 16 + clz_table[found]);
#endif
- debug3a(printf("middleblock: found is %08X => offset %u - relpos %d (%lu) => returning %u\n",
- found,offset,relpos,foundpos-chroffset,foundpos));
+ debug3a(printf("middleblock: found is %08X => offset %llu - relpos %d (%llu) => returning %llu\n",
+ found,(unsigned long long) offset,relpos,(unsigned long long) (foundpos-chroffset),
+ (unsigned long long) foundpos));
return foundpos;
}
@@ -25368,8 +25371,8 @@ prev_dinucleotide_position (Univcoord_T pos, Univcoord_T prevpos,
}
if (high_halfsite & prev_low_halfsite) {
- debug3a(printf("high_halfsite & prev_low_halfsite => offset %u - 1 (%lu) => returning %u\n",
- offset,offset-1-chroffset,offset));
+ debug3a(printf("high_halfsite & prev_low_halfsite => offset %llu - 1 (%llu) => returning %llu\n",
+ (unsigned long long) offset,(unsigned long long) (offset-1-chroffset),(unsigned long long) offset));
return offset;
} else {
startdiscard = prevpos % 32;
@@ -25381,8 +25384,9 @@ prev_dinucleotide_position (Univcoord_T pos, Univcoord_T prevpos,
#else
foundpos = offset - (relpos = (top = found >> 16) ? clz_table[top] : 16 + clz_table[found]);
#endif
- debug3a(printf("startblock: found is %08X => offset %u - relpos %d (%lu) => returning %u\n",
- found,offset,relpos,foundpos-chroffset,foundpos));
+ debug3a(printf("startblock: found is %08X => offset %llu - relpos %d (%llu) => returning %llu\n",
+ found,(unsigned long long) offset,relpos,(unsigned long long) (foundpos-chroffset),
+ (unsigned long long) foundpos));
return foundpos;
} else {
debug3a(printf("startblock: not found\n"));
@@ -25432,9 +25436,9 @@ last_dinucleotide_positions_fwd (int *last_position, Univcoord_T genomicstart,
offset = -startdiscard + pos5 + splicepos_offset;
- debug3(printf("genomicstart = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
- genomicstart,pos5,pos3,startblocki,endblocki));
- debug3(printf("startdiscard = %u, enddiscard = %u\n",startdiscard,enddiscard));
+ debug3(printf("genomicstart = %llu, pos5 = %d, pos3 = %d, startblocki = %llu, endblocki = %llu\n",
+ (unsigned long long) genomicstart,pos5,pos3,(unsigned long long) startblocki,(unsigned long long) endblocki));
+ debug3(printf("startdiscard = %d, enddiscard = %d\n",startdiscard,enddiscard));
if (endblocki == startblocki) {
if (snp_blocks) {
@@ -25468,7 +25472,8 @@ last_dinucleotide_positions_fwd (int *last_position, Univcoord_T genomicstart,
found -= lowbit;
#endif
- debug3(printf("found is %08X => offset %u + relpos %d\n",found,offset,relpos));
+ debug3(printf("found is %08X => offset %llu + relpos %d\n",
+ found,(unsigned long long) offset,relpos));
}
while (i < genomiclength) {
@@ -25511,7 +25516,8 @@ last_dinucleotide_positions_fwd (int *last_position, Univcoord_T genomicstart,
found -= lowbit;
#endif
- debug3(printf("found is %08X => offset %u + relpos %d\n",found,offset,relpos));
+ debug3(printf("found is %08X => offset %llu + relpos %d\n",
+ found,(unsigned long long) offset,relpos));
}
ptr = &(ref_blocks[startblocki+3]);
@@ -25527,7 +25533,7 @@ last_dinucleotide_positions_fwd (int *last_position, Univcoord_T genomicstart,
}
if (low_halfsite & prev_high_halfsite) {
- debug3(printf("low_halfsite & prev_high_halfsite => offset %u\n",offset));
+ debug3(printf("low_halfsite & prev_high_halfsite => offset %llu\n",(unsigned long long) offset));
pos = offset;
while (i < pos) {
last_position[i] = lastpos;
@@ -25557,7 +25563,8 @@ last_dinucleotide_positions_fwd (int *last_position, Univcoord_T genomicstart,
found -= lowbit;
#endif
- debug3(printf("found is %08X => offset %u + relpos %d\n",found,offset,relpos));
+ debug3(printf("found is %08X => offset %llu + relpos %d\n",
+ found,(unsigned long long) offset,relpos));
}
ptr += 3;
@@ -25576,7 +25583,7 @@ last_dinucleotide_positions_fwd (int *last_position, Univcoord_T genomicstart,
debug3(printf("adding end mask %08x\n",clear_end_mask(enddiscard)));
if (low_halfsite & prev_high_halfsite) {
- debug3(printf("low_halfsite & prev_high_halfsite => offset %u\n",offset));
+ debug3(printf("low_halfsite & prev_high_halfsite => offset %llu\n",(unsigned long long) offset));
pos = offset;
while (i < pos) {
last_position[i] = lastpos;
@@ -25606,7 +25613,8 @@ last_dinucleotide_positions_fwd (int *last_position, Univcoord_T genomicstart,
found -= lowbit;
#endif
- debug3(printf("found is %08X => offset %u + relpos %d\n",found,offset,relpos));
+ debug3(printf("found is %08X => offset %llu + relpos %d\n",
+ found,(unsigned long long) offset,relpos));
}
while (i < genomiclength) {
@@ -25644,15 +25652,15 @@ prev_dinucleotide_position_rev (Chrpos_T pos, Chrpos_T prevpos, Univcoord_T chrh
debug3b(
printf("\n\n");
- printf("Genome (in prev_dinucleotide_position_rev): chrhigh %u, pos %u, prevpos %u\n",
- chrhigh,pos,prevpos);
+ printf("Genome (in prev_dinucleotide_position_rev): chrhigh %llu, pos %u, prevpos %u\n",
+ (unsigned long long) chrhigh,pos,prevpos);
Genome_print_blocks(ref_blocks,chrhigh-pos,chrhigh-prevpos);
printf("\n");
);
offset = pos + startdiscard + splicepos_offset;
- debug3b(printf("offset %u = pos %d + startdiscard %d + splicepos_offset %u\n",
- offset,pos,startdiscard,splicepos_offset));
+ debug3b(printf("offset %llu = pos %d + startdiscard %d + splicepos_offset %d\n",
+ (unsigned long long) offset,pos,startdiscard,splicepos_offset));
if (startblocki == endblocki) {
if (snp_blocks) {
@@ -25671,8 +25679,8 @@ prev_dinucleotide_position_rev (Chrpos_T pos, Chrpos_T prevpos, Univcoord_T chrh
#else
foundpos = offset - (relpos = mod_37_bit_position[(lowbit = -found & found) % 37]);
#endif
- debug3b(printf("oneblock: found is %08X => offset %u - relpos %d (%lu)\n",
- found,offset,relpos,chrhigh - foundpos));
+ debug3b(printf("oneblock: found is %08X => offset %llu - relpos %d (%llu)\n",
+ found,(unsigned long long) offset,relpos,(unsigned long long) (chrhigh - foundpos)));
return foundpos;
} else {
return (Chrpos_T) -1;
@@ -25689,8 +25697,8 @@ prev_dinucleotide_position_rev (Chrpos_T pos, Chrpos_T prevpos, Univcoord_T chrh
#else
foundpos = offset - (relpos = mod_37_bit_position[(lowbit = -found & found) % 37]);
#endif
- debug3b(printf("startblock: found is %08X => offset %u - relpos %d (%lu)\n",
- found,offset,relpos,chrhigh-foundpos));
+ debug3b(printf("startblock: found is %08X => offset %llu - relpos %d (%llu)\n",
+ found,(unsigned long long) offset,relpos,(unsigned long long) (chrhigh-foundpos)));
return foundpos;
}
@@ -25709,8 +25717,8 @@ prev_dinucleotide_position_rev (Chrpos_T pos, Chrpos_T prevpos, Univcoord_T chrh
}
if (low_halfsite & prev_high_halfsite) {
- debug3b(printf("low_halfsite & prev_high_halfsite => offset %u + 1 (%lu)\n",
- offset,chrhigh - (offset + 1)));
+ debug3b(printf("low_halfsite & prev_high_halfsite => offset %llu + 1 (%llu)\n",
+ (unsigned long long) offset,(unsigned long long) (chrhigh - (offset + 1))));
return offset + 1;
} else if (found != 0U) {
#ifdef HAVE_BUILTIN_CTZ
@@ -25718,8 +25726,8 @@ prev_dinucleotide_position_rev (Chrpos_T pos, Chrpos_T prevpos, Univcoord_T chrh
#else
foundpos = offset - (relpos = mod_37_bit_position[(lowbit = -found & found) % 37]);
#endif
- debug3b(printf("middleblock: found is %08X => offset %u - relpos %d (%lu)\n",
- found,offset,relpos,chrhigh-foundpos));
+ debug3b(printf("middleblock: found is %08X => offset %llu - relpos %d (%llu)\n",
+ found,(unsigned long long) offset,relpos,(unsigned long long) (chrhigh-foundpos)));
return foundpos;
}
@@ -25737,8 +25745,8 @@ prev_dinucleotide_position_rev (Chrpos_T pos, Chrpos_T prevpos, Univcoord_T chrh
}
if (low_halfsite & prev_high_halfsite) {
- debug3b(printf("low_halfsite & prev_high_halfsite => offset %u + 1 (%lu)\n",
- offset,chrhigh - (offset + 1)));
+ debug3b(printf("low_halfsite & prev_high_halfsite => offset %llu + 1 (%llu)\n",
+ (unsigned long long) offset,(unsigned long long) (chrhigh - (offset + 1))));
return offset + 1;
} else {
enddiscard = (chrhigh-prevpos) % 32;
@@ -25750,8 +25758,8 @@ prev_dinucleotide_position_rev (Chrpos_T pos, Chrpos_T prevpos, Univcoord_T chrh
#else
foundpos = offset - (relpos = mod_37_bit_position[(lowbit = -found & found) % 37]);
#endif
- debug3b(printf("endblock: found is %08X => offset %u - relpos %d (%lu)\n",
- found,offset,relpos,chrhigh-foundpos));
+ debug3b(printf("endblock: found is %08X => offset %llu - relpos %d (%llu)\n",
+ found,(unsigned long long) offset,relpos,(unsigned long long) (chrhigh-foundpos)));
return foundpos;
} else {
return (Chrpos_T) -1;
@@ -25800,7 +25808,7 @@ last_dinucleotide_positions_rev (int *last_position, Univcoord_T genomicstart,
offset = (originblocki - startblocki) * 32U/3 + origindiscard + splicepos_offset;
- debug3(printf("genomicstart = %u, pos5 = %d, pos3 = %d, genomiclength = %d, startblocki = %u, endblocki = %u, originblocki = %u\n",
+ debug3(printf("genomicstart = %llu, pos5 = %d, pos3 = %d, genomiclength = %d, startblocki = %u, endblocki = %u, originblocki = %u\n",
genomicstart,pos5,pos3,genomiclength,startblocki,endblocki,originblocki));
debug3(printf("startdiscard = %u, enddiscard = %u, origindiscard = %u\n",
startdiscard,enddiscard,origindiscard));
@@ -26031,8 +26039,11 @@ Genome_sense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_leftbou
Univcoord_T donorpos, acceptorpos;
Univcoord_T donor_shift, acceptor_shift;
- debug4(printf("Entered Genome_sense_canonicalp with donor %u..%u and acceptor %u..%u\n",
- donor_leftbound-chroffset,donor_rightbound-chroffset,acceptor_leftbound-chroffset,acceptor_rightbound-chroffset));
+ debug4(printf("Entered Genome_sense_canonicalp with donor %llu..%llu and acceptor %llu..%llu\n",
+ (unsigned long long) (donor_leftbound-chroffset),
+ (unsigned long long) (donor_rightbound-chroffset),
+ (unsigned long long) (acceptor_leftbound-chroffset),
+ (unsigned long long) (acceptor_rightbound-chroffset)));
if ((donorpos = prev_dinucleotide_position(donor_rightbound+1,donor_leftbound,
#ifdef DEBUG3A
chroffset,
@@ -26042,7 +26053,8 @@ Genome_sense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_leftbou
} else {
donorpos += 1U; /* Shift coordinates to match input */
donor_shift = donor_rightbound - donorpos;
- debug4(printf("Found donor at %u (shift %u)\n",donorpos-chroffset,donor_shift));
+ debug4(printf("Found donor at %llu (shift %llu)\n",
+ (unsigned long long) (donorpos-chroffset),(unsigned long long) donor_shift));
}
if ((acceptorpos = prev_dinucleotide_position(acceptor_rightbound-1,acceptor_leftbound-2,
@@ -26053,11 +26065,13 @@ Genome_sense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_leftbou
return false;
} else {
acceptor_shift = acceptor_rightbound - acceptorpos;
- debug4(printf("Found acceptor at %u (shift %u)\n",acceptorpos-chroffset,acceptor_shift));
+ debug4(printf("Found acceptor at %llu (shift %llu)\n",
+ (unsigned long long) (acceptorpos-chroffset),(unsigned long long) acceptor_shift));
}
while (1) {
- debug4(printf("sense: donor_shift %lu, acceptor_shift %lu\n",donor_shift,acceptor_shift));
+ debug4(printf("sense: donor_shift %llu, acceptor_shift %llu\n",
+ (unsigned long long) donor_shift,(unsigned long long) acceptor_shift));
if (donor_shift == acceptor_shift) {
debug4(printf("donor prob %f, acceptor prob %f\n",
Maxent_hr_donor_prob(donorpos,chroffset),Maxent_hr_acceptor_prob(acceptorpos,chroffset)));
@@ -26073,7 +26087,8 @@ Genome_sense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_leftbou
} else {
donorpos += 1U; /* Shift coordinates to match input */
donor_shift = donor_rightbound - donorpos;
- debug4(printf("Found donor at %u (shift %u)\n",donorpos-chroffset,donor_shift));
+ debug4(printf("Found donor at %llu (shift %llu)\n",
+ (unsigned long long) (donorpos-chroffset),(unsigned long long) donor_shift));
}
if ((acceptorpos = prev_dinucleotide_position((acceptorpos-1)-1,acceptor_leftbound-2,
@@ -26084,7 +26099,8 @@ Genome_sense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_leftbou
return false;
} else {
acceptor_shift = acceptor_rightbound - acceptorpos;
- debug4(printf("Found acceptor at %u (shift %u)\n",acceptorpos-chroffset,acceptor_shift));
+ debug4(printf("Found acceptor at %llu (shift %llu)\n",
+ (unsigned long long) (acceptorpos-chroffset),(unsigned long long) acceptor_shift));
}
}
@@ -26098,7 +26114,8 @@ Genome_sense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_leftbou
} else {
donorpos += 1U; /* Shift coordinates to match input */
donor_shift = donor_rightbound - donorpos;
- debug4(printf("Found donor at %u (shift %u)\n",donorpos-chroffset,donor_shift));
+ debug4(printf("Found donor at %llu (shift %llu)\n",
+ (unsigned long long) (donorpos-chroffset),(unsigned long long) donor_shift));
}
} else {
if ((acceptorpos = prev_dinucleotide_position((acceptorpos-1)-1,acceptor_leftbound-2,
@@ -26109,7 +26126,8 @@ Genome_sense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_leftbou
return false;
} else {
acceptor_shift = acceptor_rightbound - acceptorpos;
- debug4(printf("Found acceptor at %u (shift %u)\n",acceptorpos-chroffset,acceptor_shift));
+ debug4(printf("Found acceptor at %llu (shift %llu)\n",
+ (unsigned long long) (acceptorpos-chroffset),(unsigned long long) acceptor_shift));
}
}
}
@@ -26122,8 +26140,11 @@ Genome_antisense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_lef
Univcoord_T donorpos, acceptorpos;
Univcoord_T donor_shift, acceptor_shift;
- debug4(printf("Entered Genome_antisense_canonicalp with donor %u..%u and acceptor %u..%u\n",
- donor_leftbound-chroffset,donor_rightbound-chroffset,acceptor_leftbound-chroffset,acceptor_rightbound-chroffset));
+ debug4(printf("Entered Genome_antisense_canonicalp with donor %llu..%llu and acceptor %llu..%llu\n",
+ (unsigned long long) (donor_leftbound-chroffset),
+ (unsigned long long) (donor_rightbound-chroffset),
+ (unsigned long long) (acceptor_leftbound-chroffset),
+ (unsigned long long) (acceptor_rightbound-chroffset)));
if ((donorpos = prev_dinucleotide_position(donor_rightbound-1,donor_leftbound-2,
#ifdef DEBUG3A
chroffset,
@@ -26132,7 +26153,8 @@ Genome_antisense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_lef
return false;
} else {
donor_shift = donor_rightbound - donorpos;
- debug4(printf("Found donor at %u (shift %u)\n",donorpos-chroffset,donor_shift));
+ debug4(printf("Found donor at %llu (shift %llu)\n",
+ (unsigned long long) (donorpos-chroffset),(unsigned long long) donor_shift));
}
if ((acceptorpos = prev_dinucleotide_position(acceptor_rightbound+1,acceptor_leftbound,
@@ -26144,11 +26166,13 @@ Genome_antisense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_lef
} else {
acceptorpos += 1U; /* Shift coordinates to match input */
acceptor_shift = acceptor_rightbound - acceptorpos;
- debug4(printf("Found acceptor at %u (shift %u)\n",acceptorpos-chroffset,acceptor_shift));
+ debug4(printf("Found acceptor at %llu (shift %llu)\n",
+ (unsigned long long) (acceptorpos-chroffset),(unsigned long long) acceptor_shift));
}
while (1) {
- debug4(printf("antisense: donor_shift %lu, acceptor_shift %lu\n",donor_shift,acceptor_shift));
+ debug4(printf("antisense: donor_shift %llu, acceptor_shift %llu\n",
+ (unsigned long long) donor_shift,(unsigned long long) acceptor_shift));
if (donor_shift == acceptor_shift) {
debug4(printf("antidonor prob %f, antiacceptor prob %f\n",
Maxent_hr_antidonor_prob(donorpos,chroffset),Maxent_hr_antiacceptor_prob(acceptorpos,chroffset)));
@@ -26163,7 +26187,8 @@ Genome_antisense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_lef
return false;
} else {
donor_shift = donor_rightbound - donorpos;
- debug4(printf("Found donor at %u (shift %u)\n",donorpos-chroffset,donor_shift));
+ debug4(printf("Found donor at %llu (shift %llu)\n",
+ (unsigned long long) (donorpos-chroffset),(unsigned long long) donor_shift));
}
if ((acceptorpos = prev_dinucleotide_position((acceptorpos+1)-1,acceptor_leftbound,
@@ -26175,7 +26200,8 @@ Genome_antisense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_lef
} else {
acceptorpos += 1U; /* Shift coordinates to match input */
acceptor_shift = acceptor_rightbound - acceptorpos;
- debug4(printf("Found acceptor at %u (shift %u)\n",acceptorpos-chroffset,acceptor_shift));
+ debug4(printf("Found acceptor at %llu (shift %llu)\n",
+ (unsigned long long) (acceptorpos-chroffset),(unsigned long long) acceptor_shift));
}
}
@@ -26188,7 +26214,8 @@ Genome_antisense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_lef
return false;
} else {
donor_shift = donor_rightbound - donorpos;
- debug4(printf("Found donor at %u (shift %u)\n",donorpos-chroffset,donor_shift));
+ debug4(printf("Found donor at %llu (shift %llu)\n",
+ (unsigned long long) (donorpos-chroffset),(unsigned long long) donor_shift));
}
} else {
if ((acceptorpos = prev_dinucleotide_position((acceptorpos+1)-1,acceptor_leftbound,
@@ -26200,7 +26227,8 @@ Genome_antisense_canonicalp (Univcoord_T donor_rightbound, Univcoord_T donor_lef
} else {
acceptorpos += 1U; /* Shift coordinates to match input */
acceptor_shift = acceptor_rightbound - acceptorpos;
- debug4(printf("Found acceptor at %u (shift %u)\n",acceptorpos-chroffset,acceptor_shift));
+ debug4(printf("Found acceptor at %llu (shift %llu)\n",
+ (unsigned long long) (acceptorpos-chroffset),(unsigned long long) acceptor_shift));
}
}
}
diff --git a/src/get-genome.c b/src/get-genome.c
index c8616cf..679f285 100644
--- a/src/get-genome.c
+++ b/src/get-genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: get-genome.c 149319 2014-09-30 02:15:42Z twu $";
+static char rcsid[] = "$Id: get-genome.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -195,7 +195,7 @@ print_two_coords (Univcoord_T left, Chrpos_T length, Univ_IIT_T chromosome_iit)
char *chromosome;
Chrpos_T chrpos;
- printf("%lu%s%lu\t",left+1,SEPARATOR,left+length);
+ printf("%llu%s%llu\t",(unsigned long long) left+1,SEPARATOR,(unsigned long long) left+length);
chromosome = Univ_IIT_string_from_position(&chrpos,left,chromosome_iit);
printf("%s:%u\t",chromosome,chrpos+1U);
FREE(chromosome);
@@ -314,8 +314,8 @@ parse_query (char **divstring, Chrpos_T *coordstart, Chrpos_T *coordend, bool *r
FREE(query);
return true;
} else if (Parserange_israngep(&left,&length,&(*revcomp),coords)) {
- debug(printf(" and coords %s as a range starting at %lu with length %u and revcomp = %d\n",
- coords,left,length,*revcomp));
+ debug(printf(" and coords %s as a range starting at %llu with length %u and revcomp = %d\n",
+ coords,(unsigned long long) left,length,*revcomp));
*coordstart = left;
*coordend = left + length;
FREE(query);
@@ -413,7 +413,7 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
} else {
printf("%s:%u",chromosome2,chrpos);
}
- printf(" %s:%lu%s%lu\n",dbversion,genomicstart+genomiclength,SEPARATOR,genomicstart+1U);
+ printf(" %s:%llu%s%llu\n",dbversion,(unsigned long long) genomicstart+genomiclength,SEPARATOR,(unsigned long long) genomicstart+1U);
FREE(chromosome2);
}
FREE(chromosome1);
@@ -431,7 +431,7 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
} else {
printf("%s:%u",chromosome2,chrpos+1U);
}
- printf(" %s:%lu%s%lu\n",dbversion,genomicstart+1U,SEPARATOR,genomicstart+genomiclength);
+ printf(" %s:%llu%s%llu\n",dbversion,(unsigned long long) genomicstart+1U,SEPARATOR,(unsigned long long) genomicstart+genomiclength);
FREE(chromosome2);
}
FREE(chromosome1);
@@ -1254,8 +1254,8 @@ main (int argc, char *argv[]) {
debug(printf("coordp is true\n"));
if (Parserange_universal(&segment,&revcomp,&genomicstart,&genomiclength,&chrstart,&chrend,
&chroffset,&chrlength,argv[0],genomesubdir,fileroot) == true) {
- debug(printf("Query %s parsed as: genomicstart = %lu, genomiclength = %u, revcomp = %d\n",
- argv[0],genomicstart,genomiclength,revcomp));
+ debug(printf("Query %s parsed as: genomicstart = %llu, genomiclength = %u, revcomp = %d\n",
+ argv[0],(unsigned long long) genomicstart,genomiclength,revcomp));
print_two_coords(genomicstart,genomiclength,chromosome_iit);
}
@@ -1277,8 +1277,8 @@ main (int argc, char *argv[]) {
if (Parserange_universal(&segment,&revcomp,&genomicstart,&genomiclength,&chrstart,&chrend,
&chroffset,&chrlength,argv[0],genomesubdir,fileroot) == true) {
- debug(printf("Query %s parsed as: genomicstart = %lu, genomiclength = %u, revcomp = %d\n",
- argv[0],genomicstart,genomiclength,revcomp));
+ debug(printf("Query %s parsed as: genomicstart = %llu, genomiclength = %u, revcomp = %d\n",
+ argv[0],(unsigned long long) genomicstart,genomiclength,revcomp));
print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
/*whole_chromosome_p*/false);
}
@@ -1322,8 +1322,8 @@ main (int argc, char *argv[]) {
if (force_label_p == false &&
Parserange_universal(&segment,&revcomp,&genomicstart,&genomiclength,&chrstart,&chrend,
&chroffset,&chrlength,argv[0],genomesubdir,fileroot) == true) {
- debug(printf("Query %s parsed as: genomicstart = %lu, genomiclength = %u, revcomp = %d\n",
- argv[0],genomicstart,genomiclength,revcomp));
+ debug(printf("Query %s parsed as: genomicstart = %llu, genomiclength = %u, revcomp = %d\n",
+ argv[0],(unsigned long long) genomicstart,genomiclength,revcomp));
divstring = Univ_IIT_string_from_position(&chrstart,genomicstart,chromosome_iit);
divstring2 = Univ_IIT_string_from_position(&chrend,genomicstart+genomiclength-1U,chromosome_iit);
if (strcmp(divstring,divstring2)) {
@@ -1493,8 +1493,8 @@ main (int argc, char *argv[]) {
if (force_label_p == false &&
Parserange_universal_iit(&segment,&revcomp,&genomicstart,&genomiclength,&chrstart,&chrend,
&chroffset,&chrlength,coords,chromosome_iit,contig_iit) == true) {
- debug(printf("Query %s parsed as: genomicstart = %lu, genomiclength = %u, revcomp = %d\n",
- coords,genomicstart,genomiclength,revcomp));
+ debug(printf("Query %s parsed as: genomicstart = %llu, genomiclength = %u, revcomp = %d\n",
+ coords,(unsigned long long) genomicstart,genomiclength,revcomp));
divstring = Univ_IIT_string_from_position(&chrstart,genomicstart,chromosome_iit);
divstring2 = Univ_IIT_string_from_position(&chrend,genomicstart+genomiclength-1U,chromosome_iit);
if (strcmp(divstring,divstring2)) {
diff --git a/src/gmap.c b/src/gmap.c
index 7324bf6..03aa19f 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 150408 2014-10-09 21:55:35Z twu $";
+static char rcsid[] = "$Id: gmap.c 153947 2014-11-24 17:46:05Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -356,6 +356,7 @@ static bool checksump = false;
static int chimera_overlap = 0;
static bool force_xs_direction_p = false;
static bool md_lowercase_variant_p = false;
+static Cigar_action_T cigar_action = CIGAR_ACTION_WARNING;
/* Map file options */
static char *user_mapdir = NULL;
@@ -678,8 +679,8 @@ print_program_version () {
#ifdef PMAP
fprintf(stdout,"Stage 1 index size: %d aa\n",index1part_aa);
#endif
- fprintf(stdout,"Sizes: off_t (%lu), size_t (%lu), unsigned int (%lu), long int (%lu)\n",
- sizeof(off_t),sizeof(size_t),sizeof(unsigned int),sizeof(long int));
+ fprintf(stdout,"Sizes: off_t (%d), size_t (%d), unsigned int (%d), long int (%d), long long int (%d)\n",
+ (int) sizeof(off_t),(int) sizeof(size_t),(int) sizeof(unsigned int),(int) sizeof(long int),(int) sizeof(long long int));
fprintf(stdout,"Default gmap directory (compiled): %s\n",GMAPDB);
genomedir = Datadir_find_genomedir(/*user_genomedir*/NULL);
fprintf(stdout,"Default gmap directory (environment): %s\n",genomedir);
@@ -775,6 +776,8 @@ check_compiler_assumptions () {
#endif
+ fprintf(stderr,"Finished checking compiler assumptions\n");
+
return;
}
@@ -4873,6 +4876,17 @@ main (int argc, char *argv[]) {
force_xs_direction_p = true;
} else if (!strcmp(long_name,"md-lowercase-snp")) {
md_lowercase_variant_p = true;
+ } else if (!strcmp(long_name,"action-if-cigar-error")) {
+ if (!strcmp(optarg,"ignore")) {
+ cigar_action = CIGAR_ACTION_IGNORE;
+ } else if (!strcmp(optarg,"warning")) {
+ cigar_action = CIGAR_ACTION_WARNING;
+ } else if (!strcmp(optarg,"abort")) {
+ cigar_action = CIGAR_ACTION_ABORT;
+ } else {
+ fprintf(stderr,"action-if-cigar-error needs to be ignore, warning, or abort\n");
+ exit(9);
+ }
} else if (!strcmp(long_name,"read-group-id")) {
sam_read_group_id = optarg;
} else if (!strcmp(long_name,"read-group-name")) {
@@ -5924,7 +5938,7 @@ main (int argc, char *argv[]) {
trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max);
Pair_setup(trim_mismatch_score,trim_indel_score,sam_insert_0M_p,
force_xs_direction_p,md_lowercase_variant_p,
- /*snps_p*/genomecomp_alt ? true : false,genomelength);
+ /*snps_p*/genomecomp_alt ? true : false,genomelength,cigar_action);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
require_splicedir_p,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,
@@ -6426,6 +6440,8 @@ Output options\n\
--md-lowercase-snp In MD string, when known SNPs are given by the -v flag,\n\
prints difference nucleotides as lower-case when they,\n\
differ from reference but match a known alternate allele\n\
+ --action-if-cigar-error Action to take if there is a disagreement between CIGAR length and sequence length\n\
+ Allowed values: ignore, warning (default), abort\n\
--read-group-id=STRING Value to put into read-group id (RG-ID) field\n\
--read-group-name=STRING Value to put into read-group name (RG-SM) field\n\
--read-group-library=STRING Value to put into read-group library (RG-LB) field\n\
diff --git a/src/gmapindex.c b/src/gmapindex.c
index 0fa2521..a54a691 100644
--- a/src/gmapindex.c
+++ b/src/gmapindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmapindex.c 150409 2014-10-09 21:55:59Z twu $";
+static char rcsid[] = "$Id: gmapindex.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -181,6 +181,8 @@ check_compiler_assumptions () {
#endif
+ fprintf(stderr,"Finished checking compiler assumptions\n");
+
return;
}
@@ -417,7 +419,7 @@ process_sequence_aux (Chrpos_T *seglength, Table_T accsegmentpos_table, Table_ch
return false;
}
- nitems = sscanf(Buffer,"%s %s %lu",accession_p,chrpos_string,&universal_coord);
+ nitems = sscanf(Buffer,"%s %s %llu",accession_p,chrpos_string,&universal_coord);
if (nitems < 2) {
fprintf(stderr,"Can't parse line %s\n",Buffer);
exit(1);
@@ -563,6 +565,7 @@ write_chromosome_file (char *genomesubdir, char *fileroot, Table_chrpos_T chrlen
emptystring[0] = '\0';
if (divsort == NO_SORT) {
+ fprintf(stderr,"divsort == NO_SORT\n");
#ifdef HAVE_64_BIT
chroms = (Chrom_T *) Tableuint8_keys_by_timeindex(chrlength_table,0U);
n = Tableuint8_length(chrlength_table);
@@ -588,6 +591,8 @@ write_chromosome_file (char *genomesubdir, char *fileroot, Table_chrpos_T chrlen
default: abort();
}
}
+ fprintf(stderr,"Have a total of %d chromosomes\n",n);
+
/* Write chromosome text file and chrsubset file */
textfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
@@ -616,15 +621,16 @@ write_chromosome_file (char *genomesubdir, char *fileroot, Table_chrpos_T chrlen
chrtypelist = List_push(chrtypelist,"");
for (i = 0; i < n; i++) {
#ifdef HAVE_64_BIT
- chrlength = (Univcoord_T) Tableuint8_get(chrlength_table,chroms[i]);
+ chrlength = (Chrpos_T) Tableuint8_get(chrlength_table,chroms[i]);
#else
- chrlength = (Univcoord_T) Tableuint_get(chrlength_table,chroms[i]);
+ chrlength = (Chrpos_T) Tableuint_get(chrlength_table,chroms[i]);
#endif
- assert(chroffset <= chroffset+chrlength-1);
+ assert(chroffset <= chroffset + (Univcoord_T) chrlength - 1);
chr_string = Chrom_string(chroms[i]);
if (i < nmessages) {
- fprintf(stderr,"Chromosome %s has universal coordinates %lu..%lu\n",
- chr_string,chroffset+1,chroffset+1+chrlength-1);
+ fprintf(stderr,"Chromosome %s has universal coordinates %llu..%llu\n",
+ chr_string,(unsigned long long) chroffset+1,
+ (unsigned long long) chroffset + 1 + (Univcoord_T) chrlength - 1);
} else if (i == nmessages) {
fprintf(stderr,"More than %d contigs. Will stop printing messages\n",nmessages);
}
@@ -634,8 +640,9 @@ write_chromosome_file (char *genomesubdir, char *fileroot, Table_chrpos_T chrlen
fprintf(chrsubsetfp,"+%s\n",chr_string);
}
- fprintf(textfp,"%s\t%lu..%lu\t%u",
- chr_string,chroffset+1,chroffset+chrlength,chrlength);
+ fprintf(textfp,"%s\t%llu..%llu\t%u",
+ chr_string,(unsigned long long) chroffset+1,
+ (unsigned long long) chroffset + (Univcoord_T) chrlength,chrlength);
if (Chrom_circularp(chroms[i]) == true) {
fprintf(textfp,"\tcircular");
typeint = 1;
@@ -644,19 +651,21 @@ write_chromosome_file (char *genomesubdir, char *fileroot, Table_chrpos_T chrlen
}
fprintf(textfp,"\n");
- intervallist = List_push(intervallist,(void *) Univinterval_new(chroffset,chroffset+chrlength-1U,typeint));
+ intervallist = List_push(intervallist,(void *) Univinterval_new(chroffset,chroffset + (Univcoord_T) chrlength - 1U,typeint));
labellist = List_push(labellist,(void *) chr_string);
annotlist = List_push(annotlist,(void *) emptystring); /* No annotations */
+
+ /* Now chrlength_table holds chroffsets, not chrlengths */
#ifdef HAVE_64_BIT
Tableuint8_put(chrlength_table,chroms[i],chroffset);
#else
Tableuint_put(chrlength_table,chroms[i],chroffset);
#endif
if (Chrom_circularp(chroms[i]) == true) {
- chroffset += chrlength;
- chroffset += chrlength;
+ chroffset += (Univcoord_T) chrlength;
+ chroffset += (Univcoord_T) chrlength;
} else {
- chroffset += chrlength;
+ chroffset += (Univcoord_T) chrlength;
}
}
FREE(chroms);
@@ -669,7 +678,7 @@ write_chromosome_file (char *genomesubdir, char *fileroot, Table_chrpos_T chrlen
/* Write chromosome IIT file */
divstring = (char *) CALLOC(1,sizeof(char));
divstring[0] = '\0';
- divlist = List_push(NULL,divstring);
+ divlist = List_push(NULL,(void *) divstring);
intervaltable = Table_new(65522,Table_string_compare,Table_string_hash);
labeltable = Table_new(65522,Table_string_compare,Table_string_hash);
@@ -806,18 +815,18 @@ write_contig_file (char *genomesubdir, char *fileroot, Table_T accsegmentpos_tab
#else
chroffset = (Univcoord_T) Tableuint_get(chrlength_table,chrom);
#endif
- universalpos1 = chroffset + Segmentpos_chrpos1(segmentpos);
- universalpos2 = chroffset + Segmentpos_chrpos2(segmentpos);
+ universalpos1 = chroffset + (Univcoord_T) Segmentpos_chrpos1(segmentpos);
+ universalpos2 = chroffset + (Univcoord_T) Segmentpos_chrpos2(segmentpos);
/* Print as 1-based, inclusive [a,b] */
if (Segmentpos_revcompp(segmentpos) == true) {
- fprintf(textfp,"%s\t%lu..%lu\t%s:%u..%u\t%u",
- accessions[i],universalpos2+1U,universalpos1,
+ fprintf(textfp,"%s\t%llu..%llu\t%s:%u..%u\t%u",
+ accessions[i],(unsigned long long) universalpos2+1U,(unsigned long long) universalpos1,
Chrom_string(chrom),Segmentpos_chrpos2(segmentpos)+1U,Segmentpos_chrpos1(segmentpos),
Segmentpos_length(segmentpos));
} else {
- fprintf(textfp,"%s\t%lu..%lu\t%s:%u..%u\t%u",
- accessions[i],universalpos1+1U,universalpos2,
+ fprintf(textfp,"%s\t%llu..%llu\t%s:%u..%u\t%u",
+ accessions[i],(unsigned long long) universalpos1+1U,(unsigned long long) universalpos2,
Chrom_string(chrom),Segmentpos_chrpos1(segmentpos)+1U,Segmentpos_chrpos2(segmentpos),
Segmentpos_length(segmentpos));
}
@@ -1238,7 +1247,7 @@ main (int argc, char *argv[]) {
contigtypelist = List_push(NULL,typestring);
ncontigs = 0;
- totalnts = 0U;
+ totalnts = 0;
while (process_sequence_aux(&seglength,accsegmentpos_table,chrlength_table,chrorder_table,fileroot,ncontigs) == true) {
if (totalnts + seglength < totalnts) {
/* Exceeds 32 bits */
@@ -1249,7 +1258,7 @@ main (int argc, char *argv[]) {
}
ncontigs++;
}
- fprintf(stderr,"Total genomic length = %lu bp\n",totalnts);
+ fprintf(stderr,"Total genomic length = %llu bp\n",(unsigned long long) totalnts);
if (ncontigs == 0) {
fprintf(stderr,"No contig information was provided to gmapindex\n");
@@ -1257,7 +1266,7 @@ main (int argc, char *argv[]) {
}
#ifdef HAVE_64_BIT
- if (totalnts > 4294967295U) {
+ if (totalnts > 4294967295) {
coord_values_8p = true;
} else {
coord_values_8p = false;
@@ -1350,7 +1359,7 @@ main (int argc, char *argv[]) {
noffsets = Indexdb_count_offsets(stdin,chromosome_iit,index1part,index1interval,
genome_lc_p,fileroot,mask_lowercase_p);
- printf("%lu\n",noffsets);
+ printf("%llu\n",(unsigned long long) noffsets);
Univ_IIT_free(&chromosome_iit);
#endif
@@ -1477,6 +1486,7 @@ main (int argc, char *argv[]) {
genomelength = Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias_p*/true);
Univ_IIT_free(&chromosome_iit);
+ fprintf(stderr,"Genome length is %llu\n",(unsigned long long) genomelength);
if (genomelength > 4294967295) {
fprintf(stderr,"Suffix arrays not yet supported for large genomes with more than 2^32 bp. Will use hash table only.\n");
diff --git a/src/gsnap.c b/src/gsnap.c
index 3631282..ef74b93 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 150408 2014-10-09 21:55:35Z twu $";
+static char rcsid[] = "$Id: gsnap.c 153947 2014-11-24 17:46:05Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -96,6 +96,7 @@ static char rcsid[] = "$Id: gsnap.c 150408 2014-10-09 21:55:35Z twu $";
#include "samprint.h" /* For SAM_setup */
#include "stage3.h" /* To get EXTRAQUERYGAP */
+#include "pair.h" /* For Cigar_action_T */
#include "getopt.h"
@@ -391,6 +392,7 @@ static char *sam_read_group_platform = NULL;
static bool force_xs_direction_p = false;
static bool md_lowercase_variant_p = false;
static bool hide_soft_clips_p = false;
+static Cigar_action_T cigar_action = CIGAR_ACTION_WARNING;
/* Goby */
@@ -540,7 +542,8 @@ static struct option long_options[] = {
{"read-group-platform", required_argument, 0, 0}, /* sam_read_group_platform */
{"force-xs-dir", no_argument, 0, 0}, /* force_xs_direction_p */
{"md-lowercase-snp", no_argument, 0, 0}, /* md_lowercase_variant_p */
- {"hide-soft-clips", no_argument, 0, 0}, /* hide_soft_clips_p */
+ {"extend-soft-clips", no_argument, 0, 0}, /* hide_soft_clips_p */
+ {"action-if-cigar-error", required_argument, 0, 0}, /* cigar_action */
{"noexceptions", no_argument, 0, '0'}, /* exception_raise_p */
{"maxsearch", required_argument, 0, 0}, /* maxpaths_search */
@@ -680,8 +683,8 @@ print_program_version () {
fprintf(stdout,"\n");
- fprintf(stdout,"Sizes: off_t (%lu), size_t (%lu), unsigned int (%lu), long int (%lu)\n",
- sizeof(off_t),sizeof(size_t),sizeof(unsigned int),sizeof(long int));
+ fprintf(stdout,"Sizes: off_t (%d), size_t (%d), unsigned int (%d), long int (%d), long long int (%d)\n",
+ (int) sizeof(off_t),(int) sizeof(size_t),(int) sizeof(unsigned int),(int) sizeof(long int),(int) sizeof(long long int));
fprintf(stdout,"Default gmap directory (compiled): %s\n",GMAPDB);
genomedir = Datadir_find_genomedir(/*user_genomedir*/NULL);
fprintf(stdout,"Default gmap directory (environment): %s\n",genomedir);
@@ -779,6 +782,8 @@ check_compiler_assumptions () {
#endif
+ fprintf(stderr,"Finished checking compiler assumptions\n");
+
return;
}
@@ -1836,8 +1841,19 @@ main (int argc, char *argv[]) {
force_xs_direction_p = true;
} else if (!strcmp(long_name,"md-lowercase-snp")) {
md_lowercase_variant_p = true;
- } else if (!strcmp(long_name,"hide-soft-clips")) {
+ } else if (!strcmp(long_name,"extend-soft-clips")) {
hide_soft_clips_p = true;
+ } else if (!strcmp(long_name,"action-if-cigar-error")) {
+ if (!strcmp(optarg,"ignore")) {
+ cigar_action = CIGAR_ACTION_IGNORE;
+ } else if (!strcmp(optarg,"warning")) {
+ cigar_action = CIGAR_ACTION_WARNING;
+ } else if (!strcmp(optarg,"abort")) {
+ cigar_action = CIGAR_ACTION_ABORT;
+ } else {
+ fprintf(stderr,"action-if-cigar-error needs to be ignore, warning, or abort\n");
+ exit(9);
+ }
} else if (!strcmp(long_name,"read-group-id")) {
sam_read_group_id = optarg;
} else if (!strcmp(long_name,"read-group-name")) {
@@ -2979,7 +2995,8 @@ main (int argc, char *argv[]) {
Pair_setup(trim_mismatch_score,trim_indel_score,sam_insert_0M_p,
force_xs_direction_p,md_lowercase_variant_p,
/*snps_p*/snps_iit ? true : false,
- Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false));
+ Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
+ cigar_action);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
/*require_splicedir_p*/true,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,
@@ -3623,6 +3640,9 @@ is still designed to be fast.\n\
--md-lowercase-snp In MD string, when known SNPs are given by the -v flag,\n\
prints difference nucleotides as lower-case when they,\n\
differ from reference but match a known alternate allele\n\
+ --extend-soft-clips Extends alignments through soft clipped regions\n\
+ --action-if-cigar-error Action to take if there is a disagreement between CIGAR length and sequence length\n\
+ Allowed values: ignore, warning (default), abort\n\
--read-group-id=STRING Value to put into read-group id (RG-ID) field\n\
--read-group-name=STRING Value to put into read-group name (RG-SM) field\n\
--read-group-library=STRING Value to put into read-group library (RG-LB) field\n\
diff --git a/src/iit-read-univ.c b/src/iit-read-univ.c
index 8a104c9..1f1ca44 100644
--- a/src/iit-read-univ.c
+++ b/src/iit-read-univ.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read-univ.c 149319 2014-09-30 02:15:42Z twu $";
+static char rcsid[] = "$Id: iit-read-univ.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -506,7 +506,7 @@ Univ_IIT_dump (T this) {
startpos = Univinterval_low(interval);
endpos = startpos + Univinterval_length(interval) - 1U;
- printf(" %lu..%lu",startpos,endpos);
+ printf(" %llu..%llu",(unsigned long long) startpos,(unsigned long long) endpos);
if (Univinterval_type(interval) > 0) {
printf(" %s",Univ_IIT_typestring(this,Univinterval_type(interval)));
@@ -544,9 +544,9 @@ Univ_IIT_dump_table (T this, bool zerobasedp) {
endpos = startpos + Univinterval_length(interval) - 1U;
if (zerobasedp) {
- printf("%lu..%lu\t",startpos,endpos);
+ printf("%llu..%llu\t",(unsigned long long) startpos,(unsigned long long) endpos);
} else {
- printf("%lu..%lu\t",startpos+1,endpos+1);
+ printf("%llu..%llu\t",(unsigned long long) startpos+1,(unsigned long long) endpos+1);
}
printf("%u",Univinterval_length(interval));
@@ -719,16 +719,16 @@ Univ_IIT_dump_contigs (T this, T chromosome_iit, bool directionalp) {
chrstring = Univ_IIT_label(chromosome_iit,chromosome_index,&allocp);
if (directionalp == false) {
- printf("%lu..%lu\t",startpos+1U,endpos+1U);
+ printf("%llu..%llu\t",(unsigned long long) startpos+1U,(unsigned long long) endpos+1U);
printf("%s:%u..%u\t",chrstring,chrstart+1U,chrend+1U);
} else {
firstchar = Univ_IIT_annotation_firstchar(this,index+1);
if (firstchar == '-') {
- printf("%lu..%lu\t",endpos+1U,startpos+1U);
+ printf("%llu..%llu\t",(unsigned long long) endpos+1U,(unsigned long long) startpos+1U);
printf("%s:%u..%u\t",chrstring,chrend+1U,chrstart+1U);
} else {
- printf("%lu..%lu\t",startpos+1U,endpos+1U);
+ printf("%llu..%llu\t",(unsigned long long) startpos+1U,(unsigned long long) endpos+1U);
printf("%s:%u..%u\t",chrstring,chrstart+1U,chrend+1U);
}
}
@@ -800,30 +800,25 @@ Univ_IIT_free (T *old) {
}
-#ifdef HAVE_FSEEKO
static void
move_relative (FILE *fp, off_t offset) {
+
+#ifdef HAVE_FSEEKO
if (fseeko(fp,offset,SEEK_CUR) < 0) {
fprintf(stderr,"Error in move_relative, seek\n");
abort();
}
- return;
-}
-
#else
-
-static void
-move_relative (FILE *fp, long int offset) {
- if (fseek(fp,offset,SEEK_CUR) < 0) {
+ if (fseek(fp,(long) offset,SEEK_CUR) < 0) {
fprintf(stderr,"Error in move_relative, seek\n");
abort();
}
+#endif
+
return;
}
-#endif
-
static off_t
read_tree_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T new) {
@@ -832,8 +827,8 @@ read_tree_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T new) {
UINT4 uint4;
if ((offset += sizeof(int)*(new->total_nintervals+1)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after sigmas %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after sigmas %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
} else {
new->sigmas = (int *) CALLOC(new->total_nintervals+1,sizeof(int));
@@ -844,8 +839,8 @@ read_tree_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T new) {
}
if ((offset += sizeof(int)*(new->total_nintervals+1)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after omegas %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after omegas %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
} else {
new->omegas = (int *) CALLOC(new->total_nintervals+1,sizeof(int));
@@ -892,7 +887,7 @@ read_tree_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T new) {
FREAD_INT(&(new->nodes[i].b),fp);
FREAD_INT(&(new->nodes[i].leftindex),fp);
FREAD_INT(&(new->nodes[i].rightindex),fp);
- printf("i %d, node value %lu\n",i,new->nodes[i].value);
+ printf("i %d, node value %llu\n",i,(unsigned long long) new->nodes[i].value);
}
offset += (sizeof(UINT8)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int))*new->nnodes;
#endif
@@ -909,8 +904,8 @@ read_tree_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T new) {
}
#endif
if (offset > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nodes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nodes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
}
}
@@ -962,8 +957,8 @@ read_intervals_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T n
}
#endif
if (offset > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after intervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after intervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
}
@@ -1007,14 +1002,14 @@ read_words (off_t offset, off_t filesize, FILE *fp, T new) {
debug1(printf("Starting read of labelorder offset/length\n"));
new->labelorder_offset = offset;
new->labelorder_length = (size_t) (new->total_nintervals*sizeof(int));
- /* fprintf(stderr,"Doing a move_relative for labelorder_length %lu\n",new->labelorder_length); */
+ /* fprintf(stderr,"Doing a move_relative for labelorder_length %zu\n",new->labelorder_length); */
move_relative(fp,new->labelorder_length);
offset += new->labelorder_length;
debug1(printf("Starting read of labelpointer offset/length\n"));
new->labelpointers_offset = offset;
new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
- /* fprintf(stderr,"Doing a move_relative for labelpointer %lu\n",new->total_nintervals * sizeof(UINT4)); */
+ /* fprintf(stderr,"Doing a move_relative for labelpointer %zu\n",new->total_nintervals * sizeof(UINT4)); */
move_relative(fp,new->total_nintervals * sizeof(UINT4));
FREAD_UINT(&length,fp);
new->label_length = (size_t) length;
@@ -1023,7 +1018,7 @@ read_words (off_t offset, off_t filesize, FILE *fp, T new) {
debug1(printf("Starting read of label offset/length\n"));
new->label_offset = offset;
/* new->label_length computed above */
- /* fprintf(stderr,"Doing a move_relative for label_length %lu\n",new->label_length); */
+ /* fprintf(stderr,"Doing a move_relative for label_length %zu\n",new->label_length); */
move_relative(fp,new->label_length);
offset += new->label_length;
@@ -1042,15 +1037,15 @@ read_words (off_t offset, off_t filesize, FILE *fp, T new) {
fprintf(stderr,"Incorrect length: %u\n",length);
#else
new->annot_length = filesize - new->annot_offset;
- /* fprintf(stderr,"annot_length: %lu\n",new->annot_length); */
+ /* fprintf(stderr,"annot_length: %zu\n",new->annot_length); */
#endif
#if 0
/* To do this check, we need to get stringlen for annotation similarly to that for labels */
last_offset = offset + sizeof(char)*stringlen;
if (last_offset != filesize) {
- fprintf(stderr,"Problem with last_offset (%lu) not equal to filesize = (%lu)\n",
- (unsigned long) last_offset,(unsigned long) filesize);
+ fprintf(stderr,"Problem with last_offset (%lld) not equal to filesize = (%lld)\n",
+ (long long int) last_offset,(long long int) filesize);
exit(9);
}
#endif
@@ -1102,7 +1097,7 @@ read_words_debug (off_t offset, off_t filesize, FILE *fp, T new) {
new->label_length = (size_t) length;
offset += new->labelpointers_length;
- fprintf(stderr,"label_length: %lu\n",new->label_length);
+ fprintf(stderr,"label_length: %zu\n",new->label_length);
debug1(printf("Starting read of label offset/length\n"));
new->label_offset = offset;
/* new->label_length computed above */
@@ -1124,15 +1119,15 @@ read_words_debug (off_t offset, off_t filesize, FILE *fp, T new) {
fprintf(stderr,"Incorrect length: %u\n",length);
#else
new->annot_length = filesize - new->annot_offset;
- fprintf(stderr,"annot_length: %lu\n",new->annot_length);
+ fprintf(stderr,"annot_length: %zu\n",new->annot_length);
#endif
#if 0
/* To do this check, we need to get stringlen for annotation similarly to that for labels */
last_offset = offset + sizeof(char)*stringlen;
if (last_offset != filesize) {
- fprintf(stderr,"Problem with last_offset (%lu) not equal to filesize = (%lu)\n",
- (unsigned long) last_offset,(unsigned long) filesize);
+ fprintf(stderr,"Problem with last_offset (%lld) not equal to filesize = (%lld)\n",
+ (long long int) last_offset,(long long int) filesize);
exit(9);
}
#endif
@@ -1292,8 +1287,8 @@ Univ_IIT_read (char *filename, bool readonlyp, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to be empty\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
@@ -1333,8 +1328,8 @@ Univ_IIT_read (char *filename, bool readonlyp, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
debug(printf("ntypes: %d\n",new->ntypes));
@@ -1346,8 +1341,8 @@ Univ_IIT_read (char *filename, bool readonlyp, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to have a negative number of nodes\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
@@ -1419,8 +1414,8 @@ Univ_IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to be empty\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
@@ -1452,8 +1447,8 @@ Univ_IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
printf("ntypes: %d\n",new->ntypes);
@@ -1465,8 +1460,8 @@ Univ_IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to have a negative number of nodes\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
@@ -1521,11 +1516,11 @@ fnode_query_aux (int *min, int *max, T this, int nodeindex, Univcoord_T x) {
}
node = &(this->nodes[nodeindex]);
- debug(printf("Entered fnode_query_aux with nodeindex %d: a %d, b %d, leftindex %d, rightindex %d, value %lu\n",
- nodeindex,node->a,node->b,node->leftindex,node->rightindex,node->value));
+ debug(printf("Entered fnode_query_aux with nodeindex %d: a %d, b %d, leftindex %d, rightindex %d, value %llu\n",
+ nodeindex,node->a,node->b,node->leftindex,node->rightindex,(unsigned long long) node->value));
if (x == node->value) {
- debug(printf("%luD:\n",node->value));
+ debug(printf("%lluD:\n",(unsigned long long) node->value));
if (node->a < *min) {
*min = node->a;
}
@@ -1534,9 +1529,9 @@ fnode_query_aux (int *min, int *max, T this, int nodeindex, Univcoord_T x) {
}
return;
} else if (x < node->value) {
- debug(printf("x %lu < node->value %lu\n",x,node->value));
+ debug(printf("x %llu < node->value %llu\n",(unsigned long long) x,(unsigned long long) node->value));
fnode_query_aux(&(*min),&(*max),this,node->leftindex,x);
- debug(printf("%luL:\n",node->value));
+ debug(printf("%lluL:\n",(unsigned long long) node->value));
if (node->a < *min) {
*min = node->a;
}
@@ -1554,9 +1549,9 @@ fnode_query_aux (int *min, int *max, T this, int nodeindex, Univcoord_T x) {
return;
} else {
/* (node->value < x) */
- debug(printf("x %lu > node->value %lu\n",x,node->value));
+ debug(printf("x %llu > node->value %llu\n",(unsigned long long) x,(unsigned long long) node->value));
fnode_query_aux(&(*min),&(*max),this,node->rightindex,x);
- debug(printf("%luR:\n", node->value));
+ debug(printf("%lluR:\n",(unsigned long long) node->value));
if (node->b > *max) {
*max = node->b;
}
@@ -1816,7 +1811,7 @@ Univ_IIT_get_one (T this, Univcoord_T x, Univcoord_T y) {
min1 = min2 = this->total_nintervals + 1;
- debug(printf("Entering Univ_IIT_get_one with query %lu %lu\n",x,y));
+ debug(printf("Entering Univ_IIT_get_one with query %llu %llu\n",(unsigned long long) x,(unsigned long long) y));
fnode_query_aux(&min1,&max1,this,0,x);
fnode_query_aux(&min2,&max2,this,0,y);
debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
diff --git a/src/iit-read.c b/src/iit-read.c
index cd3746c..a95e60f 100644
--- a/src/iit-read.c
+++ b/src/iit-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read.c 126795 2014-02-12 00:59:39Z twu $";
+static char rcsid[] = "$Id: iit-read.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1464,29 +1464,25 @@ IIT_free (T *old) {
}
-#ifdef HAVE_FSEEKO
static void
move_relative (FILE *fp, off_t offset) {
+
+#ifdef HAVE_FSEEKO
if (fseeko(fp,offset,SEEK_CUR) < 0) {
fprintf(stderr,"Error in move_relative, seek\n");
abort();
}
- return;
-}
-
#else
-
-static void
-move_relative (FILE *fp, long int offset) {
- if (fseek(fp,offset,SEEK_CUR) < 0) {
+ if (fseek(fp,(long) offset,SEEK_CUR) < 0) {
fprintf(stderr,"Error in move_relative, seek\n");
abort();
}
+#endif
+
return;
}
-#endif
static off_t
skip_trees (off_t offset, off_t filesize, FILE *fp, char *filename,
@@ -1499,8 +1495,8 @@ skip_trees (off_t offset, off_t filesize, FILE *fp, char *filename,
skipsize += skip_nnodes * sizeof(struct FNode_T);
if ((offset += skipsize) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after skip_trees %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after skip_trees %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
} else {
move_relative(fp,skipsize);
@@ -1526,26 +1522,26 @@ read_tree (off_t offset, off_t filesize, FILE *fp, char *filename, T new, int di
} else {
if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after alphas %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after alphas %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
} else {
new->alphas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
if ((items_read = FREAD_INTS(new->alphas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
- fprintf(stderr,"IIT file %s appears to be truncated. items_read = %lu\n",
- filename,(unsigned long) items_read);
+ fprintf(stderr,"IIT file %s appears to be truncated. items_read = %lld\n",
+ filename,(long long int) items_read);
exit(9);
}
}
if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after betas %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after betas %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
} else {
new->betas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
if ((items_read = FREAD_INTS(new->betas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
- fprintf(stderr,"IIT file %s appears to be truncated. items_read = %lu\n",filename,items_read);
+ fprintf(stderr,"IIT file %s appears to be truncated. items_read = %zu\n",filename,items_read);
exit(9);
}
#if 0
@@ -1561,8 +1557,8 @@ read_tree (off_t offset, off_t filesize, FILE *fp, char *filename, T new, int di
}
if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after sigmas %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after sigmas %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
} else {
new->sigmas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
@@ -1582,8 +1578,8 @@ read_tree (off_t offset, off_t filesize, FILE *fp, char *filename, T new, int di
}
if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after omegas %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after omegas %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
} else {
new->omegas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
@@ -1631,8 +1627,8 @@ read_tree (off_t offset, off_t filesize, FILE *fp, char *filename, T new, int di
}
#endif
if (offset > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nodes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nodes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
}
@@ -1668,8 +1664,8 @@ skip_intervals (int *skip_nintervals, off_t offset, off_t filesize, FILE *fp, ch
}
if ((offset += skipsize) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after skip_intervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after skip_intervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
} else {
move_relative(fp,skipsize);
@@ -1723,8 +1719,8 @@ read_intervals (off_t offset, off_t filesize, FILE *fp, char *filename, T new, i
}
#endif
if (offset > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after intervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after intervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
exit(9);
}
@@ -1793,14 +1789,14 @@ read_words (off_t offset, off_t filesize, FILE *fp, T new) {
debug1(printf("Starting read of valueorder offset/length\n"));
new->valueorder_offset = offset;
new->valueorder_length = (size_t) (new->total_nintervals*sizeof(int));
- /* fprintf(stderr,"Doing a move_relative for valueorder_length %lu\n",new->valueorder_length); */
+ /* fprintf(stderr,"Doing a move_relative for valueorder_length %zu\n",new->valueorder_length); */
move_relative(fp,new->valueorder_length);
offset += new->valueorder_length;
debug1(printf("Starting read of value offset/length\n"));
new->value_offset = offset;
new->value_length = (size_t) (new->total_nintervals*sizeof(double));
- /* fprintf(stderr,"Doing a move_relative for value_length %lu\n",new->value_length); */
+ /* fprintf(stderr,"Doing a move_relative for value_length %zu\n",new->value_length); */
move_relative(fp,new->value_length);
offset += new->value_length;
}
@@ -1808,7 +1804,7 @@ read_words (off_t offset, off_t filesize, FILE *fp, T new) {
debug1(printf("Starting read of labelorder offset/length\n"));
new->labelorder_offset = offset;
new->labelorder_length = (size_t) (new->total_nintervals*sizeof(int));
- /* fprintf(stderr,"Doing a move_relative for labelorder_length %lu\n",new->labelorder_length); */
+ /* fprintf(stderr,"Doing a move_relative for labelorder_length %zu\n",new->labelorder_length); */
move_relative(fp,new->labelorder_length);
offset += new->labelorder_length;
@@ -1822,14 +1818,14 @@ read_words (off_t offset, off_t filesize, FILE *fp, T new) {
new->label_length = (size_t) length8;
} else {
new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
- /* fprintf(stderr,"Doing a move_relative for labelpointer %lu\n",new->total_nintervals * sizeof(UINT4)); */
+ /* fprintf(stderr,"Doing a move_relative for labelpointer %zu\n",new->total_nintervals * sizeof(UINT4)); */
move_relative(fp,new->total_nintervals * sizeof(UINT4));
FREAD_UINT(&length,fp);
new->label_length = (size_t) length;
}
#else
new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
- /* fprintf(stderr,"Doing a move_relative for labelpointer %lu\n",new->total_nintervals * sizeof(UINT4)); */
+ /* fprintf(stderr,"Doing a move_relative for labelpointer %zu\n",new->total_nintervals * sizeof(UINT4)); */
move_relative(fp,new->total_nintervals * sizeof(UINT4));
FREAD_UINT(&length,fp);
new->label_length = (size_t) length;
@@ -1839,7 +1835,7 @@ read_words (off_t offset, off_t filesize, FILE *fp, T new) {
debug1(printf("Starting read of label offset/length\n"));
new->label_offset = offset;
/* new->label_length computed above */
- /* fprintf(stderr,"Doing a move_relative for label_length %lu\n",new->label_length); */
+ /* fprintf(stderr,"Doing a move_relative for label_length %zu\n",new->label_length); */
move_relative(fp,new->label_length);
offset += new->label_length;
@@ -1866,15 +1862,15 @@ read_words (off_t offset, off_t filesize, FILE *fp, T new) {
fprintf(stderr,"Incorrect length: %u\n",length);
#else
new->annot_length = filesize - new->annot_offset;
- /* fprintf(stderr,"annot_length: %lu\n",new->annot_length); */
+ /* fprintf(stderr,"annot_length: %zu\n",new->annot_length); */
#endif
#if 0
/* To do this check, we need to get stringlen for annotation similarly to that for labels */
last_offset = offset + sizeof(char)*stringlen;
if (last_offset != filesize) {
- fprintf(stderr,"Problem with last_offset (%lu) not equal to filesize = (%lu)\n",
- (unsigned long) last_offset,(unsigned long) filesize);
+ fprintf(stderr,"Problem with last_offset (%lld) not equal to filesize = (%lld)\n",
+ (long long int) last_offset,(long long int) filesize);
exit(9);
}
#endif
@@ -1938,14 +1934,14 @@ read_words_debug (off_t offset, off_t filesize, FILE *fp, T new) {
debug1(printf("Starting read of valueorder offset/length\n"));
new->valueorder_offset = offset;
new->valueorder_length = (size_t) (new->total_nintervals*sizeof(int));
- /* fprintf(stderr,"Doing a move_relative for valueorder_length %lu\n",new->valueorder_length); */
+ /* fprintf(stderr,"Doing a move_relative for valueorder_length %zu\n",new->valueorder_length); */
move_relative(fp,new->valueorder_length);
offset += new->valueorder_length;
debug1(printf("Starting read of value offset/length\n"));
new->value_offset = offset;
new->value_length = (size_t) (new->total_nintervals*sizeof(double));
- /* fprintf(stderr,"Doing a move_relative for value_length %lu\n",new->value_length); */
+ /* fprintf(stderr,"Doing a move_relative for value_length %zu\n",new->value_length); */
move_relative(fp,new->value_length);
offset += new->value_length;
}
@@ -1978,7 +1974,7 @@ read_words_debug (off_t offset, off_t filesize, FILE *fp, T new) {
#endif
offset += new->labelpointers_length;
- fprintf(stderr,"label_length: %lu\n",new->label_length);
+ fprintf(stderr,"label_length: %zu\n",new->label_length);
debug1(printf("Starting read of label offset/length\n"));
new->label_offset = offset;
/* new->label_length computed above */
@@ -2008,15 +2004,15 @@ read_words_debug (off_t offset, off_t filesize, FILE *fp, T new) {
fprintf(stderr,"Incorrect length: %u\n",length);
#else
new->annot_length = filesize - new->annot_offset;
- fprintf(stderr,"annot_length: %lu\n",new->annot_length);
+ fprintf(stderr,"annot_length: %zu\n",new->annot_length);
#endif
#if 0
/* To do this check, we need to get stringlen for annotation similarly to that for labels */
last_offset = offset + sizeof(char)*stringlen;
if (last_offset != filesize) {
- fprintf(stderr,"Problem with last_offset (%lu) not equal to filesize = (%lu)\n",
- (unsigned long) last_offset,(unsigned long) filesize);
+ fprintf(stderr,"Problem with last_offset (%lld) not equal to filesize = (%lld)\n",
+ (long long int) last_offset,(long long int) filesize);
exit(9);
}
#endif
@@ -2300,8 +2296,8 @@ IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
fclose(fp);
return -1;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return -1;
}
@@ -2316,8 +2312,8 @@ IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
return -1;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return -1;
}
@@ -2328,8 +2324,8 @@ IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
return -1;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return -1;
}
@@ -2337,8 +2333,8 @@ IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
return -1;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return -1;
}
@@ -2364,8 +2360,8 @@ IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
return -1;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return -1;
}
}
@@ -2381,8 +2377,8 @@ IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
return -1;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return -1;
}
debug(printf("ntypes: %d\n",ntypes));
@@ -2398,8 +2394,8 @@ IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to have a negative number of fields\n",filename);
return -1;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return -1;
}
}
@@ -2418,8 +2414,8 @@ IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to have a negative number of divs\n",filename);
return -1;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return -1;
}
debug(printf("ndivs: %d\n",ndivs));
@@ -2447,8 +2443,8 @@ IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
fprintf(stderr,"IIT file %s appears to have a negative value for divsort\n",filename);
return -1;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return -1;
}
debug(printf("divsort: %d\n",divsort));
@@ -2556,8 +2552,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
fclose(fp);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
@@ -2574,8 +2570,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
new->version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
@@ -2598,8 +2594,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
@@ -2607,8 +2603,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
@@ -2636,8 +2632,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
}
@@ -2653,8 +2649,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
debug(printf("ntypes: %d\n",new->ntypes));
@@ -2670,8 +2666,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
fprintf(stderr,"IIT file %s appears to have a negative number of fields\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
}
@@ -2695,8 +2691,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
fprintf(stderr,"IIT file %s appears to have a negative number of nodes\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
@@ -2721,8 +2717,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
fprintf(stderr,"IIT file %s appears to have a negative number of divs\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
debug(printf("ndivs: %d\n",new->ndivs));
@@ -2774,8 +2770,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
fprintf(stderr,"IIT file %s appears to have a negative value for divsort\n",filename);
return NULL;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return NULL;
}
debug(printf("divsort: %d\n",new->divsort));
@@ -2968,8 +2964,8 @@ IIT_debug (char *filename) {
fclose(fp);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
@@ -2985,8 +2981,8 @@ IIT_debug (char *filename) {
new->version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
@@ -3009,8 +3005,8 @@ IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
@@ -3018,8 +3014,8 @@ IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
@@ -3047,8 +3043,8 @@ IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
}
@@ -3073,8 +3069,8 @@ IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
printf("ntypes: %d\n",new->ntypes);
@@ -3090,8 +3086,8 @@ IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to have a negative number of fields\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
}
@@ -3115,8 +3111,8 @@ IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to have a negative number of nodes\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
@@ -3141,8 +3137,8 @@ IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to have a negative number of divs\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
printf("ndivs: %d\n",new->ndivs);
@@ -3186,8 +3182,8 @@ IIT_debug (char *filename) {
fprintf(stderr,"IIT file %s appears to have a negative value for divsort\n",filename);
return;
} else if ((offset += sizeof(int)) > filesize) {
- fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %lu, filesize %lu). Did you generate it using iit_store?\n",
- filename,(unsigned long) offset,(unsigned long) filesize);
+ fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %lld, filesize %lld). Did you generate it using iit_store?\n",
+ filename,(long long int) offset,(long long int) filesize);
return;
}
printf("divsort: %d\n",new->divsort);
diff --git a/src/iit-write-univ.c b/src/iit-write-univ.c
index 6e81939..1672a6d 100644
--- a/src/iit-write-univ.c
+++ b/src/iit-write-univ.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-write-univ.c 102176 2013-07-20 00:51:14Z twu $";
+static char rcsid[] = "$Id: iit-write-univ.c 153948 2014-11-24 17:46:46Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -230,8 +230,8 @@ Node_make (int *nnodes, int i, int j, int *sigmas, int *omegas, struct Univinter
node->a = q + 1;
node->b = r;
- debug(printf(" NODE=%lu [%d..%d], left: %d, cont: %d, right: %d\n",
- node->value, i, j, q - i + 1, r - q, j - r));
+ debug(printf(" NODE=%llu [%d..%d], left: %d, cont: %d, right: %d\n",
+ (unsigned long long) node->value, i, j, q - i + 1, r - q, j - r));
assert(Node_is_valid_output (node, i, j, sigmas, omegas, intervals));
@@ -530,9 +530,11 @@ IIT_write_univ_footer (FILE *fp, List_T divlist, List_T typelist, Table_T interv
}
/* Write labelorder */
- labelorder = get_labelorder(divlist,labeltable,total_nintervals);
- FWRITE_INTS(labelorder,total_nintervals,fp);
- FREE(labelorder);
+ if (total_nintervals > 0) {
+ labelorder = get_labelorder(divlist,labeltable,total_nintervals);
+ FWRITE_INTS(labelorder,total_nintervals,fp);
+ FREE(labelorder);
+ }
/* Write label pointers */
#ifdef HAVE_64_BIT
diff --git a/src/iit_get.c b/src/iit_get.c
index 1f4905d..a37b468 100644
--- a/src/iit_get.c
+++ b/src/iit_get.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit_get.c 115892 2013-11-20 22:52:31Z twu $";
+static char rcsid[] = "$Id: iit_get.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -633,7 +633,9 @@ print_interval_univ (Univcoord_T coordstart, Univcoord_T coordend, int index,
debug(printf("index is %d\n",index));
interval = Univ_IIT_interval(chromosome_iit,index);
- printf("%lu..%lu",Univinterval_low(interval),Univinterval_high(interval));
+ printf("%llu..%llu",
+ (unsigned long long) Univinterval_low(interval),
+ (unsigned long long) Univinterval_high(interval));
if (Univinterval_type(interval) > 0) {
printf(" %s",Univ_IIT_typestring(chromosome_iit,Univinterval_type(interval)));
}
diff --git a/src/iit_store.c b/src/iit_store.c
index 1859818..028dcb8 100644
--- a/src/iit_store.c
+++ b/src/iit_store.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit_store.c 118464 2013-11-27 20:12:59Z twu $";
+static char rcsid[] = "$Id: iit_store.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -321,11 +321,8 @@ scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelis
debug(printf(" and coords %s as a number\n",coords));
*end = *start;
} else if (isrange(&(*start),&(*end),coords)) {
-#ifdef HAVE_64_BIT
- debug(printf(" and coords %s as a range starting at %lu and ending at %lu\n",coords,*start,*end));
-#else
- debug(printf(" and coords %s as a range starting at %u and ending at %u\n",coords,*start,*end));
-#endif
+ debug(printf(" and coords %s as a range starting at %llu and ending at %llu\n",
+ coords,(unsigned long long) *start,(unsigned long long) *end));
} else {
fprintf(stderr,"Error parsing %s:%s. Expecting coords (as <div>:<number>..<number>)\n",query,coords);
fprintf(stderr,"Problematic line was: %s\n",header);
@@ -410,11 +407,7 @@ scan_header_spaces (int *labellength, bool *seenp, List_T *divlist, List_T *type
/* Example: >A 1 10 X red. Here, A is a label, 1 and 10 are start and end, X is a div, and red is a type. */
*seenp = false;
-#ifdef HAVE_64_BIT
- nscanned = sscanf(header,">%s %lu %lu\n",Buffer,&(*start),&(*end));
-#else
- nscanned = sscanf(header,">%s %u %u\n",Buffer,&(*start),&(*end));
-#endif
+ nscanned = sscanf(header,">%s %llu %llu\n",Buffer,&(*start),&(*end));
if (nscanned < 3) {
fprintf(stderr,"Error parsing %s. Expecting a FASTA type header with a label, two coordinates, and optional tag.\n",header);
exit(9);
@@ -657,15 +650,9 @@ parse_fasta (bool *valuep, Univcoord_T *max_coordinate, Univcoord_T *label_total
FREE(divstring);
}
-#ifdef HAVE_64_BIT
- fprintf(stderr,"Maximum coordinate: %lu\n",*max_coordinate);
- fprintf(stderr,"Total label length: %lu + %d separators\n",*label_totallength,nentries);
- fprintf(stderr,"Total annotation length: %lu + %d separators\n",*annot_totallength,nentries);
-#else
- fprintf(stderr,"Maximum coordinate: %u\n",*max_coordinate);
- fprintf(stderr,"Total label length: %u + %d separators\n",*label_totallength,nentries);
- fprintf(stderr,"Total annotation length: %u + %d separators\n",*annot_totallength,nentries);
-#endif
+ fprintf(stderr,"Maximum coordinate: %llu\n",(unsigned long long) *max_coordinate);
+ fprintf(stderr,"Total label length: %llu + %d separators\n",(unsigned long long) *label_totallength,nentries);
+ fprintf(stderr,"Total annotation length: %llu + %d separators\n",(unsigned long long) *annot_totallength,nentries);
*label_totallength += nentries;
*annot_totallength += nentries;
diff --git a/src/indel.c b/src/indel.c
index 3574e91..f64254c 100644
--- a/src/indel.c
+++ b/src/indel.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indel.c 148844 2014-09-24 21:32:56Z twu $";
+static char rcsid[] = "$Id: indel.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -64,8 +64,8 @@ Indel_solve_middle_insertion (bool *foundp, int *found_score, int *nhits, List_T
assert(indels > 0);
debug2(Genome_fill_buffer_blocks(left+indels,querylength-indels,gbuffer));
- debug2(printf("solve_middle_indel, plus, insertion: Getting genome at diagonal - querylength %d + indels %d = %lu\n",
- querylength,indels,left+indels));
+ debug2(printf("solve_middle_indel, plus, insertion: Getting genome at diagonal - querylength %d + indels %d = %llu\n",
+ querylength,indels,(unsigned long long) left+indels));
debug2(printf("g1: %s\n",gbuffer));
debug2(printf("g2: %s\n",&(gbuffer[indels])));
@@ -227,8 +227,8 @@ Indel_solve_middle_deletion (bool *foundp, int *found_score, int *nhits, List_T
assert(indels < 0);
debug2(gbuffer = (char *) CALLOC(querylength-indels+1,sizeof(char)));
debug2(Genome_fill_buffer_blocks(left,querylength-indels,gbuffer));
- debug2(printf("solve_middle_indel, plus, deletion (indels %d): Getting genome at diagonal - querylength %d = %lu\n",
- indels,querylength,left));
+ debug2(printf("solve_middle_indel, plus, deletion (indels %d): Getting genome at diagonal - querylength %d = %llu\n",
+ indels,querylength,(unsigned long long) left));
debug2(printf("g1: %s\n",gbuffer));
debug2(printf("g2: %s\n",&(gbuffer[-indels])));
debug2(FREE(gbuffer));
diff --git a/src/indexdb-write.c b/src/indexdb-write.c
index 4c3792e..718c6e3 100644
--- a/src/indexdb-write.c
+++ b/src/indexdb-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb-write.c 151046 2014-10-16 19:08:41Z twu $";
+static char rcsid[] = "$Id: indexdb-write.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -192,8 +192,9 @@ check_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile, Positi
fprintf(stderr,"Problem with bitpack at oligo %u+%u = %u: %u != %u. Please inform twu at gene.com\n",
oligoi,i,oligoi+i,offsets_decoded[i],offsets[oligoi+i]);
#else
- fprintf(stderr,"Problem with bitpack at oligo %lu+%lu = %lu: %u != %u. Please inform twu at gene.com\n",
- oligoi,i,oligoi+i,offsets_decoded[i],offsets[oligoi+i]);
+ fprintf(stderr,"Problem with bitpack at oligo %llu+%llu = %llu: %llu != %llu. Please inform twu at gene.com\n",
+ (unsigned long long) oligoi,(unsigned long long) i,
+ (unsigned long long) oligoi+i,(unsigned long long) offsets_decoded[i],(unsigned long long) offsets[oligoi+i]);
#endif
exit(9);
}
@@ -243,11 +244,12 @@ check_offsets_from_bitpack_huge (char *offsetspagesfile, char *offsetsmetafile,
for (i = 0; i <= 64; i++) {
if (offsets64[i] != offsets[oligoi+i]) {
#ifdef OLIGOSPACE_NOT_LONG
- fprintf(stderr,"\nProblem with bitpack64 at oligo %u+%u = %u: uncompressed %lu != expected %lu. Your compiler may be defective. Please inform twu at gene.com\n",
- oligoi,i,oligoi+i,offsets64[i],offsets[oligoi+i]);
+ fprintf(stderr,"\nProblem with bitpack64 at oligo %u+%u = %u: uncompressed %llu != expected %llu. Your compiler may be defective. Please inform twu at gene.com\n",
+ oligoi,i,oligoi+i,(unsigned long long) offsets64[i],(unsigned long long) offsets[oligoi+i]);
#else
- fprintf(stderr,"\nProblem with bitpack64 at oligo %lu+%lu = %lu: uncompressed %lu != expected %lu. Your compiler may be defective. Please inform twu at gene.com\n",
- oligoi,i,oligoi+i,offsets64[i],offsets[oligoi+i]);
+ fprintf(stderr,"\nProblem with bitpack64 at oligo %llu+%llu = %llu: uncompressed %llu != expected %llu. Your compiler may be defective. Please inform twu at gene.com\n",
+ (unsigned long long) oligoi,(unsigned long long) i,(unsigned long long) oligoi+i,
+ (unsigned long long) offsets64[i],(unsigned long long) offsets[oligoi+i]);
#endif
}
}
@@ -373,7 +375,7 @@ Indexdb_count_offsets (FILE *sequence_fp, Univ_IIT_T chromosome_iit,
if (genome_lc_p == true) {
in_counter = 0;
} else {
- fprintf(stderr,"Bad character %c at position %lu\n",c,position);
+ fprintf(stderr,"Bad character %c at position %llu\n",c,(unsigned long long) position);
abort();
}
}
@@ -493,16 +495,17 @@ Indexdb_write_offsets (char *destdir, char interval_char, FILE *sequence_fp, Uni
between_counter[0] = between_counter[1] = between_counter[2] = 0;
in_counter[0] = in_counter[1] = in_counter[2] = 0;
#ifdef OLIGOSPACE_NOT_LONG
- fprintf(stderr,"Allocating %u*%lu bytes for offsets\n",oligospace+1U,sizeof(Positionsptr_T));
+ fprintf(stderr,"Allocating %u*%d bytes for offsets\n",oligospace+1U,(int) sizeof(Positionsptr_T));
#else
- fprintf(stderr,"Allocating %lu*%lu bytes for offsets\n",oligospace+1UL,sizeof(Positionsptr_T));
+ fprintf(stderr,"Allocating %llu*%d bytes for offsets\n",(unsigned long long) oligospace+1UL,(int) sizeof(Positionsptr_T));
#endif
offsets = (Positionsptr_T *) CALLOC_NO_EXCEPTION(oligospace+1,sizeof(Positionsptr_T));
if (offsets == NULL) {
#ifdef OLIGOSPACE_NOT_LONG
fprintf(stderr,"Unable to allocate %u bytes of memory, needed to build offsets with %d-mers\n",oligospace+1U,index1part_aa);
#else
- fprintf(stderr,"Unable to allocate %lu bytes of memory, needed to build offsets with %d-mers\n",oligospace+1UL,index1part_aa);
+ fprintf(stderr,"Unable to allocate %llu bytes of memory, needed to build offsets with %d-mers\n",
+ (unsigned long long) oligospace+1UL,index1part_aa);
#endif
fprintf(stderr,"Either find a computer with more RAM, or lower your value for the k-mer size\n");
exit(9);
@@ -511,16 +514,17 @@ Indexdb_write_offsets (char *destdir, char interval_char, FILE *sequence_fp, Uni
mask = ~(~0UL << 2*index1part);
oligospace = power(4,index1part);
#ifdef OLIGOSPACE_NOT_LONG
- fprintf(stderr,"Allocating %u*%lu bytes for offsets\n",oligospace+1U,sizeof(Positionsptr_T));
+ fprintf(stderr,"Allocating %u*%d bytes for offsets\n",oligospace+1U,(int) sizeof(Positionsptr_T));
#else
- fprintf(stderr,"Allocating %lu*%lu bytes for offsets\n",oligospace+1UL,sizeof(Positionsptr_T));
+ fprintf(stderr,"Allocating %llu*%d bytes for offsets\n",(unsigned long long) oligospace+1UL,(int) sizeof(Positionsptr_T));
#endif
offsets = (Positionsptr_T *) CALLOC_NO_EXCEPTION(oligospace+1,sizeof(Positionsptr_T));
if (offsets == NULL) {
#ifdef OLIGOSPACE_NOT_LONG
fprintf(stderr,"Unable to allocate %u bytes of memory, needed to build offsets with %d-mers\n",oligospace+1U,index1part);
#else
- fprintf(stderr,"Unable to allocate %lu bytes of memory, needed to build offsets with %d-mers\n",oligospace+1UL,index1part);
+ fprintf(stderr,"Unable to allocate %llu bytes of memory, needed to build offsets with %d-mers\n",
+ (unsigned long long) oligospace+1UL,index1part);
#endif
fprintf(stderr,"Either find a computer with more RAM, or lower your value for the k-mer size\n");
exit(9);
@@ -598,7 +602,7 @@ Indexdb_write_offsets (char *destdir, char interval_char, FILE *sequence_fp, Uni
if (genome_lc_p == true) {
oligo = 0U; in_counter = 0;
} else {
- fprintf(stderr,"Bad character %c at position %lu\n",c,position);
+ fprintf(stderr,"Bad character %c at position %llu\n",c,(unsigned long long) position);
abort();
}
}
@@ -659,8 +663,8 @@ Indexdb_write_offsets (char *destdir, char interval_char, FILE *sequence_fp, Uni
oligoi = (Oligospace_T) masked + 1UL;
#endif
offsets[oligoi] += 1;
- debug(printf("Found oligo %06X. Incremented offsets for %lu to be %u\n",
- masked,oligoi,offsets[oligoi]));
+ debug(printf("Found oligo %06X. Incremented offsets for %llu to be %llu\n",
+ masked,(unsigned long long) oligoi,(unsigned long long) offsets[oligoi]));
between_counter = 0;
}
in_counter--;
@@ -735,7 +739,8 @@ Indexdb_write_offsets (char *destdir, char interval_char, FILE *sequence_fp, Uni
fprintf(stderr,"Can't write to file %s\n",offsetsfile);
exit(9);
} else {
- fprintf(stderr,"Writing %lu offsets to file with total of %u k-mers...",oligospace+1,offsets[oligospace]);
+ fprintf(stderr,"Writing %llu offsets to file with total of %llu k-mers...",
+ (unsigned long long) oligospace+1,(unsigned long long) offsets[oligospace]);
FWRITE_UINTS(offsets,oligospace+1,offsets_fp);
fprintf(stderr,"done\n");
}
@@ -771,7 +776,8 @@ Indexdb_write_offsets (char *destdir, char interval_char, FILE *sequence_fp, Uni
#ifdef OLIGOSPACE_NOT_LONG
fprintf(stderr,"Writing %u offsets compressed via bitpack64 to file with total of %u k-mers...",oligospace+1U,offsets[oligospace]);
#else
- fprintf(stderr,"Writing %lu offsets compressed via bitpack64 to file with total of %u k-mers...",oligospace+1UL,offsets[oligospace]);
+ fprintf(stderr,"Writing %llu offsets compressed via bitpack64 to file with total of %llu k-mers...",
+ (unsigned long long) oligospace+1UL,(unsigned long long) offsets[oligospace]);
#endif
Bitpack64_write_differential(pointersfile,offsetsfile,offsets,oligospace);
@@ -849,16 +855,18 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
between_counter[0] = between_counter[1] = between_counter[2] = 0;
in_counter[0] = in_counter[1] = in_counter[2] = 0;
#ifdef OLIGOSPACE_NOT_LONG
- fprintf(stderr,"Allocating %u*%lu bytes for offsets\n",oligospace+1U,sizeof(Hugepositionsptr_T));
+ fprintf(stderr,"Allocating %u*%d bytes for offsets\n",oligospace+1U,(int) sizeof(Hugepositionsptr_T));
#else
- fprintf(stderr,"Allocating %lu*%lu bytes for offsets\n",oligospace+1UL,sizeof(Hugepositionsptr_T));
+ fprintf(stderr,"Allocating %llu*%d bytes for offsets\n",
+ (unsigned long long) oligospace+1UL,(int) sizeof(Hugepositionsptr_T));
#endif
offsets = (Hugepositionsptr_T *) CALLOC_NO_EXCEPTION(oligospace+1,sizeof(Hugepositionsptr_T));
if (offsets == NULL) {
#ifdef OLIGOSPACE_NOT_LONG
fprintf(stderr,"Unable to allocate %u bytes of memory, needed to build offsets with %d-mers\n",oligospace+1U,index1part_aa);
#else
- fprintf(stderr,"Unable to allocate %lu bytes of memory, needed to build offsets with %d-mers\n",oligospace+1UL,index1part_aa);
+ fprintf(stderr,"Unable to allocate %llu bytes of memory, needed to build offsets with %d-mers\n",
+ (unsigned long long) oligospace+1UL,index1part_aa);
#endif
fprintf(stderr,"Either find a computer with more RAM, or lower your value for the k-mer size\n");
exit(9);
@@ -867,16 +875,17 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
mask = ~(~0UL << 2*index1part);
oligospace = power(4,index1part);
#ifdef OLIGOSPACE_NOT_LONG
- fprintf(stderr,"Allocating %u*%lu bytes for offsets\n",oligospace+1U,sizeof(Hugepositionsptr_T));
+ fprintf(stderr,"Allocating %u*%d bytes for offsets\n",oligospace+1U,(int) sizeof(Hugepositionsptr_T));
#else
- fprintf(stderr,"Allocating %lu*%lu bytes for offsets\n",oligospace+1UL,sizeof(Hugepositionsptr_T));
+ fprintf(stderr,"Allocating %llu*%d bytes for offsets\n",(unsigned long long) oligospace+1UL,(int) sizeof(Hugepositionsptr_T));
#endif
offsets = (Hugepositionsptr_T *) CALLOC_NO_EXCEPTION(oligospace+1,sizeof(Hugepositionsptr_T));
if (offsets == NULL) {
#ifdef OLIGOSPACE_NOT_LONG
fprintf(stderr,"Unable to allocate %u bytes of memory, needed to build offsets with %d-mers\n",oligospace+1U,index1part);
#else
- fprintf(stderr,"Unable to allocate %lu bytes of memory, needed to build offsets with %d-mers\n",oligospace+1UL,index1part);
+ fprintf(stderr,"Unable to allocate %llu bytes of memory, needed to build offsets with %d-mers\n",
+ (unsigned long long) oligospace+1UL,index1part);
#endif
fprintf(stderr,"Either find a computer with more RAM, or lower your value for the k-mer size\n");
exit(9);
@@ -953,7 +962,7 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
if (genome_lc_p == true) {
oligo = 0U; in_counter = 0;
} else {
- fprintf(stderr,"Bad character %c at position %lu\n",c,position);
+ fprintf(stderr,"Bad character %c at position %llu\n",c,(unsigned long long) position);
abort();
}
}
@@ -1014,8 +1023,8 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
oligoi = (Oligospace_T) masked + 1UL;
#endif
offsets[oligoi] += 1;
- debug(printf("Found oligo %06X. Incremented offsets for %lu to be %u\n",
- masked,oligoi,offsets[oligoi]));
+ debug(printf("Found oligo %06X. Incremented offsets for %llu to be %llu\n",
+ masked,(unsigned long long) oligoi,(unsigned long long) offsets[oligoi]));
between_counter = 0;
}
in_counter--;
@@ -1119,9 +1128,10 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
#endif
#ifdef OLIGOSPACE_NOT_LONG
- fprintf(stderr,"Writing %u offsets compressed via bitpack to file with total of %lu k-mers...",oligospace+1U,offsets[oligospace]);
+ fprintf(stderr,"Writing %u offsets compressed via bitpack to file with total of %u k-mers...",oligospace+1U,offsets[oligospace]);
#else
- fprintf(stderr,"Writing %lu offsets compressed via bitpack to file with total of %lu k-mers...",oligospace+1UL,offsets[oligospace]);
+ fprintf(stderr,"Writing %llu offsets compressed via bitpack to file with total of %llu k-mers...",
+ (unsigned long long) oligospace+1UL,(unsigned long long) offsets[oligospace]);
#endif
Bitpack64_write_differential_huge(pagesfile,pointersfile,offsetsfile,offsets,oligospace);
@@ -1148,24 +1158,6 @@ Indexdb_write_offsets_huge (char *destdir, char interval_char, FILE *sequence_fp
#endif
-/* FILE *fp is preferable to int fd, because former is buffered. No
- need for fseeko, because offsets file is < 2 Gigabytes */
-#if 0
-static void
-offsetsfile_move_absolute (FILE *fp, int ptr) {
- long int offset = ptr*((long int) sizeof(Positionsptr_T));
-
- if (fseek(fp,offset,SEEK_SET) < 0) {
- fprintf(stderr,"Attempted to do fseek on offset %u*%lu=%lu\n",ptr,sizeof(Positionsptr_T),offset);
- perror("Error in indexdb.c, offsetsfile_move_absolute");
- exit(9);
- }
-
- return;
-}
-#endif
-
-
#if 0
static bool
need_to_sort_p (Univcoord_T *positions, int length) {
@@ -1190,7 +1182,7 @@ positions_move_absolute_1 (int positions_fd, Positionsptr_T ptr) {
off_t offset = ptr*((off_t) sizeof(unsigned char));
if (lseek(positions_fd,offset,SEEK_SET) < 0) {
- fprintf(stderr,"Attempted to do lseek on offset %u*%lu=%lu\n",ptr,sizeof(unsigned char),offset);
+ fprintf(stderr,"Attempted to do lseek on offset %jd*%d=%jd\n",ptr,(int) sizeof(unsigned char),offset);
perror("Error in indexdb.c, positions_move_absolute_1");
exit(9);
}
@@ -1202,7 +1194,7 @@ positions_move_absolute_4 (int positions_fd, Positionsptr_T ptr) {
off_t offset = ptr*((off_t) sizeof(UINT4));
if (lseek(positions_fd,offset,SEEK_SET) < 0) {
- fprintf(stderr,"Attempted to do lseek on offset %u*%lu=%lu\n",ptr,sizeof(UINT4),offset);
+ fprintf(stderr,"Attempted to do lseek on offset %jd*%d=%jd\n",ptr,(int) sizeof(UINT4),offset);
perror("Error in indexdb.c, positions_move_absolute_4");
exit(9);
}
@@ -1216,7 +1208,7 @@ positions_move_absolute_8 (int positions_fd, Positionsptr_T ptr) {
off_t offset = ptr*((off_t) sizeof(UINT8));
if (lseek(positions_fd,offset,SEEK_SET) < 0) {
- fprintf(stderr,"Attempted to do lseek on offset %u*%lu=%lu\n",ptr,sizeof(UINT8),offset);
+ fprintf(stderr,"Attempted to do lseek on offset %jd*%d=%jd\n",ptr,(int) sizeof(UINT8),offset);
perror("Error in indexdb.c, positions_move_absolute_8");
exit(9);
}
@@ -1342,7 +1334,7 @@ compute_positions_in_file (int positions_high_fd, int positions_low_fd, Position
if (genome_lc_p == true) {
oligo = 0U; in_counter = 0;
} else {
- fprintf(stderr,"Bad character %c at position %lu\n",c,position);
+ fprintf(stderr,"Bad character %c at position %llu\n",c,(unsigned long long) position);
abort();
}
}
@@ -1579,7 +1571,7 @@ compute_positions_in_memory (UINT4 *positions4, unsigned char *positions8_high,
if (genome_lc_p == true) {
oligo = 0U; in_counter = 0;
} else {
- fprintf(stderr,"Bad character %c at position %lu\n",c,position);
+ fprintf(stderr,"Bad character %c at position %llu\n",c,(unsigned long long) position);
abort();
}
}
@@ -1658,7 +1650,7 @@ compute_positions_in_memory (UINT4 *positions4, unsigned char *positions8_high,
positions4[offsets[masked]++] = (UINT4) (position - index1part + 1);
}
debug1(nt = shortoligo_nt(masked,index1part);
- printf("Storing %s at %lu, chrpos %u\n",nt,position-index1part+1U,chrpos-index1part+1U);
+ printf("Storing %s at %llu, chrpos %u\n",nt,(unsigned long long) (position-index1part+1U),chrpos-index1part+1U);
FREE(nt));
between_counter = 0;
}
@@ -1830,7 +1822,7 @@ compute_positions_in_memory_huge (UINT4 *positions4, unsigned char *positions8_h
if (genome_lc_p == true) {
oligo = 0U; in_counter = 0;
} else {
- fprintf(stderr,"Bad character %c at position %lu\n",c,position);
+ fprintf(stderr,"Bad character %c at position %llu\n",c,(unsigned long long) position);
abort();
}
}
@@ -1909,7 +1901,7 @@ compute_positions_in_memory_huge (UINT4 *positions4, unsigned char *positions8_h
positions4[offsets[masked]++] = (UINT4) (position - index1part + 1);
}
debug1(nt = shortoligo_nt(masked,index1part);
- printf("Storing %s at %lu, chrpos %u\n",nt,position-index1part+1U,chrpos-index1part+1U);
+ printf("Storing %s at %llu, chrpos %u\n",nt,(unsigned long long) (position-index1part+1U),chrpos-index1part+1U);
FREE(nt));
between_counter = 0;
}
@@ -2048,11 +2040,11 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
close(positions_low_fd);
if ((filesize = Access_filesize(positionsfile_high)) != totalcounts * (off_t) sizeof(unsigned char)) {
- fprintf(stderr,"Error after file-based build: expected file size for %s is %lu, but observed only %lu. Please notify twu at gene.com of this error.\n",
+ fprintf(stderr,"Error after file-based build: expected file size for %s is %zu, but observed only %zu. Please notify twu at gene.com of this error.\n",
positionsfile_high,totalcounts*sizeof(unsigned char),filesize);
abort();
} else if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error after file-based build: expected file size for %s is %lu, but observed only %lu. Please notify twu at gene.com of this error.\n",
+ fprintf(stderr,"Error after file-based build: expected file size for %s is %zu, but observed only %zu. Please notify twu at gene.com of this error.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
abort();
}
@@ -2086,7 +2078,7 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
fclose(positions_low_fp);
if ((filesize = Access_filesize(positionsfile_high)) != totalcounts * (off_t) sizeof(unsigned char)) {
- fprintf(stderr,"Error: expected file size for %s is %lu, but observed only %lu. Trying now to write with smaller chunks.\n",
+ fprintf(stderr,"Error: expected file size for %s is %zu, but observed only %zu. Trying now to write with smaller chunks.\n",
positionsfile_high,totalcounts*sizeof(unsigned char),filesize);
if ((positions_high_fp = FOPEN_WRITE_BINARY(positionsfile_high)) == NULL) {
fprintf(stderr,"Can't open file %s\n",positionsfile_high);
@@ -2101,14 +2093,14 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
fclose(positions_high_fp);
if ((filesize = Access_filesize(positionsfile_high)) != totalcounts * (off_t) sizeof(unsigned char)) {
- fprintf(stderr,"Error persists: expected file size for %s is %lu, but observed only %lu. Please notify twu at gene.com of this error.\n",
+ fprintf(stderr,"Error persists: expected file size for %s is %zu, but observed only %zu. Please notify twu at gene.com of this error.\n",
positionsfile_high,totalcounts*sizeof(unsigned char),filesize);
abort();
}
}
if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error: expected file size for %s is %lu, but observed only %lu. Trying now to write with smaller chunks.\n",
+ fprintf(stderr,"Error: expected file size for %s is %zu, but observed only %zu. Trying now to write with smaller chunks.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
if ((positions_low_fp = FOPEN_WRITE_BINARY(positionsfile_low)) == NULL) {
fprintf(stderr,"Can't open file %s\n",positionsfile_low);
@@ -2123,7 +2115,7 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
fclose(positions_low_fp);
if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error persists: expected file size for %s is %lu, but observed only %lu. Please notify twu at gene.com of this error.\n",
+ fprintf(stderr,"Error persists: expected file size for %s is %zu, but observed only %zu. Please notify twu at gene.com of this error.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
abort();
}
@@ -2151,7 +2143,7 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
close(positions_low_fd);
if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error after file-based build: expected file size for %s is %lu, but observed only %lu. Please notify twu at gene.com of this error.\n",
+ fprintf(stderr,"Error after file-based build: expected file size for %s is %zu, but observed only %zu. Please notify twu at gene.com of this error.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
abort();
}
@@ -2180,7 +2172,7 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
fclose(positions_low_fp);
if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error: expected file size for %s is %lu, but observed only %lu. Trying now to write with smaller chunks.\n",
+ fprintf(stderr,"Error: expected file size for %s is %zu, but observed only %zu. Trying now to write with smaller chunks.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
if ((positions_low_fp = FOPEN_WRITE_BINARY(positionsfile_low)) == NULL) {
fprintf(stderr,"Can't open file %s\n",positionsfile_low);
@@ -2195,7 +2187,7 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
fclose(positions_low_fp);
if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error persists: expected file size for %s is %lu, but observed only %lu. Please notify twu at gene.com of this error.\n",
+ fprintf(stderr,"Error persists: expected file size for %s is %zu, but observed only %zu. Please notify twu at gene.com of this error.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
abort();
}
@@ -2259,8 +2251,8 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
abort();
} else if (coord_values_8p == true) {
- fprintf(stderr,"Trying to allocate %lu*(%d+%d) bytes of memory for positions...",
- totalcounts,(int) sizeof(unsigned char),(int) sizeof(UINT8));
+ fprintf(stderr,"Trying to allocate %llu*(%d+%d) bytes of memory for positions...",
+ (unsigned long long) totalcounts,(int) sizeof(unsigned char),(int) sizeof(UINT8));
positions8_high = (unsigned char *) CALLOC_NO_EXCEPTION(totalcounts,sizeof(unsigned char));
positions8_low = (UINT4 *) CALLOC_NO_EXCEPTION(totalcounts,sizeof(UINT4));
if (positions8_high == NULL || positions8_low == NULL) {
@@ -2286,8 +2278,8 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
sequence_fp,chromosome_iit,index1part,index1interval,
genome_lc_p,fileroot,mask_lowercase_p,/*coord_values_8p*/true);
#endif
- fprintf(stderr,"Writing %lu genomic positions to files %s and %s...\n",
- totalcounts,positionsfile_high,positionsfile_low);
+ fprintf(stderr,"Writing %llu genomic positions to files %s and %s...\n",
+ (unsigned long long) totalcounts,positionsfile_high,positionsfile_low);
FWRITE_CHARS(positions8_high,totalcounts,positions_high_fp);
FWRITE_UINTS(positions8_low,totalcounts,positions_low_fp);
@@ -2295,7 +2287,7 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
fclose(positions_low_fp);
if ((filesize = Access_filesize(positionsfile_high)) != totalcounts * (off_t) sizeof(unsigned char)) {
- fprintf(stderr,"Error: expected file size for %s is %lu, but observed only %lu. Trying now to write with smaller chunks.\n",
+ fprintf(stderr,"Error: expected file size for %s is %zu, but observed only %zu. Trying now to write with smaller chunks.\n",
positionsfile_high,totalcounts*sizeof(unsigned char),filesize);
if ((positions_high_fp = FOPEN_WRITE_BINARY(positionsfile_high)) == NULL) {
fprintf(stderr,"Can't open file %s\n",positionsfile_high);
@@ -2310,14 +2302,14 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
fclose(positions_high_fp);
if ((filesize = Access_filesize(positionsfile_high)) != totalcounts * (off_t) sizeof(unsigned char)) {
- fprintf(stderr,"Error persists: expected file size for %s is %lu, but observed only %lu. Please notify twu at gene.com of this error.\n",
+ fprintf(stderr,"Error persists: expected file size for %s is %zu, but observed only %zu. Please notify twu at gene.com of this error.\n",
positionsfile_high,totalcounts*sizeof(unsigned char),filesize);
abort();
}
}
if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error: expected file size for %s is %lu, but observed only %lu. Trying now to write with smaller chunks.\n",
+ fprintf(stderr,"Error: expected file size for %s is %zu, but observed only %zu. Trying now to write with smaller chunks.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
if ((positions_low_fp = FOPEN_WRITE_BINARY(positionsfile_low)) == NULL) {
fprintf(stderr,"Can't open file %s\n",positionsfile_low);
@@ -2332,7 +2324,7 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
fclose(positions_low_fp);
if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error persists: expected file size for %s is %lu, but observed only %lu. Please notify twu at gene.com of this error.\n",
+ fprintf(stderr,"Error persists: expected file size for %s is %zu, but observed only %zu. Please notify twu at gene.com of this error.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
abort();
}
@@ -2347,7 +2339,7 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
fprintf(stderr,"Please report this bug to twu at gene.com\n");
abort();
- fprintf(stderr,"Trying to allocate %lu*%d bytes of memory for positions...",totalcounts,(int) sizeof(UINT4));
+ fprintf(stderr,"Trying to allocate %llu*%d bytes of memory for positions...",(unsigned long long) totalcounts,(int) sizeof(UINT4));
positions4 = (UINT4 *) CALLOC_NO_EXCEPTION(totalcounts,sizeof(UINT4));
if (positions4 == NULL) {
fprintf(stderr,"failed. Not able to proceed.\n");
@@ -2368,14 +2360,14 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
sequence_fp,chromosome_iit,index1part,index1interval,
genome_lc_p,fileroot,mask_lowercase_p,/*coord_values_8p*/false);
#endif
- fprintf(stderr,"Writing %lu genomic positions to file %s ...\n",
- totalcounts,positionsfile_low);
+ fprintf(stderr,"Writing %llu genomic positions to file %s ...\n",
+ (unsigned long long) totalcounts,positionsfile_low);
FWRITE_UINTS(positions4,totalcounts,positions_low_fp);
fclose(positions_low_fp);
if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error: expected file size for %s is %lu, but observed only %lu. Trying now to write with smaller chunks.\n",
+ fprintf(stderr,"Error: expected file size for %s is %zu, but observed only %zu. Trying now to write with smaller chunks.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
if ((positions_low_fp = FOPEN_WRITE_BINARY(positionsfile_low)) == NULL) {
fprintf(stderr,"Can't open file %s\n",positionsfile_low);
@@ -2390,7 +2382,7 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
fclose(positions_low_fp);
if ((filesize = Access_filesize(positionsfile_low)) != totalcounts * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Error persists: expected file size for %s is %lu, but observed only %lu. Please notify twu at gene.com of this error.\n",
+ fprintf(stderr,"Error persists: expected file size for %s is %zu, but observed only %zu. Please notify twu at gene.com of this error.\n",
positionsfile_low,totalcounts*sizeof(UINT4),filesize);
abort();
}
diff --git a/src/indexdb.c b/src/indexdb.c
index bee9c4f..5be1780 100644
--- a/src/indexdb.c
+++ b/src/indexdb.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb.c 150679 2014-10-14 00:46:33Z twu $";
+static char rcsid[] = "$Id: indexdb.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1169,13 +1169,13 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
end0 = new->offsetsstrm[poly_T+1];
#ifdef LARGE_GENOMES
if ((filesize = Access_filesize(filenames->positions_high_filename)) != end0 * (off_t) sizeof(unsigned char)) {
- fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %lu, but observed %lu.\n",
+ fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %zu, but observed %zu.\n",
filenames->positions_high_filename,end0*sizeof(unsigned char),filesize);
abort();
}
#endif
if ((filesize = Access_filesize(filenames->positions_low_filename)) != end0 * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %lu, but observed %lu.\n",
+ fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %zu, but observed %zu.\n",
filenames->positions_low_filename,end0*sizeof(UINT4),filesize);
abort();
}
@@ -1299,13 +1299,13 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
#ifdef LARGE_GENOMES
ptr0 = Bitpack64_read_two_huge(&end0,poly_T,new->offsetspages,new->offsetsmeta,new->offsetsstrm);
if ((filesize = Access_filesize(filenames->positions_high_filename)) != end0 * (off_t) sizeof(unsigned char)) {
- fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %lu, but observed %lu.\n",
+ fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %zu, but observed %zu.\n",
filenames->positions_high_filename,end0*sizeof(unsigned char),filesize);
abort();
}
#endif
if ((filesize = Access_filesize(filenames->positions_low_filename)) != end0 * (off_t) sizeof(UINT4)) {
- fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %lu, but observed %lu.\n",
+ fprintf(stderr,"Something is wrong with the genomic index: expected file size for %s is %zu, but observed %zu.\n",
filenames->positions_low_filename,end0*sizeof(UINT4),filesize);
abort();
}
@@ -1520,8 +1520,8 @@ positions_move_absolute_1 (int positions_fd, off_t ptr) {
off_t offset = ptr*((off_t) sizeof(unsigned char));
if (lseek(positions_fd,offset,SEEK_SET) < 0) {
- fprintf(stderr,"Attempted to do lseek on offset %ld*%lu=%ld\n",
- ptr,sizeof(unsigned char),offset);
+ fprintf(stderr,"Attempted to do lseek on offset %jd*%d=%jd\n",
+ ptr,(int) sizeof(unsigned char),offset);
perror("Error in indexdb.c, positions_move_absolute");
exit(9);
}
@@ -1533,8 +1533,8 @@ positions_move_absolute_4 (int positions_fd, off_t ptr) {
off_t offset = ptr*((off_t) sizeof(UINT4));
if (lseek(positions_fd,offset,SEEK_SET) < 0) {
- fprintf(stderr,"Attempted to do lseek on offset %ld*%lu=%ld\n",
- ptr,sizeof(UINT4),offset);
+ fprintf(stderr,"Attempted to do lseek on offset %jd*%d=%jd\n",
+ ptr,(int) sizeof(UINT4),offset);
perror("Error in indexdb.c, positions_move_absolute");
exit(9);
}
@@ -1738,7 +1738,7 @@ Indexdb_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
#endif
);
#else
- fprintf(stderr,"Allocating memory (%lu 4-byte words) for offsets, kmer %d...",oligospace+1UL,
+ fprintf(stderr,"Allocating memory (%llu 4-byte words) for offsets, kmer %d...",(unsigned long long) oligospace+1UL,
#ifdef PMAP
index1part_aa
#else
@@ -1841,7 +1841,7 @@ Indexdb_offsets_from_bitpack_huge (char *offsetspagesfile, char *offsetsmetafile
#endif
);
#else
- fprintf(stderr,"Allocating memory (%lu 8-byte words) for offsets, kmer %d...",oligospace+1UL,
+ fprintf(stderr,"Allocating memory (%llu 8-byte words) for offsets, kmer %d...",(unsigned long long) oligospace+1UL,
#ifdef PMAP
index1part_aa
#else
@@ -2250,7 +2250,7 @@ Indexdb_read_inplace (int *nentries,
debug0(
printf("%d entries:",*nentries);
for (ptr = ptr0; ptr < end0; ptr++) {
- printf(" %d %lu",this->positions_high[ptr],this->positions_low[ptr]);
+ printf(" %d %u",this->positions_high[ptr],this->positions_low[ptr]);
}
printf("\n");
);
@@ -2261,7 +2261,7 @@ Indexdb_read_inplace (int *nentries,
debug0(
printf("%d entries:",*nentries);
for (ptr = ptr0; ptr < end0; ptr++) {
- printf(" %lu",this->positions[ptr]);
+ printf(" %u",this->positions[ptr]);
}
printf("\n");
);
@@ -2626,8 +2626,8 @@ Indexdb_new_segment (char *genomicseg,
oligoi = (Oligospace_T) masked + 1UL;
#endif
new->offsetsstrm[oligoi] += 1;
- debug(printf("Found oligo %06X. Incremented offsets for %lu to be %u\n",
- masked,oligoi,new->offsetsstrm[oligoi]));
+ debug(printf("Found oligo %06X. Incremented offsets for %llu to be %u\n",
+ masked,(unsigned long long) oligoi,new->offsetsstrm[oligoi]));
between_counter = 0;
}
in_counter--;
diff --git a/src/indexdb_hr.c b/src/indexdb_hr.c
index 3b810e5..1731c2f 100644
--- a/src/indexdb_hr.c
+++ b/src/indexdb_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb_hr.c 132144 2014-04-02 16:02:28Z twu $";
+static char rcsid[] = "$Id: indexdb_hr.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -173,16 +173,16 @@ check_heap_even (Batch_T *heap, int heapsize) {
for (i = 1; i <= heapsize; i++) {
if (heap[i]->position > heap[2*i-1]->position) {
- fprintf(stderr,"Failed because position %lu at heap %d is > position %lu at heap %d\n",
- heap[i]->position,i,heap[2*i-1]->position,2*i-1);
+ fprintf(stderr,"Failed because position %llu at heap %d is > position %llu at heap %d\n",
+ (unsigned long long) heap[i]->position,i,(unsigned long long) heap[2*i-1]->position,2*i-1);
for (j = 1; j <= heapsize*2; j++) {
fprintf(stderr,"%02d %u\n",j,heap[j]->position);
}
abort();
}
if (heap[i]->position > heap[2*i]->position) {
- fprintf(stderr,"Failed because position %lu at heap %d is > position %lu at heap %d\n",
- heap[i]->position,i,heap[2*i]->position,2*i);
+ fprintf(stderr,"Failed because position %llu at heap %d is > position %llu at heap %d\n",
+ (unsigned long long) heap[i]->position,i,(unsigned long long) heap[2*i]->position,2*i);
for (j = 1; j <= heapsize*2; j++) {
fprintf(stderr,"%02d %u\n",j,heap[j]->position);
}
@@ -590,8 +590,8 @@ positions_move_absolute (int positions_fd, Positionsptr_T ptr) {
off_t offset = ptr*((off_t) sizeof(Univcoord_T));
if (lseek(positions_fd,offset,SEEK_SET) < 0) {
- fprintf(stderr,"Attempted to do lseek on offset %u*%lu=%lu\n",
- ptr,sizeof(Univcoord_T),(long unsigned int) offset);
+ fprintf(stderr,"Attempted to do lseek on offset %zd*%d=%zd\n",
+ ptr,(int) sizeof(Univcoord_T),offset);
perror("Error in indexdb.c, positions_move_absolute_4");
exit(9);
}
@@ -1115,7 +1115,7 @@ Compoundpos_dump (Compoundpos_T compoundpos, int diagterm) {
for (i = 0; i < compoundpos->n; i++) {
for (j = 0; j < compoundpos->npositions[i]; j++) {
#ifdef LARGE_GENOMES
- printf(" compound%d.%d:%lu+%d\n",
+ printf(" compound%d.%d:%llu+%d\n",
i,j,((Univcoord_T) compoundpos->positions_high[i][j] << 32) + compoundpos->positions_low[i][j],diagterm);
#elif defined(WORDS_BIGENDIAN)
printf(" compound%d.%d:%u+%d\n",
@@ -1537,11 +1537,11 @@ Compoundpos_find (bool *emptyp, Compoundpos_T compoundpos, Univcoord_T local_goa
debug6(printf("Setting emptyp to be false\n"));
*emptyp = false;
#ifdef LARGE_GENOMES
- debug6(printf("Found! Returning position %lu\n",(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low)));
+ debug6(printf("Found! Returning position %llu\n",(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low)));
#elif defined(WORDS_BIGENDIAN)
- debug6(printf("Found! Returning position %lu\n",Bigendian_convert_univcoord(*batch->positionptr)));
+ debug6(printf("Found! Returning position %u\n",Bigendian_convert_univcoord(*batch->positionptr)));
#else
- debug6(printf("Found! Returning position %lu\n",*batch->positionptr));
+ debug6(printf("Found! Returning position %u\n",*batch->positionptr));
#endif
#ifdef LARGE_GENOMES
++batch->positionptr_high;
@@ -1663,7 +1663,7 @@ Compoundpos_search (Univcoord_T *value, Compoundpos_T compoundpos, Univcoord_T l
}
if (batch->position == local_goal) {
*value = batch->position;
- debug3(printf("Found! Returning position %lu\n",*value));
+ debug3(printf("Found! Returning position %llu\n",(unsigned long long) *value));
return 1;
}
@@ -1783,13 +1783,13 @@ Compoundpos_search (Univcoord_T *value, Compoundpos_T compoundpos, Univcoord_T l
}
if (batch->position == local_goal) {
*value = batch->position;
- debug3(printf("Found! Returning position %lu\n",*value));
+ debug3(printf("Found! Returning position %llu\n",(unsigned long long) *value));
return 1;
}
}
*value = batch->position;
- debug3(printf("Returning position %lu\n",*value));
+ debug3(printf("Returning position %llu\n",(unsigned long long) *value));
return 1;
}
diff --git a/src/mem.c b/src/mem.c
index 14e72a3..1b3d377 100644
--- a/src/mem.c
+++ b/src/mem.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: mem.c 145990 2014-08-25 21:47:32Z twu $";
+static char rcsid[] = "$Id: mem.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -407,10 +407,10 @@ Mem_alloc (size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
- debug_heap(printf("%ld %s: Allocating %p to %p -- Malloc of %lu bytes in standard pool requested from %s:%d\n",
+ debug_heap(printf("%ld %s: Allocating %p to %p -- Malloc of %zu bytes in standard pool requested from %s:%d\n",
memusage_std_heap,threadname,ptr,(char *) ptr + nbytes-1,nbytes,file,line));
#else
- debug_heap(printf("Allocating %p to %p -- Malloc of %lu bytes in standard pool requested from %s:%d\n",
+ debug_heap(printf("Allocating %p to %p -- Malloc of %zu bytes in standard pool requested from %s:%d\n",
ptr,(char *) ptr + nbytes-1,nbytes,file,line));
#endif
@@ -428,7 +428,7 @@ Mem_alloc (size_t nbytes, const char *file, int line) {
#endif
if (ptr == NULL) {
- fprintf(stderr,"Failed attempt to alloc %lu bytes\n",nbytes);
+ fprintf(stderr,"Failed attempt to alloc %zu bytes\n",nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -482,10 +482,10 @@ Mem_alloc_keep (size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
- debug_heap(printf("%ld %s-keep: Allocating %p to %p -- Malloc of %lu bytes in keep pool requested from %s:%d\n",
+ debug_heap(printf("%ld %s-keep: Allocating %p to %p -- Malloc of %zu bytes in keep pool requested from %s:%d\n",
memusage_keep,threadname,ptr,(char *) ptr + nbytes-1,nbytes,file,line));
#else
- debug_heap(printf("Allocating %p to %p -- Malloc of %lu bytes in keep pool requested from %s:%d\n",
+ debug_heap(printf("Allocating %p to %p -- Malloc of %zu bytes in keep pool requested from %s:%d\n",
ptr,(char *) ptr + nbytes-1,nbytes,file,line));
#endif
@@ -503,7 +503,7 @@ Mem_alloc_keep (size_t nbytes, const char *file, int line) {
#endif
if (ptr == NULL) {
- fprintf(stderr,"Failed attempt to alloc %lu bytes\n",nbytes);
+ fprintf(stderr,"Failed attempt to alloc %zu bytes\n",nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -546,10 +546,10 @@ Mem_alloc_in (size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
- debug_heap(printf("%ld IN: Allocating %p to %p -- Malloc of %lu bytes in input pool requested from %s:%d\n",
+ debug_heap(printf("%ld IN: Allocating %p to %p -- Malloc of %zu bytes in input pool requested from %s:%d\n",
memusage_in,ptr,(char *) ptr + nbytes-1,nbytes,file,line));
#else
- debug_heap(printf("Allocating %p to %p -- Malloc of %lu bytes in input pool requested from %s:%d\n",
+ debug_heap(printf("Allocating %p to %p -- Malloc of %zu bytes in input pool requested from %s:%d\n",
ptr,(char *) ptr + nbytes-1,nbytes,file,line));
#endif
@@ -567,7 +567,7 @@ Mem_alloc_in (size_t nbytes, const char *file, int line) {
#endif
if (ptr == NULL) {
- fprintf(stderr,"Failed attempt to alloc %lu bytes\n",nbytes);
+ fprintf(stderr,"Failed attempt to alloc %zu bytes\n",nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -610,10 +610,10 @@ Mem_alloc_out (size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
- debug_heap(printf("%ld OUT: Allocating %p to %p -- Malloc of %lu bytes in output pool requested from %s:%d\n",
+ debug_heap(printf("%ld OUT: Allocating %p to %p -- Malloc of %zu bytes in output pool requested from %s:%d\n",
memusage_out,ptr,(char *) ptr + nbytes-1,nbytes,file,line));
#else
- debug_heap(printf("Allocating %p to %p -- Malloc of %lu bytes in output pool requested from %s:%d\n",
+ debug_heap(printf("Allocating %p to %p -- Malloc of %zu bytes in output pool requested from %s:%d\n",
ptr,(char *) ptr + nbytes-1,nbytes,file,line));
#endif
@@ -631,7 +631,7 @@ Mem_alloc_out (size_t nbytes, const char *file, int line) {
#endif
if (ptr == NULL) {
- fprintf(stderr,"Failed attempt to alloc %lu bytes\n",nbytes);
+ fprintf(stderr,"Failed attempt to alloc %zu bytes\n",nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -671,7 +671,7 @@ Mem_calloc (size_t count, size_t nbytes, const char *file, int line) {
#endif
if (count <= 0) {
- fprintf(stderr,"Failed attempt to calloc %lu x %lu bytes\n",count,nbytes);
+ fprintf(stderr,"Failed attempt to calloc %zu x %zu bytes\n",count,nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -721,15 +721,15 @@ Mem_calloc (size_t count, size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
- debug_heap(printf("%ld %s: Allocating %p to %p -- Calloc of %lu x %lu = %lu bytes in standard pool requested from %s:%d\n",
+ debug_heap(printf("%ld %s: Allocating %p to %p -- Calloc of %zu x %zu = %zu bytes in standard pool requested from %s:%d\n",
memusage_std_heap,threadname,ptr,(char *) ptr + count*nbytes-1,count,nbytes,count*nbytes,file,line));
#else
- debug_heap(printf("Allocating %p to %p -- Calloc of %lu x %lu = %lu bytes in standard pool requested from %s:%d\n",
+ debug_heap(printf("Allocating %p to %p -- Calloc of %zu x %zu = %zu bytes in standard pool requested from %s:%d\n",
ptr,(char *) ptr + count*nbytes-1,count,nbytes,count*nbytes,file,line));
#endif
if (ptr == NULL) {
- fprintf(stderr,"Failed attempt to calloc %lu x %lu bytes\n",count,nbytes);
+ fprintf(stderr,"Failed attempt to calloc %zu x %zu bytes\n",count,nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -762,7 +762,7 @@ Mem_calloc_keep (size_t count, size_t nbytes, const char *file, int line) {
#endif
if (count <= 0) {
- fprintf(stderr,"Failed attempt to calloc %lu x %lu bytes\n",count,nbytes);
+ fprintf(stderr,"Failed attempt to calloc %zu x %zu bytes\n",count,nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -804,15 +804,15 @@ Mem_calloc_keep (size_t count, size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
- debug_heap(printf("%ld %s-keep: Allocating %p to %p -- Calloc of %lu x %lu = %lu bytes in keep pool requested from %s:%d\n",
+ debug_heap(printf("%ld %s-keep: Allocating %p to %p -- Calloc of %zu x %zu = %zu bytes in keep pool requested from %s:%d\n",
memusage_keep,threadname,ptr,(char *) ptr + count*nbytes-1,count,nbytes,count*nbytes,file,line));
#else
- debug_heap(printf("Allocating %p to %p -- Calloc of %lu x %lu = %lu bytes in keep pool requested from %s:%d\n",
+ debug_heap(printf("Allocating %p to %p -- Calloc of %zu x %zu = %zu bytes in keep pool requested from %s:%d\n",
ptr,(char *) ptr + count*nbytes-1,count,nbytes,count*nbytes,file,line));
#endif
if (ptr == NULL) {
- fprintf(stderr,"Failed attempt to calloc %lu x %lu bytes\n",count,nbytes);
+ fprintf(stderr,"Failed attempt to calloc %zu x %zu bytes\n",count,nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -843,7 +843,7 @@ Mem_calloc_in (size_t count, size_t nbytes, const char *file, int line) {
#endif
if (count <= 0) {
- fprintf(stderr,"Failed attempt to calloc %lu x %lu bytes\n",count,nbytes);
+ fprintf(stderr,"Failed attempt to calloc %zu x %zu bytes\n",count,nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -878,15 +878,15 @@ Mem_calloc_in (size_t count, size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
- debug_heap(printf("%ld IN: Allocating %p to %p -- Calloc of %lu x %lu = %lu bytes in input pool requested from %s:%d\n",
+ debug_heap(printf("%ld IN: Allocating %p to %p -- Calloc of %zu x %zu = %zu bytes in input pool requested from %s:%d\n",
memusage_in,ptr,(char *) ptr + count*nbytes-1,count,nbytes,count*nbytes,file,line));
#else
- debug_heap(printf("Allocating %p to %p -- Calloc of %lu x %lu = %lu bytes in input pool requested from %s:%d\n",
+ debug_heap(printf("Allocating %p to %p -- Calloc of %zu x %zu = %zu bytes in input pool requested from %s:%d\n",
ptr,(char *) ptr + count*nbytes-1,count,nbytes,count*nbytes,file,line));
#endif
if (ptr == NULL) {
- fprintf(stderr,"Failed attempt to calloc %lu x %lu bytes\n",count,nbytes);
+ fprintf(stderr,"Failed attempt to calloc %zu x %zu bytes\n",count,nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -916,7 +916,7 @@ Mem_calloc_out (size_t count, size_t nbytes, const char *file, int line) {
#endif
if (count <= 0) {
- fprintf(stderr,"Failed attempt to calloc %lu x %lu bytes\n",count,nbytes);
+ fprintf(stderr,"Failed attempt to calloc %zu x %zu bytes\n",count,nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -951,15 +951,15 @@ Mem_calloc_out (size_t count, size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
- debug_heap(printf("%ld OUT: Allocating %p to %p -- Calloc of %lu x %lu = %lu bytes in output pool requested from %s:%d\n",
+ debug_heap(printf("%ld OUT: Allocating %p to %p -- Calloc of %zu x %zu = %zu bytes in output pool requested from %s:%d\n",
memusage_out,ptr,(char *) ptr + count*nbytes-1,count,nbytes,count*nbytes,file,line));
#else
- debug_heap(printf("Allocating %p to %p -- Calloc of %lu x %lu = %lu bytes in output pool requested from %s:%d\n",
+ debug_heap(printf("Allocating %p to %p -- Calloc of %zu x %zu = %zu bytes in output pool requested from %s:%d\n",
ptr,(char *) ptr + count*nbytes-1,count,nbytes,count*nbytes,file,line));
#endif
if (ptr == NULL) {
- fprintf(stderr,"Failed attempt to calloc %lu x %lu bytes\n",count,nbytes);
+ fprintf(stderr,"Failed attempt to calloc %zu x %zu bytes\n",count,nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -991,7 +991,7 @@ Mem_calloc_no_exception (size_t count, size_t nbytes, const char *file, int line
#endif
if (count <= 0) {
- fprintf(stderr,"Failed attempt to allocate %lu x %lu bytes\n",count,nbytes);
+ fprintf(stderr,"Failed attempt to allocate %zu x %zu bytes\n",count,nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
@@ -1275,7 +1275,7 @@ Mem_resize (void *ptr, size_t nbytes, const char *file, int line) {
assert(nbytes > 0);
ptr = realloc(ptr, nbytes);
if (ptr == NULL) {
- fprintf(stderr,"Failed attempt to realloc %lu bytes\n",nbytes);
+ fprintf(stderr,"Failed attempt to realloc %zu bytes\n",nbytes);
if (file == NULL) {
RAISE(Mem_Failed);
} else {
diff --git a/src/oligoindex_hr.c b/src/oligoindex_hr.c
index 91c85ed..414420a 100644
--- a/src/oligoindex_hr.c
+++ b/src/oligoindex_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: oligoindex_hr.c 146626 2014-09-02 21:34:26Z twu $";
+static char rcsid[] = "$Id: oligoindex_hr.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -17584,7 +17584,7 @@ counts_compare (Count_T *counts1, Count_T *counts2, Oligospace_T oligospace) {
for (i = 0; i < oligospace; i++) {
if (counts1[i] != counts2[i]) {
- printf("At oligo %lu, counts1 %d != counts2 %d\n",i,counts1[i],counts2[i]);
+ printf("At oligo %llu, counts1 %d != counts2 %d\n",(unsigned long long) i,counts1[i],counts2[i]);
abort();
}
}
@@ -17600,8 +17600,8 @@ positions_compare (Chrpos_T **positions1, Chrpos_T **positions2, Count_T *counts
/* nt = shortoligo_nt(i,indexsize); */
for (hit = 0; hit < counts[i]; hit++) {
if (positions1[i][hit] != positions2[i][hit]) {
- printf("At oligo %lu, hit %d, positions1 %u != positions2 %u\n",
- i,hit,positions1[i][hit],positions2[i][hit]);
+ printf("At oligo %llu, hit %d, positions1 %u != positions2 %u\n",
+ (unsigned long long) i,hit,positions1[i][hit],positions2[i][hit]);
abort();
}
}
diff --git a/src/outbuffer.c b/src/outbuffer.c
index 74ee684..4c6dc81 100644
--- a/src/outbuffer.c
+++ b/src/outbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: outbuffer.c 150878 2014-10-15 18:18:56Z twu $";
+static char rcsid[] = "$Id: outbuffer.c 154088 2014-11-25 21:02:46Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -229,7 +229,7 @@ struct T {
#else
- FILE *fp_nomapping; /* N1 */
+ FILE *fp_nomapping; /* NM */
FILE *fp_uniq; /* UU */
FILE *fp_circular; /* UC */
FILE *fp_transloc; /* UT */
@@ -1689,8 +1689,8 @@ print_result_sam (T this, Result_T result, Request_T request) {
Stage3end_mapq_score(stage3array[0]),
this->chromosome_iit,queryseq1,/*queryseq2*/NULL,
/*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
- /*clipdir*/0,/*hardclip_low*/0,/*hardclip_high*/0,resulttype,
- /*first_read_p*/true,/*npaths_mate*/0,this->quality_shift,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/0,this->quality_shift,
this->sam_read_group_id,this->invert_first_p,this->invert_second_p,
this->merge_samechr_p);
}
@@ -1725,8 +1725,8 @@ print_result_sam (T this, Result_T result, Request_T request) {
Stage3end_mapq_score(stage3array[pathnum-1]),
this->chromosome_iit,queryseq1,/*queryseq2*/NULL,
/*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
- /*clipdir*/0,/*hardclip_low*/0,/*hardclip_high*/0,resulttype,
- /*first_read_p*/true,/*npaths_mate*/0,this->quality_shift,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/0,this->quality_shift,
this->sam_read_group_id,this->invert_first_p,this->invert_second_p,
this->merge_samechr_p);
}
@@ -1762,8 +1762,8 @@ print_result_sam (T this, Result_T result, Request_T request) {
Stage3end_mapq_score(stage3array[pathnum-1]),
this->chromosome_iit,queryseq1,/*queryseq2*/NULL,
/*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
- /*clipdir*/0,/*hardclip_low*/0,/*hardclip_high*/0,resulttype,
- /*first_read_p*/true,/*npaths_mate*/0,this->quality_shift,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/0,this->quality_shift,
this->sam_read_group_id,this->invert_first_p,this->invert_second_p,
this->merge_samechr_p);
}
@@ -2529,7 +2529,7 @@ void *
Outbuffer_thread_anyorder (void *data) {
T this = (T) data;
unsigned int output_buffer_size = this->output_buffer_size;
- unsigned int noutput = 0;
+ unsigned int noutput = 0, ntotal;
Result_T result;
Request_T request;
@@ -2537,8 +2537,16 @@ Outbuffer_thread_anyorder (void *data) {
Mem_usage_set_threadname("outbuffer");
#endif
- while (noutput < this->ntotal) {
+ /* Obtain this->ntotal while locked, to prevent race between output thread and input thread */
+#ifdef HAVE_PTHREAD
+ pthread_mutex_lock(&this->lock);
+#endif
+ ntotal = this->ntotal;
+#ifdef HAVE_PTHREAD
+ pthread_mutex_unlock(&this->lock);
+#endif
+ while (noutput < ntotal) { /* Previously check against this->ntotal */
#ifdef HAVE_PTHREAD
pthread_mutex_lock(&this->lock);
while (this->head == NULL && noutput < this->ntotal) {
@@ -2612,6 +2620,15 @@ Outbuffer_thread_anyorder (void *data) {
}
}
+
+ /* Obtain this->ntotal while locked, to prevent race between output thread and input thread */
+#ifdef HAVE_PTHREAD
+ pthread_mutex_lock(&this->lock);
+#endif
+ ntotal = this->ntotal;
+#ifdef HAVE_PTHREAD
+ pthread_mutex_unlock(&this->lock);
+#endif
}
assert(this->head == NULL);
@@ -2625,7 +2642,7 @@ void *
Outbuffer_thread_ordered (void *data) {
T this = (T) data;
unsigned int output_buffer_size = this->output_buffer_size;
- unsigned int noutput = 0, nqueued = 0;
+ unsigned int noutput = 0, nqueued = 0, ntotal;
Result_T result;
Request_T request;
RRlist_T queue = NULL;
@@ -2635,7 +2652,16 @@ Outbuffer_thread_ordered (void *data) {
Mem_usage_set_threadname("outbuffer");
#endif
- while (noutput < this->ntotal) {
+ /* Obtain this->ntotal while locked, to prevent race between output thread and input thread */
+#ifdef HAVE_PTHREAD
+ pthread_mutex_lock(&this->lock);
+#endif
+ ntotal = this->ntotal;
+#ifdef HAVE_PTHREAD
+ pthread_mutex_unlock(&this->lock);
+#endif
+
+ while (noutput < ntotal) { /* Previously checked against this->ntotal */
#ifdef HAVE_PTHREAD
pthread_mutex_lock(&this->lock);
while (this->head == NULL && noutput < this->ntotal) {
@@ -2762,6 +2788,15 @@ Outbuffer_thread_ordered (void *data) {
}
}
+
+ /* Obtain this->ntotal while locked, to prevent race between output thread and input thread */
+#ifdef HAVE_PTHREAD
+ pthread_mutex_lock(&this->lock);
+#endif
+ ntotal = this->ntotal;
+#ifdef HAVE_PTHREAD
+ pthread_mutex_unlock(&this->lock);
+#endif
}
assert(queue == NULL);
diff --git a/src/pair.c b/src/pair.c
index 47c78e8..5f26c5f 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 149618 2014-10-01 22:31:16Z twu $";
+static char rcsid[] = "$Id: pair.c 154023 2014-11-25 03:45:18Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -129,12 +129,14 @@ static bool force_xs_direction_p;
static bool md_lowercase_variant_p;
static bool snps_p;
static double genomelength; /* For BLAST E-value */
+static Cigar_action_T cigar_action;
void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
- bool md_lowercase_variant_p_in, bool snps_p_in, Univcoord_T genomelength_in) {
+ bool md_lowercase_variant_p_in, bool snps_p_in, Univcoord_T genomelength_in,
+ Cigar_action_T cigar_action_in) {
trim_mismatch_score = trim_mismatch_score_in;
trim_indel_score = trim_indel_score_in;
sam_insert_0M_p = sam_insert_0M_p_in;
@@ -142,6 +144,7 @@ Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
md_lowercase_variant_p = md_lowercase_variant_p_in;
snps_p = snps_p_in;
genomelength = (double) genomelength_in;
+ cigar_action = cigar_action_in;
return;
}
@@ -813,7 +816,7 @@ invert_and_revcomp_path_and_coords (struct T *old, int npairs, int querylength)
static void
add_intronlengths (struct T *pairs, int npairs) {
- struct T *prev, *this = NULL, *ptr;
+ struct T *this = NULL, *ptr;
int space, margin, i, j, k, gapstart;
char intronstring[20], cdnabreak[20], genomicbreak[20], comp;
int last_querypos = -1;
@@ -821,7 +824,7 @@ add_intronlengths (struct T *pairs, int npairs) {
i = 0;
while (i < npairs) {
- prev = this;
+ /* prev = this; */
this = &(pairs[i++]);
if (this->extraexonp == true) {
@@ -1632,14 +1635,13 @@ Pair_check_list (List_T pairs) {
T this;
List_T p;
int prev_querypos;
- Chrpos_T prev_genomepos;
if (pairs == NULL) {
return;
} else {
this = List_head(pairs);
prev_querypos = this->querypos;
- prev_genomepos = this->genomepos;
+ /* prev_genomepos = this->genomepos; */
for (p = List_next(pairs); p != NULL; p = List_next(p)) {
this = List_head(p);
@@ -1654,7 +1656,7 @@ Pair_check_list (List_T pairs) {
}
#endif
prev_querypos = this->querypos;
- prev_genomepos = this->genomepos;
+ /* prev_genomepos = this->genomepos; */
}
}
}
@@ -1667,7 +1669,6 @@ Pair_check_array (struct T *pairs, int npairs) {
bool result = false;
struct T *this;
int prev_querypos;
- Chrpos_T prev_genomepos;
int i;
if (npairs == 0) {
@@ -1675,7 +1676,7 @@ Pair_check_array (struct T *pairs, int npairs) {
} else {
this = pairs++;
prev_querypos = this->querypos;
- prev_genomepos = this->genomepos;
+ /* prev_genomepos = this->genomepos; */
for (i = 1; i < npairs; i++) {
this = pairs++;
@@ -1695,7 +1696,7 @@ Pair_check_array (struct T *pairs, int npairs) {
}
#endif
prev_querypos = this->querypos;
- prev_genomepos = this->genomepos;
+ /* prev_genomepos = this->genomepos; */
}
}
return result;
@@ -1705,32 +1706,9 @@ Pair_check_array (struct T *pairs, int npairs) {
/* Called by output thread for --merge-overlap feature. Modeled after Substring_convert_to_pairs. */
List_T
Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset) {
+ int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset) {
T pair;
- int querystart, queryend, querypos, i, k;
- int hardclip_low, hardclip_high;
- Chrpos_T chrpos;
- char genome;
-
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip5 */
- } else {
- hardclip_low = hardclip; /* hardclip5 */
- hardclip_high = 0;
- }
-
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip; /* hardclip3 */
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip3 */
- }
- }
-
+ int querystart, queryend, i;
if (plusp == true) {
querystart = hardclip_low;
@@ -1886,7 +1864,7 @@ Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
Univcoord_T chroffset, Genome_T genome, Univ_IIT_T chromosome_iit,
bool watsonp, int cdna_direction, bool genomefirstp, int invertmode) {
bool in_exon = false;
- struct T *save = NULL, *ptr, *prev, *this = NULL;
+ struct T *save = NULL, *ptr, *this = NULL;
int exon_querystart = -1, exon_queryend;
Chrpos_T exon_genomestart = -1U, exon_genomeend, intron_start, intron_end;
int num = 0, den = 0, i;
@@ -1919,7 +1897,7 @@ Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
debug(Pair_dump_array(pairs,npairs,/*zerobasedp*/true));
for (i = 0; i < npairs; i++) {
- prev = this;
+ /* prev = this; */
this = ptr++;
if (this->gapp) {
@@ -2051,7 +2029,7 @@ Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
}
}
- prev = this;
+ /* prev = this; */
exon_queryend = last_querypos + ONEBASEDP;
exon_genomeend = last_genomepos + ONEBASEDP;
if (genomefirstp == true) {
@@ -2491,7 +2469,7 @@ print_gff3_exons_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, T
int qindels, int tindels, int unknowns, bool watsonp, int cdna_direction,
bool gff_introns_p, bool gff_gene_format_p, bool gff_estmatch_format_p) {
bool in_exon = false;
- struct T *ptr, *prev, *this = NULL;
+ struct T *ptr, *this = NULL;
int exon_querystart = -1, exon_queryend;
Chrpos_T exon_genomestart = -1, exon_genomeend, intron_start, intron_end;
int pctidentity, num = 0, den = 0, exonno = 0, i;
@@ -2507,7 +2485,7 @@ print_gff3_exons_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, T
ptr = pairs;
for (i = 0; i < npairs; i++) {
- prev = this;
+ /* prev = this; */
this = ptr++;
if (this->gapp) {
@@ -2664,7 +2642,7 @@ print_gff3_exons_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, T
}
}
- prev = this;
+ /* prev = this; */
exon_queryend = last_querypos + 1;
exon_genomeend = last_genomepos + 1;
@@ -2717,7 +2695,7 @@ static void
print_gff3_exons_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, char *sourcename, char *accession, char *chrstring,
bool watsonp, int cdna_direction, bool gff_introns_p) {
bool in_exon = false;
- struct T *ptr, *prev, *this = NULL;
+ struct T *ptr, *this = NULL;
int exon_querystart = -1, exon_queryend;
Chrpos_T exon_genomestart = -1, exon_genomeend;
int pctidentity, num = 0, den = 0, exonno = 0, i;
@@ -2730,7 +2708,7 @@ print_gff3_exons_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, c
ptr = &(pairs[npairs-1]);
for (i = npairs-1; i >= 0; i--) {
- prev = this;
+ /* prev = this; */
this = ptr--;
if (this->gapp) {
@@ -2800,7 +2778,7 @@ print_gff3_exons_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, c
}
}
- prev = this;
+ /* prev = this; */
exon_queryend = last_querypos + 1;
exon_genomeend = last_genomepos + 1;
@@ -2820,7 +2798,7 @@ static void
print_gff3_cdss_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, char *sourcename, char *accession, char *chrstring,
bool watsonp, int cdna_direction) {
bool in_cds = false;
- struct T *ptr, *prev, *this = NULL;
+ struct T *ptr, *this = NULL;
int exon_querystart = -1, exon_queryend, exon_phase;
Chrpos_T exon_genomestart = -1, exon_genomeend;
int pctidentity, num = 0, den = 0, cdsno = 0;
@@ -2832,7 +2810,7 @@ print_gff3_cdss_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, cha
ptr = pairs;
while (ptr < &(pairs[npairs])) {
- prev = this;
+ /* prev = this; */
this = ptr++;
if (in_cds == true) {
@@ -2919,7 +2897,7 @@ static void
print_gff3_cdss_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, char *sourcename, char *accession, char *chrstring,
bool watsonp, int cdna_direction) {
bool in_cds = false;
- struct T *ptr, *prev, *this = NULL;
+ struct T *ptr, *this = NULL;
int exon_querystart = -1, exon_queryend, exon_phase;
Chrpos_T exon_genomestart = -1, exon_genomeend;
int pctidentity, num = 0, den = 0, cdsno = 0;
@@ -2932,7 +2910,7 @@ print_gff3_cdss_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, ch
ptr = &(pairs[npairs-1]);
while (ptr >= &(pairs[0])) {
- prev = this;
+ /* prev = this; */
this = ptr--;
if (in_cds == true) {
@@ -3397,15 +3375,15 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
Univ_IIT_T chromosome_iit, IIT_T splicesites_iit,
int *splicesites_divint_crosstable, int donor_typeint, int acceptor_typeint) {
bool in_exon = true;
- struct T *pairs, *ptr, *ptr0, *prev, *this = NULL;
+ struct T *pairs, *ptr, *ptr0, *this = NULL;
int exon_querystart = -1, exon_queryend;
Chrpos_T exon_genomestart = -1U, exon_genomeend;
int querypos, nmismatches_refdiff, nmismatches_bothdiff, nmatches,
- ntrim_start, ntrim_end, indel_pos, nindels, prev_nindels, i;
+ ntrim_start, ntrim_end, nindels, prev_nindels, i;
int last_querypos = -1;
Chrpos_T last_genomepos = -1U;
Univcoord_T pos;
- Chrpos_T splice_dist = 0U, prev_splice_dist, splicesitepos, prev_splicesitepos;
+ Chrpos_T splice_dist = 0U, prev_splice_dist;
char *chr, strand, c, c_alt;
Endtype_T endtype, prev_endtype;
bool allocp, firstp = true;
@@ -3479,7 +3457,7 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
i = 0;
while (i < npairs) {
- prev = this;
+ /* prev = this; */
this = ptr++;
i++;
@@ -3515,12 +3493,12 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
if (watsonp == true) {
splice_dist = ptr0->genomepos - last_genomepos - 1;
- prev_splicesitepos = exon_genomestart-1U;
- splicesitepos = exon_genomeend;
+ /* prev_splicesitepos = exon_genomestart-1U; */
+ /* splicesitepos = exon_genomeend; */
} else {
splice_dist = last_genomepos - ptr0->genomepos - 1;
- prev_splicesitepos = exon_genomestart;
- splicesitepos = exon_genomeend-1U;
+ /* prev_splicesitepos = exon_genomestart; */
+ /* splicesitepos = exon_genomeend-1U; */
}
@@ -3594,7 +3572,7 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
prev_splice_dist = 0U;
}
- indel_pos = this->querypos;
+ /* indel_pos = this->querypos; */
nindels = 0;
while (i < npairs && this->gapp == false && this->genome == ' ') {
nindels++;
@@ -3658,7 +3636,7 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
prev_splice_dist = 0U;
}
- indel_pos = this->querypos;
+ /* indel_pos = this->querypos; */
nindels = 0;
while (i < npairs && this->gapp == false && this->cdna == ' ') {
fprintf(fp,"%c",tolower(this->genome));
@@ -3732,7 +3710,7 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
}
}
- prev = this;
+ /* prev = this; */
exon_queryend = last_querypos + 1;
exon_genomeend = last_genomepos + 1;
prev_endtype = endtype;
@@ -3903,7 +3881,7 @@ Pair_print_m8 (FILE *fp, struct T *pairs_querydir, int npairs, bool invertedp,
int nmismatches_refdiff, nmismatches_bothdiff, nmatches, i;
int last_querypos = -1;
Chrpos_T last_genomepos = -1U;
- char *chr, c;
+ char *chr;
int querylength;
bool allocp;
@@ -4005,7 +3983,7 @@ Pair_print_m8 (FILE *fp, struct T *pairs_querydir, int npairs, bool invertedp,
}
} else {
- c = this->genome;
+ /* c = this->genome; */
if (this->genome == this->cdna) {
nmatches++;
} else if (this->genomealt == this->cdna) {
@@ -4465,10 +4443,10 @@ compute_sam_flag_nomate (int pathnum, int npaths, bool first_read_p, bool watson
/* Modeled after Shortread_print_chopped */
static void
print_chopped (FILE *fp, char *contents, int querylength,
- int hardclip_low, int hardclip_high) {
+ int hardclip_start, int hardclip_end) {
int i;
- for (i = hardclip_low; i < querylength - hardclip_high; i++) {
+ for (i = hardclip_start; i < querylength - hardclip_end; i++) {
putc(contents[i],fp);
}
return;
@@ -4477,10 +4455,10 @@ print_chopped (FILE *fp, char *contents, int querylength,
/* Differs from Shortread version, in that hardclip_high and hardclip_low are not reversed */
static void
print_chopped_revcomp (FILE *fp, char *contents, int querylength,
- int hardclip_low, int hardclip_high) {
+ int hardclip_start, int hardclip_end) {
int i;
- for (i = querylength - 1 - hardclip_high; i >= hardclip_low; --i) {
+ for (i = querylength - 1 - hardclip_end; i >= hardclip_start; --i) {
putc(complCode[(int) contents[i]],fp);
}
return;
@@ -4489,17 +4467,17 @@ print_chopped_revcomp (FILE *fp, char *contents, int querylength,
static void
print_chopped_end (FILE *fp, char *contents, int querylength,
- int hardclip_low, int hardclip_high) {
+ int hardclip_start, int hardclip_end) {
int i;
- if (hardclip_low > 0) {
- for (i = 0; i < hardclip_low; i++) {
+ if (hardclip_start > 0) {
+ for (i = 0; i < hardclip_start; i++) {
putc(contents[i],fp);
}
return;
} else {
- for (i = querylength - hardclip_high; i < querylength; i++) {
+ for (i = querylength - hardclip_end; i < querylength; i++) {
putc(contents[i],fp);
}
return;
@@ -4509,17 +4487,17 @@ print_chopped_end (FILE *fp, char *contents, int querylength,
/* Differs from Shortread version, in that hardclip_high and hardclip_low are not reversed */
static void
print_chopped_end_revcomp (FILE *fp, char *contents, int querylength,
- int hardclip_low, int hardclip_high) {
+ int hardclip_start, int hardclip_end) {
int i;
- if (hardclip_low > 0) {
- for (i = hardclip_low - 1; i >= 0; --i) {
+ if (hardclip_start > 0) {
+ for (i = hardclip_start - 1; i >= 0; --i) {
putc(complCode[(int) contents[i]],fp);
}
return;
} else {
- for (i = querylength - 1; i >= querylength - hardclip_high; --i) {
+ for (i = querylength - 1; i >= querylength - hardclip_end; --i) {
putc(complCode[(int) contents[i]],fp);
}
return;
@@ -4531,14 +4509,14 @@ print_chopped_end_revcomp (FILE *fp, char *contents, int querylength,
/* Modeled after Shortread_print_quality */
static void
print_quality (FILE *fp, char *quality, int querylength,
- int hardclip_low, int hardclip_high, int shift) {
+ int hardclip_start, int hardclip_end, int shift) {
int i;
int c;
if (quality == NULL) {
putc('*',fp);
} else {
- for (i = hardclip_low; i < querylength - hardclip_high; i++) {
+ for (i = hardclip_start; i < querylength - hardclip_end; i++) {
if ((c = quality[i] + shift) <= 32) {
fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
shift,quality[i]);
@@ -4554,14 +4532,14 @@ print_quality (FILE *fp, char *quality, int querylength,
static void
print_quality_revcomp (FILE *fp, char *quality, int querylength,
- int hardclip_low, int hardclip_high, int shift) {
+ int hardclip_start, int hardclip_end, int shift) {
int i;
int c;
if (quality == NULL) {
putc('*',fp);
} else {
- for (i = querylength - 1 - hardclip_high; i >= hardclip_low; --i) {
+ for (i = querylength - 1 - hardclip_end; i >= hardclip_start; --i) {
if ((c = quality[i] + shift) <= 32) {
fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
shift,quality[i]);
@@ -4596,20 +4574,32 @@ print_sam_line (FILE *fp, char *abbrev, bool first_read_p, char *acc1, char *acc
bool watsonp, int cdna_direction, List_T cigar_tokens, List_T md_tokens,
int nmismatches_refdiff, int nmismatches_bothdiff, int nindels,
bool intronp, char *queryseq_ptr, char *quality_string,
- int hardclip_low, int hardclip_high, int querylength, Chimera_T chimera, int quality_shift,
+ int hardclip_start, int hardclip_end, int querylength, Chimera_T chimera, int quality_shift,
int pathnum, int npaths, int absmq_score, int first_absmq, int second_absmq, unsigned int flag,
- Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Chrpos_T chrpos,
+ Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Chrpos_T chrpos, Chrpos_T chrlength,
#ifdef GSNAP
Shortread_T queryseq, Resulttype_T resulttype, int pair_mapq_score, int end_mapq_score,
char *mate_chrstring, Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
- Chrpos_T mate_chrpos, int mate_cdna_direction, int pairedlength,
+ Chrpos_T mate_chrpos, Chrpos_T mate_chrlength, int mate_cdna_direction, int pairedlength,
#else
int mapq_score, struct T *pairarray, int npairs,
#endif
char *sam_read_group_id, bool invertp, bool merged_overlap_p) {
int sensedir;
- assert(tokens_cigarlength(cigar_tokens) + hardclip_low + hardclip_high == querylength);
+ if (cigar_action == CIGAR_ACTION_IGNORE) {
+ /* Don't check */
+ } else if (tokens_cigarlength(cigar_tokens) + hardclip_start + hardclip_end == querylength) {
+ /* Okay */
+ } else if (cigar_action == CIGAR_ACTION_WARNING) {
+ fprintf(stderr,"Warning: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
+ acc1,tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+ } else {
+ /* CIGAR_ACTION_ABORT */
+ fprintf(stderr,"Error: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
+ acc1,tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+ abort();
+ }
/* 1. QNAME or Accession */
if (acc2 == NULL) {
@@ -4624,8 +4614,11 @@ print_sam_line (FILE *fp, char *abbrev, bool first_read_p, char *acc1, char *acc
/* 3. RNAME or Chrstring */
/* 4. POS or Chrlow */
/* Taken from GMAP part of SAM_chromosomal_pos */
- fprintf(fp,"%s\t%u\t",chrstring,chrpos /*+ 1U*/);
-
+ if (chrpos > chrlength) {
+ fprintf(fp,"%s\t%u\t",chrstring,chrpos - chrlength /*+ 1U*/);
+ } else {
+ fprintf(fp,"%s\t%u\t",chrstring,chrpos /*+ 1U*/);
+ }
/* 5. MAPQ or Mapping quality */
#ifdef GSNAP
@@ -4642,6 +4635,8 @@ print_sam_line (FILE *fp, char *abbrev, bool first_read_p, char *acc1, char *acc
#ifdef GSNAP
if (mate_chrpos == 0U) {
fprintf(fp,"\t*\t0");
+ } else if (mate_chrpos > mate_chrlength) {
+ fprintf(fp,"\t%s\t%u",mate_chrstring,mate_chrpos - mate_chrlength /* +1U*/);
} else {
fprintf(fp,"\t%s\t%u",mate_chrstring,mate_chrpos /* +1U*/);
}
@@ -4675,14 +4670,14 @@ print_sam_line (FILE *fp, char *abbrev, bool first_read_p, char *acc1, char *acc
/* 10. SEQ: queryseq and 11. QUAL: quality_scores */
fprintf(fp,"\t");
if (watsonp == true) {
- print_chopped(fp,queryseq_ptr,querylength,hardclip_low,hardclip_high);
+ print_chopped(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
fprintf(fp,"\t");
- print_quality(fp,quality_string,querylength,hardclip_low,hardclip_high,
+ print_quality(fp,quality_string,querylength,hardclip_start,hardclip_end,
quality_shift);
} else {
- print_chopped_revcomp(fp,queryseq_ptr,querylength,hardclip_low,hardclip_high);
+ print_chopped_revcomp(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
fprintf(fp,"\t");
- print_quality_revcomp(fp,quality_string,querylength,hardclip_low,hardclip_high,
+ print_quality_revcomp(fp,quality_string,querylength,hardclip_start,hardclip_end,
quality_shift);
}
@@ -4692,12 +4687,12 @@ print_sam_line (FILE *fp, char *abbrev, bool first_read_p, char *acc1, char *acc
}
/* 12. TAGS: XH */
- if (hardclip_low > 0 || hardclip_high > 0) {
+ if (hardclip_start > 0 || hardclip_end > 0) {
fprintf(fp,"\tXH:Z:");
if (watsonp == true) {
- print_chopped_end(fp,queryseq_ptr,querylength,hardclip_low,hardclip_high);
+ print_chopped_end(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
} else {
- print_chopped_end_revcomp(fp,queryseq_ptr,querylength,hardclip_low,hardclip_high);
+ print_chopped_end_revcomp(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
}
}
@@ -4837,19 +4832,19 @@ Pair_unalias_circular (struct T *pairs, int npairs, Chrpos_T chrlength) {
static struct T *
-hardclip_pairs (int *clipped_npairs, int hardclip_low, int hardclip_high,
+hardclip_pairs (int *clipped_npairs, int hardclip_start, int hardclip_end,
struct T *pairs, int npairs, int querylength) {
struct T *clipped_pairs, *ptr;
int i, starti;
- debug10(printf("Entered hardclip_pairs with hardclip_low %d, hardclip_high %d, querylength %d\n",
- hardclip_low,hardclip_high,querylength));
+ debug10(printf("Entered hardclip_pairs with hardclip_start %d, hardclip_end %d, querylength %d\n",
+ hardclip_start,hardclip_end,querylength));
debug10(Pair_dump_array(pairs,npairs,true));
debug10(printf("Starting with %d pairs\n",npairs));
i = 0;
ptr = pairs;
- while (i < npairs && ptr->querypos < hardclip_low) {
+ while (i < npairs && ptr->querypos < hardclip_start) {
i++;
ptr++;
}
@@ -4859,10 +4854,10 @@ hardclip_pairs (int *clipped_npairs, int hardclip_low, int hardclip_high,
}
if (i >= npairs) {
- /* hardclip_low passes right end of read, so invalid */
- hardclip_low = 0;
- } else if (hardclip_low > 0) {
- hardclip_low = ptr->querypos;
+ /* hardclip_start passes right end of read, so invalid */
+ hardclip_start = 0;
+ } else if (hardclip_start > 0) {
+ hardclip_start = ptr->querypos;
}
starti = i;
@@ -4870,7 +4865,7 @@ hardclip_pairs (int *clipped_npairs, int hardclip_low, int hardclip_high,
clipped_pairs = ptr;
- while (i < npairs && ptr->querypos < querylength - hardclip_high) {
+ while (i < npairs && ptr->querypos < querylength - hardclip_end) {
i++;
ptr++;
}
@@ -4883,13 +4878,13 @@ hardclip_pairs (int *clipped_npairs, int hardclip_low, int hardclip_high,
}
if (i < 0) {
- /* hardclip_high passes left end of read, so invalid */
- hardclip_high = 0;
- } else if (hardclip_high > 0) {
- hardclip_high = querylength - 1 - ptr->querypos;
+ /* hardclip_end passes left end of read, so invalid */
+ hardclip_end = 0;
+ } else if (hardclip_end > 0) {
+ hardclip_end = querylength - 1 - ptr->querypos;
}
- if (hardclip_low == 0 && hardclip_high == 0) {
+ if (hardclip_start == 0 && hardclip_end == 0) {
debug10(printf("Unable to hard clip\n"));
*clipped_npairs = npairs;
clipped_pairs = pairs;
@@ -4898,8 +4893,8 @@ hardclip_pairs (int *clipped_npairs, int hardclip_low, int hardclip_high,
}
debug10(printf("Ending with %d pairs\n",*clipped_npairs));
- debug10(printf("Exiting hardclip_pairs with hardclip_low %d, hardclip_high %d\n",
- hardclip_low,hardclip_high));
+ debug10(printf("Exiting hardclip_pairs with hardclip_start %d, hardclip_end %d\n",
+ hardclip_start,hardclip_end));
return clipped_pairs;
}
@@ -4990,48 +4985,50 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
static List_T
-compute_cigar (bool *intronp, int *hardclip_low, int *hardclip_high, struct T *pairs, int npairs, int querylength_given,
+compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
bool watsonp, int cdna_direction, int chimera_part) {
List_T tokens = NULL;
char token[10];
int Mlength = 0, Ilength = 0, Dlength = 0;
bool in_exon = false, deletionp;
struct T *ptr, *prev, *this = NULL;
- int exon_querystart = -1, exon_queryend = -1;
- Chrpos_T exon_genomestart = -1, exon_genomeend, genome_gap;
+ int exon_querystart = -1;
+ int exon_queryend = -1;
+ Chrpos_T exon_genomestart = -1;
+ Chrpos_T exon_genomeend, genome_gap;
Chrpos_T intron_start, intron_end;
int query_gap;
int last_querypos = -1;
Chrpos_T last_genomepos = -1U;
int i;
- /* *chimera_hardclip_low = *chimera_hardclip_high = 0; */
+ /* *chimera_hardclip_start = *chimera_hardclip_high = 0; */
*intronp = false;
ptr = pairs;
if (chimera_part == +1) {
- if (ptr->querypos > *hardclip_low) {
+ if (ptr->querypos > *hardclip_start) {
if (ptr->querypos > 0) {
/* Clip to beginning */
- *hardclip_low = ptr->querypos;
- sprintf(token,"%dH",*hardclip_low);
+ *hardclip_start = ptr->querypos;
+ sprintf(token,"%dH",*hardclip_start);
tokens = push_token(tokens,token);
}
} else {
- if (*hardclip_low > 0) {
+ if (*hardclip_start > 0) {
/* Clip to hard clip boundary */
- sprintf(token,"%dH",*hardclip_low);
+ sprintf(token,"%dH",*hardclip_start);
tokens = push_token(tokens,token);
}
}
} else {
- if (*hardclip_low > 0) {
- sprintf(token,"%dH",*hardclip_low);
+ if (*hardclip_start > 0) {
+ sprintf(token,"%dH",*hardclip_start);
tokens = push_token(tokens,token);
}
- if (ptr->querypos > (*hardclip_low)) {
- sprintf(token,"%dS",ptr->querypos - (*hardclip_low));
+ if (ptr->querypos > (*hardclip_start)) {
+ sprintf(token,"%dS",ptr->querypos - (*hardclip_start));
tokens = push_token(tokens,token);
}
}
@@ -5230,27 +5227,27 @@ compute_cigar (bool *intronp, int *hardclip_low, int *hardclip_high, struct T *p
/* Terminal clipping */
if (chimera_part == -1) {
- if (last_querypos < querylength_given - 1 - (*hardclip_high)) {
+ if (last_querypos < querylength_given - 1 - (*hardclip_end)) {
if (last_querypos < querylength_given - 1) {
/* Clip to end */
- *hardclip_high = querylength_given - 1 - last_querypos;
- sprintf(token,"%dH",*hardclip_high);
+ *hardclip_end = querylength_given - 1 - last_querypos;
+ sprintf(token,"%dH",*hardclip_end);
tokens = push_token(tokens,token);
}
} else {
- if (*hardclip_high > 0) {
+ if (*hardclip_end > 0) {
/* Clip to hard clip boundary */
- sprintf(token,"%dH",*hardclip_high);
+ sprintf(token,"%dH",*hardclip_end);
tokens = push_token(tokens,token);
}
}
} else {
- if (last_querypos < querylength_given - 1 - (*hardclip_high)) {
- sprintf(token,"%dS",querylength_given - 1 - (*hardclip_high) - last_querypos);
+ if (last_querypos < querylength_given - 1 - (*hardclip_end)) {
+ sprintf(token,"%dS",querylength_given - 1 - (*hardclip_end) - last_querypos);
tokens = push_token(tokens,token);
}
- if (*hardclip_high > 0) {
- sprintf(token,"%dH",*hardclip_high);
+ if (*hardclip_end > 0) {
+ sprintf(token,"%dH",*hardclip_end);
tokens = push_token(tokens,token);
}
}
@@ -5259,6 +5256,339 @@ compute_cigar (bool *intronp, int *hardclip_low, int *hardclip_high, struct T *p
}
+/* Copied from samprint.c.  Returns true iff cigar_types contains at least one 'M' entry; the adjacent-S/H rejections below are compiled out with #if 0. */
+static bool
+check_cigar_types (Intlist_T cigar_types) {
+ Intlist_T p;
+ int type, last_type = 'M'; /* last_type is referenced only by the disabled code below */
+ bool M_present_p = false;
+ /* Visit every operation code in the list; only an 'M' changes the result. */
+ for (p = cigar_types; p != NULL; p = Intlist_next(p)) {
+ type = Intlist_head(p);
+ if (type == 'M') {
+ M_present_p = true;
+#if 0
+ } else if (type == 'H' && last_type == 'S') {
+ debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
+ return false;
+ } else if (type == 'S' && last_type == 'H') {
+ debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
+ return false;
+#endif
+ }
+ }
+
+ return M_present_p;
+}
+
+/* Pair_check_cigar: collects the CIGAR operation letters (H, S, M, I, D, N) for pairs[0..npairs-1], without lengths, and returns check_cigar_types() applied to that list.  cdna_direction is read only when CONVERT_INTRONS_TO_DELETIONS is defined. */
+bool
+Pair_check_cigar (struct T *pairs, int npairs, int querylength_given,
+ int clipdir, int hardclip5, int hardclip3,
+ bool watsonp, int cdna_direction, bool first_read_p, bool circularp) {
+ bool result;
+ Intlist_T cigar_types = NULL;
+ int hardclip_low, hardclip_high;
+ int Mlength = 0, Ilength = 0, Dlength = 0;
+ bool in_exon = false, deletionp;
+ struct T *ptr, *prev, *this = NULL;
+ int exon_queryend; /* no initializer; assigned where a gap pair closes an exon and again after the loop */
+ int query_gap;
+ int last_querypos = -1;
+ int i;
+ /* Map hardclip5/hardclip3 onto hardclip_low/hardclip_high from circularp, clipdir, watsonp and first_read_p; every non-circular case sets one of the two to 0. */
+ if (circularp == true) {
+ if (watsonp == true) {
+ hardclip_low = hardclip5;
+ hardclip_high = hardclip3;
+ } else {
+ hardclip_low = hardclip3;
+ hardclip_high = hardclip5;
+ }
+ } else {
+ /* Incoming hardclip5 and hardclip3 are due to overlaps, not chimera */
+ if (clipdir >= 0) {
+ if (watsonp == true) {
+ if (first_read_p == true) {
+ hardclip_high = hardclip5;
+ hardclip_low = 0;
+ } else {
+ hardclip_high = 0;
+ hardclip_low = hardclip3;
+ }
+ } else {
+ if (first_read_p == true) {
+ hardclip_low = hardclip5;
+ hardclip_high = 0;
+ } else {
+ hardclip_low = 0;
+ hardclip_high = hardclip3;
+ }
+ }
+ } else {
+ if (watsonp == true) {
+ if (first_read_p == true) {
+ hardclip_low = hardclip5;
+ hardclip_high = 0;
+ } else {
+ hardclip_low = 0;
+ hardclip_high = hardclip3;
+ }
+ } else {
+ if (first_read_p == true) {
+ hardclip_high = hardclip5;
+ hardclip_low = 0;
+ } else {
+ hardclip_high = 0;
+ hardclip_low = hardclip3;
+ }
+ }
+ }
+ }
+
+
+ ptr = pairs;
+ /* Leading clips: push H when hardclip_low > 0, then S when the first pair starts beyond hardclip_low.  NOTE(review): pairs[0] is read below without testing npairs > 0 -- presumably callers always pass a non-empty array; confirm. */
+#if 0
+ /* This procedure is used to check circular alignments */
+ if (chimera_part == +1) {
+ if (ptr->querypos > hardclip_low) {
+ if (ptr->querypos > 0) {
+ /* Clip to beginning */
+ hardclip_low = ptr->querypos;
+ cigar_types = Intlist_push(cigar_types,'H');
+ }
+ } else {
+ if (hardclip_low > 0) {
+ /* Clip to hard clip boundary */
+ cigar_types = Intlist_push(cigar_types,'H');
+ }
+ }
+ } else {
+#endif
+ if (hardclip_low > 0) {
+ cigar_types = Intlist_push(cigar_types,'H');
+ }
+ if (ptr->querypos > hardclip_low) {
+ cigar_types = Intlist_push(cigar_types,'S');
+ }
+#if 0
+ }
+#endif
+
+ this = (T) NULL;
+ for (i = 0; i < npairs; i++) {
+ prev = this;
+ this = ptr++;
+
+ if (this->gapp) {
+ if (in_exon == true) {
+ exon_queryend = last_querypos + 1;
+#if 0
+ exon_genomeend = last_genomepos + 1;
+ if (watsonp) {
+ intron_start = exon_genomeend + 1;
+ } else {
+ intron_start = exon_genomeend - 1;
+ }
+#endif
+ /* Flush the pending run: at most one of M, I, D is pushed, then all three counters are reset. */
+ if (Mlength > 0) {
+ cigar_types = Intlist_push(cigar_types,'M');
+ } else if (Ilength > 0) {
+ cigar_types = Intlist_push(cigar_types,'I');
+ } else if (Dlength > 0) {
+ cigar_types = Intlist_push(cigar_types,'D');
+ }
+
+ Mlength = Ilength = Dlength = 0;
+
+ in_exon = false;
+ }
+
+ } else if (this->comp == INTRONGAP_COMP) {
+ /* Do nothing */
+
+ } else {
+ /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP,
+ SHORTGAP_COMP, or MISMATCH_COMP */
+ if (in_exon == false) {
+#if 0
+ /* Needed only for full token */
+ exon_querystart = this->querypos + 1;
+ exon_genomestart = this->genomepos + 1;
+ if (watsonp) {
+ intron_end = exon_genomestart - 1;
+ } else {
+ intron_end = exon_genomestart + 1;
+ }
+#endif
+
+ if (prev != NULL) {
+ /* Gap */
+ /* genome_gap = abs(intron_end - intron_start) + 1; */
+ /* NOTE(review): the CONVERT_INTRONS_TO_DELETIONS branch below reads genome_gap, which this function never declares or assigns -- confirm before enabling that macro. */
+ deletionp = false;
+#ifdef CONVERT_INTRONS_TO_DELETIONS
+ if (cdna_direction > 0) {
+ if (prev->comp == FWD_CANONICAL_INTRON_COMP ||
+ prev->comp == FWD_GCAG_INTRON_COMP ||
+ prev->comp == FWD_ATAC_INTRON_COMP) {
+ cigar_types = Intlist_push(cigar_types,'N');
+ /* *intronp = true; */
+ } else if (cigar_noncanonical_splices_p == true && genome_gap >= MIN_INTRONLEN) {
+ cigar_types = Intlist_push(cigar_types,'N');
+ /* *intronp = true; */
+ } else {
+ cigar_types = Intlist_push(cigar_types,'D');
+ deletionp = true;
+ }
+ } else if (cdna_direction < 0) {
+ if (prev->comp == REV_CANONICAL_INTRON_COMP ||
+ prev->comp == REV_GCAG_INTRON_COMP ||
+ prev->comp == REV_ATAC_INTRON_COMP) {
+ cigar_types = Intlist_push(cigar_types,'N');
+ /* *intronp = true; */
+ } else if (cigar_noncanonical_splices_p == true && genome_gap >= MIN_INTRONLEN) {
+ cigar_types = Intlist_push(cigar_types,'N');
+ /* *intronp = true; */
+ } else {
+ cigar_types = Intlist_push(cigar_types,'D');
+ deletionp = true;
+ }
+ } else if (cigar_noncanonical_splices_p == true && genome_gap >= MIN_INTRONLEN){
+ cigar_types = Intlist_push(cigar_types,'N');
+ /* *intronp = true; */
+ } else {
+ cigar_types = Intlist_push(cigar_types,'D');
+ deletionp = true;
+ }
+#else
+ cigar_types = Intlist_push(cigar_types,'N');
+ /* *intronp = true; */
+#endif
+ /* NOTE(review): exon_queryend is set only where a gap pair closes an exon, so if the array begins with a gap or INTRONGAP_COMP pair the assert below reads an indeterminate value -- presumably callers never pass such arrays; confirm. */
+ /* Check for dual gap. Doesn't work for hard clipping. */
+ assert(exon_queryend >= 0);
+
+ query_gap = this->querypos - exon_queryend;
+ assert(query_gap >= 0);
+ if (query_gap > 0) {
+ if (deletionp == true && sam_insert_0M_p == true) {
+ /* Put zero matches between deletion and insertion, since some programs will complain */
+ cigar_types = Intlist_push(cigar_types,'M');
+ }
+
+ cigar_types = Intlist_push(cigar_types,'I');
+ }
+ }
+
+ in_exon = true;
+ }
+
+ if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
+ /* Gap in upper or lower sequence */
+ if (this->genome == ' ') {
+ /* Insertion relative to genome */
+ if (Mlength > 0) {
+ cigar_types = Intlist_push(cigar_types,'M');
+ Mlength = 0;
+ } else if (Dlength > 0) {
+ /* unlikely */
+ cigar_types = Intlist_push(cigar_types,'D');
+ Dlength = 0;
+ }
+ Ilength++;
+ } else if (this->cdna == ' ') {
+ /* Deletion relative to genome */
+ if (Mlength > 0) {
+ cigar_types = Intlist_push(cigar_types,'M');
+ Mlength = 0;
+ } else if (Ilength > 0) {
+ cigar_types = Intlist_push(cigar_types,'I');
+ Ilength = 0;
+ }
+ Dlength++;
+ } else {
+ fprintf(stderr,"Error at %c%c%c\n",this->genome,this->comp,this->cdna);
+ exit(9);
+ }
+
+ } else {
+ /* Count even if unknown base */
+
+ if (Ilength > 0) {
+ cigar_types = Intlist_push(cigar_types,'I');
+ Ilength = 0;
+ } else if (Dlength > 0) {
+ cigar_types = Intlist_push(cigar_types,'D');
+ Dlength = 0;
+ }
+ Mlength++;
+ }
+ }
+ /* Remember the query position of the most recent pair whose cdna is not ' '. */
+ if (this != NULL) {
+ if (this->cdna != ' ') {
+ last_querypos = this->querypos;
+ }
+#if 0
+ if (this->genome != ' ') {
+ last_genomepos = this->genomepos;
+ }
+#endif
+ }
+ }
+
+ /* prev = this; */
+ exon_queryend = last_querypos + 1;
+ /* exon_genomeend = last_genomepos + 1; */
+ /* Flush whichever run is still pending after the last pair. */
+ if (Mlength > 0) {
+ cigar_types = Intlist_push(cigar_types,'M');
+ } else if (Ilength > 0) {
+ cigar_types = Intlist_push(cigar_types,'I');
+ } else if (Dlength > 0) {
+ cigar_types = Intlist_push(cigar_types,'D');
+ }
+
+ /* Trailing clips: push S when last_querypos falls short of querylength_given - 1 - hardclip_high, then H when hardclip_high > 0. */
+ /* Terminal clipping */
+#if 0
+ /* This procedure is used to check circular alignments */
+ if (chimera_part == -1) {
+ if (last_querypos < querylength_given - 1 - hardclip_high) {
+ if (last_querypos < querylength_given - 1) {
+ /* Clip to end */
+ hardclip_high = querylength_given - 1 - last_querypos;
+ cigar_types = Intlist_push(cigar_types,'H');
+ }
+ } else {
+ if (hardclip_high > 0) {
+ /* Clip to hard clip boundary */
+ cigar_types = Intlist_push(cigar_types,'H');
+ }
+ }
+ } else {
+#endif
+ if (last_querypos < querylength_given - 1 - hardclip_high) {
+ cigar_types = Intlist_push(cigar_types,'S');
+ }
+ if (hardclip_high > 0) {
+ cigar_types = Intlist_push(cigar_types,'H');
+ }
+#if 0
+ }
+#endif
+ /* Validate the collected operation types, then release the list before returning. */
+ result = check_cigar_types(cigar_types);
+
+ Intlist_free(&cigar_types);
+ return result;
+}
+
+
+
typedef enum {IN_MATCHES, IN_MISMATCHES, IN_DELETION} MD_state_T;
#if 0
@@ -5686,14 +6016,15 @@ void
Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
char *acc1, char *acc2, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
char *queryseq_ptr, char *quality_string,
- int clipdir, int hardclip5, int hardclip3, int querylength_given,
+ int clipdir, int hardclip_low, int hardclip_high, int querylength_given,
bool watsonp, int cdna_direction, int chimera_part, Chimera_T chimera,
int quality_shift, bool first_read_p, int pathnum, int npaths,
- int absmq_score, int first_absmq, int second_absmq, Chrpos_T chrpos,
+ int absmq_score, int first_absmq, int second_absmq, Chrpos_T chrpos, Chrpos_T chrlength,
#ifdef GSNAP
Shortread_T queryseq, Resulttype_T resulttype, unsigned int flag,
int pair_mapq_score, int end_mapq_score,
- Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum, Chrpos_T mate_chrpos,
+ Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
+ Chrpos_T mate_chrpos, Chrpos_T mate_chrlength,
int mate_cdna_direction, int pairedlength,
#else
int mapq_score, bool sam_paired_p,
@@ -5709,8 +6040,8 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
List_T cigar_tokens = NULL, md_tokens = NULL;
int nmismatches_refdiff, nmismatches_bothdiff, nindels;
bool intronp, ignore_intronp;
- int hardclip_low, hardclip_high;
- int hardclip_low_zero = 0, hardclip_high_zero = 0;
+ int hardclip_start, hardclip_end;
+ int hardclip_start_zero = 0, hardclip_end_zero = 0;
struct T *clipped_pairs;
int clipped_npairs;
@@ -5744,70 +6075,30 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
debug4(printf("Entered Pair_print_sam with clipdir %d, watsonp %d, first_read_p %d, hardclip5 %d, and hardclip3 %d\n",
clipdir,watsonp,first_read_p,hardclip5,hardclip3));
- if (circularp == true) {
- if (watsonp == true) {
- hardclip_low = hardclip5;
- hardclip_high = hardclip3;
- } else {
- hardclip_low = hardclip3;
- hardclip_high = hardclip5;
- }
+ if (watsonp == true) {
+ hardclip_start = hardclip_low;
+ hardclip_end = hardclip_high;
} else {
- /* Incoming hardclip5 and hardclip3 are due to overlaps, not chimera */
- if (clipdir >= 0) {
- if (watsonp == true) {
- if (first_read_p == true) {
- hardclip_high = hardclip5;
- hardclip_low = 0;
- } else {
- hardclip_high = 0;
- hardclip_low = hardclip3;
- }
- } else {
- if (first_read_p == true) {
- hardclip_low = hardclip5;
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip3;
- }
- }
- } else {
- if (watsonp == true) {
- if (first_read_p == true) {
- hardclip_low = hardclip5;
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip3;
- }
- } else {
- if (first_read_p == true) {
- hardclip_high = hardclip5;
- hardclip_low = 0;
- } else {
- hardclip_high = 0;
- hardclip_low = hardclip3;
- }
- }
- }
+ hardclip_start = hardclip_high;
+ hardclip_end = hardclip_low;
}
- debug4(printf("hardclip_low %d, hardclip_high %d\n",hardclip_low,hardclip_high));
+ debug4(printf("hardclip_start %d, hardclip_end %d\n",hardclip_start,hardclip_end));
+
/* Get CIGAR and intronp for entire read */
- cigar_tokens = compute_cigar(&intronp,&hardclip_low_zero,&hardclip_high_zero,pairs,npairs,querylength_given,
+ cigar_tokens = compute_cigar(&intronp,&hardclip_start_zero,&hardclip_end_zero,pairs,npairs,querylength_given,
watsonp,cdna_direction,chimera_part);
- if (hardclip5 == 0 && hardclip3 == 0) {
+ if (hardclip_start == 0 && hardclip_end == 0) {
clipped_pairs = pairs;
clipped_npairs = npairs;
} else {
- clipped_pairs = hardclip_pairs(&clipped_npairs,hardclip_low,hardclip_high,
+ clipped_pairs = hardclip_pairs(&clipped_npairs,hardclip_start,hardclip_end,
pairs,npairs,querylength_given);
}
tokens_free(&cigar_tokens);
/* Cigar updates hardclip5 and hardclip3 for chimeras */
- cigar_tokens = compute_cigar(&ignore_intronp,&hardclip_low,&hardclip_high,clipped_pairs,clipped_npairs,querylength_given,
+ cigar_tokens = compute_cigar(&ignore_intronp,&hardclip_start,&hardclip_end,clipped_pairs,clipped_npairs,querylength_given,
watsonp,cdna_direction,chimera_part);
md_tokens = compute_md_string(&nmismatches_refdiff,&nmismatches_bothdiff,&nindels,
@@ -5816,12 +6107,13 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
print_sam_line(fp,abbrev,first_read_p,acc1,acc2,chrstring,
watsonp,cdna_direction,cigar_tokens,md_tokens,
nmismatches_refdiff,nmismatches_bothdiff,nindels,
- intronp,queryseq_ptr,quality_string,hardclip_low,hardclip_high,
+ intronp,queryseq_ptr,quality_string,hardclip_start,hardclip_end,
querylength_given,chimera,quality_shift,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,flag,chrnum,chromosome_iit,chrpos,
+ absmq_score,first_absmq,second_absmq,flag,
+ chrnum,chromosome_iit,chrpos,chrlength,
#ifdef GSNAP
queryseq,resulttype,pair_mapq_score,end_mapq_score,mate_chrstring,
- mate_chrnum,mate_effective_chrnum,mate_chrpos,
+ mate_chrnum,mate_effective_chrnum,mate_chrpos,mate_chrlength,
mate_cdna_direction,pairedlength,
#else
mapq_score,clipped_pairs,clipped_npairs,
@@ -5886,9 +6178,9 @@ Pair_print_sam_nomapping (FILE *fp, char *abbrev, char *acc1, char *acc2, char *
fprintf(fp,"\t*\t0\t0\t");
/* 10. SEQ: queryseq and 11. QUAL: quality scores */
- print_chopped(fp,queryseq_ptr,querylength,/*hardclip_low*/0,/*hardclip_high*/0);
+ print_chopped(fp,queryseq_ptr,querylength,/*hardclip_start*/0,/*hardclip_end*/0);
fprintf(fp,"\t");
- print_quality(fp,quality_string,querylength,/*hardclip_low*/0,/*hardclip_high*/0,
+ print_quality(fp,quality_string,querylength,/*hardclip_start*/0,/*hardclip_end*/0,
quality_shift);
/* 12. TAGS: RG */
@@ -5912,14 +6204,14 @@ Pair_print_sam_nomapping (FILE *fp, char *abbrev, char *acc1, char *acc2, char *
Uintlist_T
Pair_exonbounds (struct T *pairs, int npairs, Univcoord_T chroffset) {
Uintlist_T exonbounds = NULL;
- struct T *ptr, *prev, *this = NULL;
+ struct T *ptr, *this = NULL;
bool in_exon = false;
int i;
Chrpos_T last_genomepos = -1U;
ptr = pairs;
for (i = 0; i < npairs; i++) {
- prev = this;
+ /* prev = this; */
this = ptr++;
if (this->gapp) {
@@ -5944,7 +6236,7 @@ Pair_exonbounds (struct T *pairs, int npairs, Univcoord_T chroffset) {
}
}
- prev = this;
+ /* prev = this; */
exonbounds = Uintlist_push(exonbounds,chroffset + last_genomepos);
return Uintlist_reverse(exonbounds);
@@ -5956,13 +6248,13 @@ count_psl_blocks_nt (Intlist_T *blockSizes, Intlist_T *qStarts, Uintlist_T *tSta
int npairs, int querylength, bool watsonp) {
int nblocks = 0, i;
int block_querystart, block_queryend;
- struct T *ptr = pairs_directional, *prev, *this = NULL;
+ struct T *ptr = pairs_directional, *this = NULL;
bool in_block = false;
int last_querypos = -1;
- Chrpos_T last_genomepos = -1U;
+ /* Chrpos_T last_genomepos = -1U; */
for (i = 0; i < npairs; i++) {
- prev = this;
+ /* prev = this; */
this = ptr++;
if (this->gapp) {
@@ -6005,13 +6297,15 @@ count_psl_blocks_nt (Intlist_T *blockSizes, Intlist_T *qStarts, Uintlist_T *tSta
if (this->cdna != ' ') {
last_querypos = this->querypos;
}
+#if 0
if (this->genome != ' ') {
last_genomepos = this->genomepos;
}
+#endif
}
if (in_block == true) {
- prev = this;
+ /* prev = this; */
nblocks++;
block_queryend = last_querypos;
debug2(fprintf(fp,"Block size: %d\n",abs(block_queryend-block_querystart)+1));
@@ -7465,7 +7759,7 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
bool in_exon = false;
List_T tokens = NULL;
- struct T *ptr = pairs, *prev, *this = NULL;
+ struct T *ptr = pairs, *this = NULL;
int querypos1, querypos2;
int exon_querystart = -1, exon_queryend;
Chrpos_T exon_genomestart = -1, exon_genomeend, intron_start, intron_end;
@@ -7575,7 +7869,7 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
putc('\n',fp);
for (i = 0; i < npairs; i++) {
- prev = this;
+ /* prev = this; */
this = ptr++;
if (this->gapp) {
@@ -7731,7 +8025,7 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
}
}
- prev = this;
+ /* prev = this; */
exon_queryend = last_querypos + ONEBASEDP;
exon_genomeend = last_genomepos + ONEBASEDP;
@@ -7787,7 +8081,7 @@ Pair_print_iit_exon_map (FILE *fp, struct T *pairs, int npairs, Sequence_T query
T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit) {
int i;
bool in_exon = false;
- struct T *ptr = pairs, *prev, *this = NULL;
+ struct T *ptr = pairs, *this = NULL;
Chrpos_T exon_genomestart = -1, exon_genomeend;
char *chrstring = NULL;
Chrpos_T chrpos1, chrpos2;
@@ -7805,7 +8099,7 @@ Pair_print_iit_exon_map (FILE *fp, struct T *pairs, int npairs, Sequence_T query
Sequence_print_header(fp,queryseq,/*checksump*/false);
for (i = 0; i < npairs; i++) {
- prev = this;
+ /* prev = this; */
this = ptr++;
if (this->gapp) {
@@ -7830,7 +8124,7 @@ Pair_print_iit_exon_map (FILE *fp, struct T *pairs, int npairs, Sequence_T query
}
}
- prev = this;
+ /* prev = this; */
exon_genomeend = last_genomepos + ONEBASEDP;
fprintf(fp,"%u %u\n",exon_genomestart,exon_genomeend);
@@ -7848,7 +8142,7 @@ Pair_print_splicesites (FILE *fp, struct T *pairs, int npairs, char *accession,
int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, bool watsonp) {
int exoni = 0, i;
bool in_exon = false;
- struct T *ptr = pairs, *prev, *this = NULL;
+ struct T *ptr = pairs, *this = NULL;
Chrpos_T exon_genomestart = -1U, exon_genomeend;
char *chrstring = NULL;
Chrpos_T last_genomepos = -1U, intron_length;
@@ -7860,7 +8154,7 @@ Pair_print_splicesites (FILE *fp, struct T *pairs, int npairs, char *accession,
}
for (i = 0; i < npairs; i++) {
- prev = this;
+ /* prev = this; */
this = ptr++;
if (this->gapp) {
@@ -7917,7 +8211,7 @@ Pair_print_introns (FILE *fp, struct T *pairs, int npairs, char *accession,
int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit) {
int exoni = 0, i;
bool in_exon = false;
- struct T *ptr = pairs, *prev, *this = NULL;
+ struct T *ptr = pairs, *this = NULL;
Chrpos_T exon_genomestart = -1, exon_genomeend;
char *chrstring = NULL;
Chrpos_T last_genomepos = -1U;
@@ -7929,7 +8223,7 @@ Pair_print_introns (FILE *fp, struct T *pairs, int npairs, char *accession,
}
for (i = 0; i < npairs; i++) {
- prev = this;
+ /* prev = this; */
this = ptr++;
if (this->gapp) {
diff --git a/src/pair.h b/src/pair.h
index 04ca6ec..1a3e93f 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 149571 2014-10-01 19:22:17Z twu $ */
+/* $Id: pair.h 154023 2014-11-25 03:45:18Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -24,12 +24,16 @@ typedef struct Pair_T *Pair_T;
#define MATCHESPERGAP 3
+typedef enum {CIGAR_ACTION_IGNORE, CIGAR_ACTION_WARNING, CIGAR_ACTION_ABORT} Cigar_action_T;
+
+
#define T Pair_T
extern void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
- bool md_lowercase_variant_p_in, bool snps_p_in, Univcoord_T genomelength_in);
+ bool md_lowercase_variant_p_in, bool snps_p_in, Univcoord_T genomelength_in,
+ Cigar_action_T cigar_action_in);
extern int
Pair_querypos (T this);
extern Chrpos_T
@@ -124,7 +128,7 @@ extern bool
Pair_check_array (struct T *pairs, int npairs);
extern List_T
Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset);
+ int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
extern void
Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
@@ -174,6 +178,11 @@ Pair_alias_circular (struct T *pairs, int npairs, Chrpos_T chrlength);
extern void
Pair_unalias_circular (struct T *pairs, int npairs, Chrpos_T chrlength);
+extern bool
+Pair_check_cigar (struct T *pairs, int npairs, int querylength_given,
+ int clipdir, int hardclip5, int hardclip3,
+ bool watsonp, int cdna_direction, bool first_read_p, bool circularp);
+
extern List_T
Pair_clean_cigar (List_T tokens, bool watsonp);
@@ -181,14 +190,15 @@ extern void
Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
char *acc1, char *acc2, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
char *queryseq_ptr, char *quality_string,
- int clipdir, int hardclip5, int hardclip3, int querylength_given,
+ int clipdir, int hardclip_low, int hardclip_high, int querylength_given,
bool watsonp, int cdna_direction, int chimera_part, Chimera_T chimera,
int quality_shift, bool first_read_p, int pathnum, int npaths,
- int absmq_score, int first_absmq, int second_absmq, Chrpos_T chrpos,
+ int absmq_score, int first_absmq, int second_absmq, Chrpos_T chrpos, Chrpos_T chrlength,
#ifdef GSNAP
Shortread_T queryseq, Resulttype_T resulttype, unsigned int flag,
int pair_mapq_score, int end_mapq_score,
- Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum, Chrpos_T mate_chrpos,
+ Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
+ Chrpos_T mate_chrpos, Chrpos_T mate_chrlength,
int mate_cdna_direction, int pairedlength,
#else
int mapq_score, bool sam_paired_p,
diff --git a/src/sam_sort.c b/src/sam_sort.c
index a4e5f49..acd252d 100644
--- a/src/sam_sort.c
+++ b/src/sam_sort.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sam_sort.c 149423 2014-09-30 18:07:12Z twu $";
+static char rcsid[] = "$Id: sam_sort.c 154089 2014-11-25 21:03:16Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -128,11 +128,17 @@ static bool multiple_primaries_p = false;
static Stopwatch_T stopwatch = NULL;
+static char *sevenway_root = NULL;
+static bool appendp = false;
+static FILE **outputs = NULL;
+
static struct option long_options[] = {
/* Input options */
{"dir", required_argument, 0, 'D'}, /* user_genomedir */
{"db", required_argument, 0, 'd'}, /* dbroot */
+ {"split-output", required_argument, 0, 0}, /* outputs */
+ {"append-output", no_argument, 0, 0}, /* appendp */
{"sort2", required_argument, 0, 0}, /* secondary_sort_method */
@@ -170,6 +176,12 @@ Input options\n\
-D, --dir=STRING Genome directory\n\
-d, --db=STRING Genome database. If argument is '?' (with\n\
the quotes), this command lists available databases.\n\
+Output file options\n\
+ --split-output=STRING Basename for multiple-file output, separately for nomapping,\n\
+ halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult,\n\
+ paired_uniq, paired_mult, concordant_uniq, and concordant_mult results\n\
+ --append-output When --split-output is given, this flag will append output\n\
+ to the existing files. Otherwise, the default is to create new files.\n\
\n\
Other options\n\
--sort2=STRING For positions with the same genomic position, sort secondarily by\n\
@@ -215,6 +227,7 @@ make_complement_inplace (char *sequence, unsigned int length) {
typedef struct T *T;
struct T {
unsigned int flag;
+ SAM_split_output_type split_output;
bool low_read_p;
Chrnum_T chrnum;
@@ -261,12 +274,14 @@ Cell_standardize_queryseqs (T this) {
/* initial_softclip needs to be determined only if we are marking duplicates */
static void
-Cell_fill (struct T *this, int readindex, unsigned int flag, bool query_lowp, int initial_softclip,
- Univcoord_T genomicpos, off_t fileposition, int linelen) {
+Cell_fill (struct T *this, int readindex, unsigned int flag, SAM_split_output_type split_output,
+ bool query_lowp, int initial_softclip, Univcoord_T genomicpos, off_t fileposition,
+ int linelen) {
this->readindex = readindex;
this->flag = flag;
+ this->split_output = split_output;
this->low_read_p = query_lowp;
this->genomicpos = genomicpos;
@@ -283,12 +298,13 @@ Cell_fill (struct T *this, int readindex, unsigned int flag, bool query_lowp, in
/* initial_softclip needs to be determined only if we are marking duplicates */
static void
-Cell_fill_nodups (struct T *this, unsigned int flag, Univcoord_T genomicpos,
- off_t fileposition, int linelen) {
+Cell_fill_nodups (struct T *this, unsigned int flag, SAM_split_output_type split_output,
+ Univcoord_T genomicpos, off_t fileposition, int linelen) {
this->readindex = 0;
this->flag = flag;
+ this->split_output = split_output;
this->low_read_p = true;
this->genomicpos = genomicpos;
@@ -326,24 +342,31 @@ print_fromfile (FILE *fp, off_t fileposition, int linelength) {
static void
-Cell_print_fromfile (FILE *fp, T this) {
+Cell_print_fromfile (FILE *fp_input, T this) {
char buffer[CHUNK];
int linelength = this->linelen;
+ FILE *fp_output;
+
+ if (outputs == NULL) {
+ fp_output = stdout;
+ } else {
+ fp_output = outputs[this->split_output];
+ }
- moveto(fp,this->linestart);
+ moveto(fp_input,this->linestart);
#ifdef DEBUG
printf("readindex %d: ",this->readindex);
#endif
while (linelength > CHUNK) {
- fread(buffer,sizeof(char),CHUNK,fp);
- fwrite(buffer,sizeof(char),CHUNK,stdout);
+ fread(buffer,sizeof(char),CHUNK,fp_input);
+ fwrite(buffer,sizeof(char),CHUNK,fp_output);
linelength -= CHUNK;
}
if (linelength > 0) {
- fread(buffer,sizeof(char),linelength,fp);
- fwrite(buffer,sizeof(char),linelength,stdout);
+ fread(buffer,sizeof(char),linelength,fp_input);
+ fwrite(buffer,sizeof(char),linelength,fp_output);
}
return;
@@ -541,6 +564,7 @@ process_without_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int nc
off_t fileposition;
int linelen;
unsigned int flag;
+ SAM_split_output_type split_output;
Univcoord_T genomicpos;
int acclength;
@@ -556,8 +580,9 @@ process_without_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int nc
for (l = linelengths; l != NULL; l = Intlist_next(l)) {
linelen = Intlist_head(l);
moveto(fp_sam,fileposition);
- genomicpos = Samread_parse_genomicpos_fromfile(fp_sam,&flag,chromosome_iit,chroffsets,linelen);
- Cell_fill_nodups(cells[k++],flag,genomicpos,fileposition,linelen);
+ genomicpos = Samread_parse_genomicpos_fromfile(fp_sam,&flag,&split_output,
+ chromosome_iit,chroffsets,linelen);
+ Cell_fill_nodups(cells[k++],flag,split_output,genomicpos,fileposition,linelen);
if (flag & QUERY_UNMAPPED) {
n_nomappers++;
} else {
@@ -729,6 +754,7 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
Intlist_T l;
unsigned int flag;
+ SAM_split_output_type split_output;
int initial_softclip;
char *acc, *last_acc, *read;
int readindex, nreads;
@@ -760,8 +786,9 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
for (l = linelengths; l != NULL; l = Intlist_next(l)) {
linelen = Intlist_head(l);
moveto(fp_sam,fileposition);
- acc = Samread_get_acc_and_softclip_fromfile(&acclength,&flag,&genomicpos,&initial_softclip,&query_lowp,
- fp_sam,chromosome_iit,chroffsets,linelen);
+ acc = Samread_parse_acc_and_softclip_fromfile(&acclength,&flag,&split_output,&hiti,
+ &genomicpos,&initial_softclip,&query_lowp,
+ fp_sam,chromosome_iit,chroffsets,linelen);
if (acclength != last_acclength) {
readindex++;
} else if (strcmp(acc,last_acc)) {
@@ -782,7 +809,10 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
/* Don't use secondary hit for accessing reads */
} else if (multiple_primaries_p == true) {
+#if 0
+ /* Now always parsed */
hiti = Samread_parse_aux_fromfile(fp_sam,/*auxfield*/"HI",linelen);
+#endif
if (strcmp(hiti,"1")) {
/* Don't use second or later primary hit for accessing reads */
} else if (flag & FIRST_READ_P) {
@@ -790,7 +820,6 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
} else {
queryseq3_index[readindex] = k;
}
- FREE(hiti);
} else {
if (flag & FIRST_READ_P) {
@@ -800,7 +829,8 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
}
}
- Cell_fill(cells[k++],readindex,flag,query_lowp,initial_softclip,genomicpos,fileposition,linelen);
+ FREE(hiti);
+ Cell_fill(cells[k++],readindex,flag,split_output,query_lowp,initial_softclip,genomicpos,fileposition,linelen);
fileposition += linelen;
}
@@ -1177,6 +1207,213 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
}
+static void
+split_output_open (char *sevenway_root, bool appendp) {
+ char *filename;
+ char *write_mode;
+ /* Opens one file per split-output category, named <sevenway_root>.<suffix>, and stores each handle in outputs[] (declared outside this function) at the matching OUTPUT_* index. */
+ if (appendp == true) { /* "a" appends to an existing file; "w" truncates it */
+ write_mode = "a";
+ } else {
+ write_mode = "w";
+ }
+ /* 23 slots: OUTPUT_NONE plus the 22 files opened below, so valid indices are 0..22. */
+ outputs = (FILE **) MALLOC(23 * sizeof(FILE *));
+ outputs[OUTPUT_NONE] = stdout;
+ /* Every stanza below has the same shape: build the filename, fopen it with write_mode, exit(9) with a message on failure, then free the filename. */
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".nomapping")+1,sizeof(char));
+ sprintf(filename,"%s.nomapping",sevenway_root);
+ if ((outputs[OUTPUT_NM] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+ /* halfmapping_* categories */
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_uniq")+1,sizeof(char));
+ sprintf(filename,"%s.halfmapping_uniq",sevenway_root);
+ if ((outputs[OUTPUT_HU] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_circular")+1,sizeof(char));
+ sprintf(filename,"%s.halfmapping_circular",sevenway_root);
+ if ((outputs[OUTPUT_HC] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_transloc")+1,sizeof(char));
+ sprintf(filename,"%s.halfmapping_transloc",sevenway_root);
+ if ((outputs[OUTPUT_HT] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_mult")+1,sizeof(char));
+ sprintf(filename,"%s.halfmapping_mult",sevenway_root);
+ if ((outputs[OUTPUT_HM] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_mult_xs")+1,sizeof(char));
+ sprintf(filename,"%s.halfmapping_mult_xs",sevenway_root);
+ if ((outputs[OUTPUT_HX] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ /* unpaired_* categories */
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_uniq")+1,sizeof(char));
+ sprintf(filename,"%s.unpaired_uniq",sevenway_root);
+ if ((outputs[OUTPUT_UU] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_circular")+1,sizeof(char));
+ sprintf(filename,"%s.unpaired_circular",sevenway_root);
+ if ((outputs[OUTPUT_UC] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_transloc")+1,sizeof(char));
+ sprintf(filename,"%s.unpaired_transloc",sevenway_root);
+ if ((outputs[OUTPUT_UT] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_mult")+1,sizeof(char));
+ sprintf(filename,"%s.unpaired_mult",sevenway_root);
+ if ((outputs[OUTPUT_UM] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_mult_xs")+1,sizeof(char));
+ sprintf(filename,"%s.unpaired_mult_xs",sevenway_root);
+ if ((outputs[OUTPUT_UX] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ /* concordant_* categories (opened here before the paired_* files; each handle still lands in its own OUTPUT_* slot) */
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_uniq")+1,sizeof(char));
+ sprintf(filename,"%s.concordant_uniq",sevenway_root);
+ if ((outputs[OUTPUT_CU] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_circular")+1,sizeof(char));
+ sprintf(filename,"%s.concordant_circular",sevenway_root);
+ if ((outputs[OUTPUT_CC] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_transloc")+1,sizeof(char));
+ sprintf(filename,"%s.concordant_transloc",sevenway_root);
+ if ((outputs[OUTPUT_CT] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_mult")+1,sizeof(char));
+ sprintf(filename,"%s.concordant_mult",sevenway_root);
+ if ((outputs[OUTPUT_CM] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_mult_xs")+1,sizeof(char));
+ sprintf(filename,"%s.concordant_mult_xs",sevenway_root);
+ if ((outputs[OUTPUT_CX] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ /* paired_* categories */
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_uniq_circular")+1,sizeof(char));
+ sprintf(filename,"%s.paired_uniq_circular",sevenway_root);
+ if ((outputs[OUTPUT_PC] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_uniq_inv")+1,sizeof(char));
+ sprintf(filename,"%s.paired_uniq_inv",sevenway_root);
+ if ((outputs[OUTPUT_PI] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_uniq_scr")+1,sizeof(char));
+ sprintf(filename,"%s.paired_uniq_scr",sevenway_root);
+ if ((outputs[OUTPUT_PS] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_uniq_long")+1,sizeof(char));
+ sprintf(filename,"%s.paired_uniq_long",sevenway_root);
+ if ((outputs[OUTPUT_PL] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_mult")+1,sizeof(char));
+ sprintf(filename,"%s.paired_mult",sevenway_root);
+ if ((outputs[OUTPUT_PM] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_mult_xs")+1,sizeof(char));
+ sprintf(filename,"%s.paired_mult_xs",sevenway_root);
+ if ((outputs[OUTPUT_PX] = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+ FREE(filename);
+
+ return;
+}
+
+static void
+split_output_close ( ) {
+ int i;
+ /* Closes every split-output file. outputs has 23 slots (indices 0..22); slot 0 (OUTPUT_NONE) is stdout and is deliberately left open. */
+ for (i = 1; i < 23; i++) { /* was i <= 23, which passed outputs[23] (one past the end of the array) to fclose */
+ fclose(outputs[i]);
+ }
+ return;
+}
+
+
#define BUFFERLEN 1024
@@ -1219,6 +1456,11 @@ main (int argc, char *argv[]) {
print_program_usage();
exit(0);
+ } else if (!strcmp(long_name,"split-output")) {
+ sevenway_root = optarg;
+ } else if (!strcmp(long_name,"append-output")) {
+ appendp = true;
+
} else if (!strcmp(long_name,"sort2")) {
if (!strcmp(optarg,"none")) {
secondary_sort_method = NO_SECONDARY_SORT;
@@ -1309,6 +1551,11 @@ main (int argc, char *argv[]) {
FREE(chrlengths);
}
+ if (sevenway_root != NULL) {
+ split_output_open(sevenway_root,appendp);
+ }
+
+
/* SAM file */
stopwatch = Stopwatch_new();
if ((fp_sam = fopen(argv[0],"r")) == NULL) {
@@ -1389,8 +1636,8 @@ main (int argc, char *argv[]) {
if (fileposition != Access_filesize(argv[0])) {
fprintf(stderr,"Something is wrong with parsing of SAM file\n");
- fprintf(stderr,"Final file position using sortinfo: %lu\n",fileposition);
- fprintf(stderr,"File size of SAM output file: %lu\n",Access_filesize(argv[0]));
+ fprintf(stderr,"Final file position using sortinfo: %llu\n",(unsigned long long) fileposition);
+ fprintf(stderr,"File size of SAM output file: %llu\n",(unsigned long long) Access_filesize(argv[0]));
exit(9);
}
@@ -1413,6 +1660,10 @@ main (int argc, char *argv[]) {
fclose(fp_sam);
+ if (sevenway_root != NULL) {
+ split_output_close();
+ }
+
Intlist_free(&linelengths);
FREE(chroffsets);
diff --git a/src/samflags.h b/src/samflags.h
index 9f3c160..101cae0 100644
--- a/src/samflags.h
+++ b/src/samflags.h
@@ -1,4 +1,4 @@
-/* $Id: samflags.h 149314 2014-09-30 01:14:55Z twu $ */
+/* $Id: samflags.h 154089 2014-11-25 21:03:16Z twu $ */
#ifndef SAMFLAGS_INCLUDED
#define SAMFLAGS_INCLUDED
@@ -49,6 +49,31 @@
#define ABBREV_CONCORDANT_MULT "CM"
#define ABBREV_CONCORDANT_MULT_XS "CX"
+typedef enum {OUTPUT_NONE, /* first enumerator, value 0: no split-output category */
+ OUTPUT_NM, /* nomapping */
+ OUTPUT_HU, /* halfmapping_uniq */
+ OUTPUT_HC, /* halfmapping_circular */
+ OUTPUT_HT, /* halfmapping_transloc */
+ OUTPUT_HM, /* halfmapping_mult */
+ OUTPUT_HX, /* halfmapping_mult_xs */
+ OUTPUT_UU, /* unpaired_uniq */
+ OUTPUT_UC, /* unpaired_circular */
+ OUTPUT_UT, /* unpaired_transloc */
+ OUTPUT_UM, /* unpaired_mult */
+ OUTPUT_UX, /* unpaired_mult_xs */
+ OUTPUT_PC, /* paired_uniq_circular */
+ OUTPUT_PI, /* paired_uniq_inv */
+ OUTPUT_PS, /* paired_uniq_scr */
+ OUTPUT_PL, /* paired_uniq_long */
+ OUTPUT_PM, /* paired_mult */
+ OUTPUT_PX, /* paired_mult_xs */
+ OUTPUT_CU, /* concordant_uniq */
+ OUTPUT_CC, /* concordant_circular */
+ OUTPUT_CT, /* concordant_transloc */
+ OUTPUT_CM, /* concordant_mult */
+ OUTPUT_CX} /* concordant_mult_xs */
+ SAM_split_output_type; /* 23 enumerators in total, values 0 (OUTPUT_NONE) through 22 (OUTPUT_CX) */
+
#endif
diff --git a/src/samheader.c b/src/samheader.c
index 64b1960..9736fe2 100644
--- a/src/samheader.c
+++ b/src/samheader.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samheader.c 149320 2014-09-30 02:16:01Z twu $";
+static char rcsid[] = "$Id: samheader.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -38,7 +38,7 @@ SAM_header_change_HD_tosorted (FILE *fp, int headerlen) {
headerlen -= 3;
if (c0 == 'S' && c1 == 'O' && c2 == ':') {
- printf("sorted\n");
+ printf("coordinate\n");
while (headerlen > 0 && fgetc(fp) != '\n') {
/* Skip given SO value */
headerlen--;
diff --git a/src/samprint.c b/src/samprint.c
index 1276903..f88f885 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 150401 2014-10-09 19:24:27Z twu $";
+static char rcsid[] = "$Id: samprint.c 154023 2014-11-25 03:45:18Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -310,20 +310,30 @@ SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
}
+/* Shifts low_querypos and high_querypos upward until a matching
+ nucleotide is found from both hits. If not found, then shifts
+ low_querypos and high_querypos downward until a matching nucleotide
+ is found. */
+
static void
adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
int *hardclip_high, Stage3end_T hit_high, int high_querylength) {
+ int orig_hardclip_low, orig_hardclip_high;
Substring_T low_substring, high_substring;
struct Pair_T *low_pairarray, *high_pairarray;
int low_querystart, low_queryend, low_npairs, high_npairs;
+ int low_querypos, high_querypos;
bool plusp;
debug3(printf("Entering adjust_hardclips with hardclip_low %d, hardclip_high %d\n",
*hardclip_low,*hardclip_high));
+ orig_hardclip_low = *hardclip_low;
+ orig_hardclip_high = *hardclip_high;
plusp = Stage3end_plusp(hit_low);
if (Stage3end_hittype(hit_low) == GMAP && Stage3end_hittype(hit_high) == GMAP) {
+ debug3(printf("Dual GMAP\n"));
low_pairarray = Stage3end_pairarray(hit_low);
low_npairs = Stage3end_npairs(hit_low);
high_pairarray = Stage3end_pairarray(hit_high);
@@ -336,10 +346,34 @@ adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
low_querystart = Stage3end_gmap_querystart(hit_low);
}
if (*hardclip_low > low_querystart) {
- while (Pairarray_contains_p(low_pairarray,low_npairs,*hardclip_low) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querylength - 1 - *hardclip_high) == false) {
+ low_querypos = *hardclip_low;
+ high_querypos = high_querylength - 1 - (*hardclip_high);
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
(*hardclip_low)++;
(*hardclip_high)--;
+ low_querypos++;
+ high_querypos++;
+ }
+ if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
+ debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ while (low_querypos > 0 && high_querypos > 0 &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ }
+ if (low_querypos <= 0 || high_querypos <= 0) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
}
}
@@ -350,15 +384,40 @@ adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
low_queryend = Stage3end_gmap_queryend(hit_low);
}
if (low_querylength - *hardclip_low < low_queryend) {
- while (Pairarray_contains_p(low_pairarray,low_npairs,low_querylength - 1 - (*hardclip_low)) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,*hardclip_high) == false) {
+ low_querypos = low_querylength - 1 - (*hardclip_low);
+ high_querypos = *hardclip_high;
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
(*hardclip_low)--;
(*hardclip_high)++;
+ low_querypos++;
+ high_querypos++;
+ }
+ if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
+ debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ while (low_querypos > 0 && high_querypos > 0 &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ }
+ if (low_querypos <= 0 || high_querypos <= 0) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
}
}
}
} else if (Stage3end_hittype(hit_low) == GMAP) {
+ debug3(printf("Low GMAP\n"));
low_pairarray = Stage3end_pairarray(hit_low);
low_npairs = Stage3end_npairs(hit_low);
@@ -369,11 +428,35 @@ adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
low_querystart = Stage3end_gmap_querystart(hit_low);
}
if (*hardclip_low > low_querystart) {
- high_substring = Stage3end_substring_containing(hit_high,high_querylength - 1 - *hardclip_high);
- while (Pairarray_contains_p(low_pairarray,low_npairs,*hardclip_low) == false || high_substring == NULL) {
+ low_querypos = *hardclip_low;
+ high_querypos = high_querylength - 1 - (*hardclip_high);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false || high_substring == NULL)) {
(*hardclip_low)++;
(*hardclip_high)--;
- high_substring = Stage3end_substring_containing(hit_high,high_querylength - 1 - *hardclip_high);
+ low_querypos++;
+ high_querypos++;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ }
+ if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
+ debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ while (low_querypos > 0 && high_querypos > 0 &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false || high_substring == NULL)) {
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ }
+ if (low_querypos <= 0 || high_querypos <= 0) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
}
}
@@ -384,16 +467,41 @@ adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
low_queryend = Stage3end_gmap_queryend(hit_low);
}
if (low_querylength - *hardclip_low < low_queryend) {
- high_substring = Stage3end_substring_containing(hit_high,*hardclip_high);
- while (Pairarray_contains_p(low_pairarray,low_npairs,low_querylength - 1 - (*hardclip_low)) == false || high_substring == NULL) {
+ low_querypos = low_querylength - 1 - (*hardclip_low);
+ high_querypos = *hardclip_high;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false || high_substring == NULL)) {
(*hardclip_low)--;
(*hardclip_high)++;
- high_substring = Stage3end_substring_containing(hit_high,*hardclip_high);
+ low_querypos++;
+ high_querypos++;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ }
+ if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
+ debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ while (low_querypos > 0 && high_querypos > 0 &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false || high_substring == NULL)) {
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ }
+ if (low_querypos <= 0 || high_querypos <= 0) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
}
}
}
} else if (Stage3end_hittype(hit_high) == GMAP) {
+ debug3(printf("High GMAP\n"));
high_pairarray = Stage3end_pairarray(hit_high);
high_npairs = Stage3end_npairs(hit_high);
@@ -404,11 +512,35 @@ adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
low_querystart = Substring_querystart(Stage3end_substring_low(hit_low));
}
if (*hardclip_low > low_querystart) {
- low_substring = Stage3end_substring_containing(hit_low,*hardclip_low);
- while (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,high_querylength - 1 - *hardclip_high) == false) {
+ low_querypos = *hardclip_low;
+ high_querypos = high_querylength - 1 - (*hardclip_high);
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
(*hardclip_low)++;
(*hardclip_high)--;
- low_substring = Stage3end_substring_containing(hit_low,*hardclip_low);
+ low_querypos++;
+ high_querypos++;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ }
+ if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
+ debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ while (low_querypos > 0 && high_querypos > 0 &&
+ (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ }
+ if (low_querypos <= 0 || high_querypos <= 0) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
}
}
@@ -419,34 +551,88 @@ adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
low_queryend = Substring_queryend(Stage3end_substring_low(hit_low));
}
if (low_querylength - *hardclip_low < low_queryend) {
- low_substring = Stage3end_substring_containing(hit_low,low_querylength - 1 - (*hardclip_low));
- while (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,*hardclip_high) == false) {
+ low_querypos = low_querylength - 1 - (*hardclip_low);
+ high_querypos = *hardclip_high;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
(*hardclip_low)--;
(*hardclip_high)++;
- low_substring = Stage3end_substring_containing(hit_low,low_querylength - 1 - (*hardclip_low));
+ low_querypos++;
+ high_querypos++;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ }
+ if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
+ debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ while (low_querypos > 0 && high_querypos > 0 &&
+ (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ }
+ if (low_querypos <= 0 || high_querypos <= 0) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
}
}
}
} else {
if (plusp == true) {
+ debug3(printf("Both substrings, plus\n"));
+
if (hide_soft_clips_p == true) {
low_querystart = Substring_querystart_orig(Stage3end_substring_low(hit_low));
} else {
low_querystart = Substring_querystart(Stage3end_substring_low(hit_low));
}
+
if (*hardclip_low > low_querystart) {
- low_substring = Stage3end_substring_containing(hit_low,*hardclip_low);
- high_substring = Stage3end_substring_containing(hit_high,high_querylength - 1 - *hardclip_high);
- while (low_substring == NULL || high_substring == NULL) {
+ low_querypos = *hardclip_low;
+ high_querypos = high_querylength - 1 - *hardclip_high;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (low_substring == NULL || high_substring == NULL)) {
(*hardclip_low)++;
(*hardclip_high)--;
- low_substring = Stage3end_substring_containing(hit_low,*hardclip_low);
- high_substring = Stage3end_substring_containing(hit_high,high_querylength - 1 - *hardclip_high);
+ low_querypos++;
+ high_querypos++;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ }
+ if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
+ debug3(printf("Querypos increase failed. Tryiing querypos decrease.\n"));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ while (low_querypos > 0 && high_querypos > 0 &&
+ (low_substring == NULL || high_substring == NULL)) {
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ }
+ if (low_querypos <= 0 || high_querypos <= 0) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
}
}
} else {
+ debug3(printf("Both substrings, minus\n"));
+
if (hide_soft_clips_p == true) {
low_queryend = Substring_queryend_orig(Stage3end_substring_low(hit_low));
} else {
@@ -454,13 +640,38 @@ adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
}
if (low_querylength - *hardclip_low < low_queryend) {
- low_substring = Stage3end_substring_containing(hit_low,low_querylength - 1 - (*hardclip_low));
- high_substring = Stage3end_substring_containing(hit_high,*hardclip_high);
- while (low_substring == NULL || high_substring == NULL) {
+ low_querypos = low_querylength - 1 - (*hardclip_low);
+ high_querypos = *hardclip_high;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (low_substring == NULL || high_substring == NULL)) {
(*hardclip_low)--;
(*hardclip_high)++;
- low_substring = Stage3end_substring_containing(hit_low,low_querylength - 1 - (*hardclip_low));
- high_substring = Stage3end_substring_containing(hit_high,*hardclip_high);
+ low_querypos++;
+ high_querypos++;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ }
+ if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
+ debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ while (low_querypos > 0 && high_querypos > 0 &&
+ (low_substring == NULL || high_substring == NULL)) {
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ }
+ if (low_querypos <= 0 || high_querypos <= 0) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
}
}
}
@@ -531,7 +742,8 @@ SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, Subst
}
static void
-print_chromosomal_pos (FILE *fp, Chrnum_T chrnum, Chrpos_T chrpos, Univ_IIT_T chromosome_iit) {
+print_chromosomal_pos (FILE *fp, Chrnum_T chrnum, Chrpos_T chrpos, Chrpos_T chrlength,
+ Univ_IIT_T chromosome_iit) {
bool allocp;
char *chr;
@@ -556,14 +768,18 @@ print_chromosomal_pos (FILE *fp, Chrnum_T chrnum, Chrpos_T chrpos, Univ_IIT_T ch
}
/* chrpos already in 1-based coordinates */
- fprintf(fp,"\t%u",chrpos /*+1U*/);
+ if (chrpos > chrlength) {
+ fprintf(fp,"\t%u",chrpos - chrlength /*+1U*/);
+ } else {
+ fprintf(fp,"\t%u",chrpos /*+1U*/);
+ }
return;
}
}
static void
print_mate_chromosomal_pos (FILE *fp, Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
- Chrpos_T mate_chrpos, Chrnum_T anchor_chrnum, Chrpos_T anchor_chrpos,
+ Chrpos_T mate_chrpos, Chrpos_T mate_chrlength, Chrnum_T anchor_chrnum, Chrpos_T anchor_chrpos,
Univ_IIT_T chromosome_iit) {
bool allocp;
char *chr;
@@ -586,7 +802,11 @@ print_mate_chromosomal_pos (FILE *fp, Chrnum_T mate_chrnum, Chrnum_T mate_effect
}
/* chrpos already in 1-based coordinates */
- fprintf(fp,"\t%u",mate_chrpos /*+1U*/);
+ if (mate_chrpos > mate_chrlength) {
+ fprintf(fp,"\t%u",mate_chrpos - mate_chrlength /*+1U*/);
+ } else {
+ fprintf(fp,"\t%u",mate_chrpos /*+1U*/);
+ }
return;
} else {
@@ -601,7 +821,11 @@ print_mate_chromosomal_pos (FILE *fp, Chrnum_T mate_chrnum, Chrnum_T mate_effect
}
/* chrpos already in 1-based coordinates */
- fprintf(fp,"\t%u",mate_chrpos /*+1U*/);
+ if (mate_chrpos > mate_chrlength) {
+ fprintf(fp,"\t%u",mate_chrpos - mate_chrlength /*+1U*/);
+ } else {
+ fprintf(fp,"\t%u",mate_chrpos /*+1U*/);
+ }
return;
}
}
@@ -662,7 +886,8 @@ SAM_print_nomapping (FILE *fp, char *abbrev, Shortread_T queryseq, Stage3end_T m
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
+ print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),
/*anchor_chrnum*/0,/*anchor_chrpos*/0U,chromosome_iit);
@@ -986,6 +1211,103 @@ compute_cigar (List_T tokens, char type, int stringlength, int querypos, int que
}
+/* Modified from compute_cigar: pushes only CIGAR operation characters ('H' and the given type), not lengths, onto types and returns the resulting list */
+static Intlist_T
+compute_cigar_types_only (Intlist_T types, char type, int stringlength, int querypos, int querylength,
+ int hardclip_low, int hardclip_high, bool plusp, int lastp) { /* lastp is declared int but compared against true below */
+ int matchlength = 0;
+ int startpos, endpos;
+ int cliplength = 0;
+ /* plusp: the segment runs upward from querypos to querypos + stringlength; otherwise it runs downward from querypos to querypos - stringlength. */
+ if (plusp == true) {
+ debug1(printf("\nEntering compute_cigar_types_only with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus\n",
+ type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
+ if (hardclip_low > querypos) { /* > not >= */
+ startpos = hardclip_low;
+ cliplength = hardclip_low;
+ } else {
+ startpos = querypos;
+ }
+ /* Cap the segment end at querylength - hardclip_high. */
+ if (querylength - hardclip_high < querypos + stringlength) {
+ endpos = querylength - hardclip_high;
+ debug1(printf(" endpos %d = querylength %d - hardclip_high %d\n",endpos,querylength,hardclip_high));
+ } else {
+ endpos = querypos + stringlength;
+ debug1(printf(" endpos %d = querypos %d + stringlength %d\n",endpos,querypos,stringlength));
+ }
+
+ debug1(printf(" new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
+ /* cliplength is nonzero only when the low hard clip moved startpos; then an 'H' goes before the segment's own type. */
+ if (endpos >= startpos) {
+ if (cliplength > 0) {
+ debug1(printf(" Pushing initial %dH\n",cliplength));
+ types = Intlist_push(types,'H');
+ }
+ matchlength = endpos - startpos;
+ if (matchlength > 0) {
+ debug1(printf(" Pushing %d%c\n",matchlength,type));
+ types = Intlist_push(types,type);
+ }
+ }
+
+ /* On the last segment, anything between endpos and querylength yields a final 'H'. */
+ if (lastp == true) {
+ /* cliplength = querypos + stringlength - endpos; */
+ cliplength = querylength - endpos;
+ if (cliplength > 0) {
+ debug1(printf(" Pushing final %dH\n",cliplength));
+ types = Intlist_push(types,'H');
+ }
+ }
+
+ } else {
+ debug1(printf("\nEntering compute_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus\n",
+ type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
+ /* Minus: startpos is capped at querylength - hardclip_low. */
+ if (querylength - hardclip_low < querypos) {
+ startpos = querylength - hardclip_low;
+ cliplength = hardclip_low;
+ } else {
+ startpos = querypos;
+ }
+ /* Minus: endpos is raised to hardclip_high when the segment would extend to or below it. */
+ if (hardclip_high >= querypos - stringlength) {
+ endpos = hardclip_high;
+ debug1(printf(" endpos %d = hardclip_high %d\n",endpos,hardclip_high));
+ } else {
+ endpos = querypos - stringlength;
+ debug1(printf(" endpos %d = querypos %d - stringlength %d\n",endpos,querypos,stringlength));
+ }
+
+ debug1(printf(" new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
+
+ if (endpos <= startpos) {
+ if (cliplength > 0) {
+ debug1(printf(" Pushing initial %dH\n",cliplength));
+ types = Intlist_push(types,'H');
+ }
+ matchlength = startpos - endpos;
+ if (matchlength > 0) {
+ debug1(printf(" Pushing %d%c\n",matchlength,type));
+ types = Intlist_push(types,type);
+ }
+ }
+
+ /* On the last segment, a positive endpos yields a final 'H'. */
+ if (lastp == true) {
+ cliplength = endpos;
+ if (cliplength > 0) {
+ debug1(printf(" Pushing final %dH\n",cliplength));
+ types = Intlist_push(types,'H');
+ }
+ }
+ }
+
+ return types;
+}
+
+
static void
print_cigar (FILE *fp, char type, int stringlength, int querypos, int querylength,
int hardclip_low, int hardclip_high, bool plusp, int lastp) {
@@ -1295,20 +1617,45 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
}
+/* Copy also in pair.c for GMAP.  Returns true iff at least one entry of cigar_types equals 'M'. */
+static bool
+check_cigar_types (Intlist_T cigar_types) {
+ Intlist_T p;
+ int type, last_type = 'M'; /* last_type is referenced only inside the #if 0 region below and is never updated */
+ bool M_present_p = false;
+
+ for (p = cigar_types; p != NULL; p = Intlist_next(p)) {
+ type = Intlist_head(p);
+ if (type == 'M') {
+ M_present_p = true; /* keep scanning; no early exit on the first 'M' */
+#if 0 /* rejection of adjacent S and H entries is compiled out */
+ } else if (type == 'H' && last_type == 'S') {
+ debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
+ return false;
+ } else if (type == 'S' && last_type == 'H') {
+ debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
+ return false;
+#endif
+ }
+ }
+
+ return M_present_p; /* stays false for an empty list or a list without any 'M' */
+}
+
+
+
static void
print_single (FILE *fp, char *abbrev, Hittype_T hittype, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip5, int hardclip3,
+ Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
Resulttype_T resulttype, bool first_read_p,
int npaths_mate, int quality_shift,
char *sam_read_group_id, bool invertp, bool invert_mate_p, bool circularp) {
unsigned int flag = 0U;
Substring_T substring;
int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength, substring_start, substring_length;
- int hardclip_low, hardclip_high;
- /* int mate_hardclip_low, mate_hardclip_high; */
char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
bool plusp, printp;
@@ -1318,47 +1665,7 @@ print_single (FILE *fp, char *abbrev, Hittype_T hittype, Stage3end_T this, Stage
plusp = Stage3end_plusp(this);
substring = Stage3end_substring1(this);
- if (circularp == true) {
- /* clipdir should be +1 */
- if (1 || plusp == true) {
- hardclip_low = hardclip5;
- hardclip_high = hardclip3;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- } else {
- hardclip_low = hardclip3;
- hardclip_high = hardclip5;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- }
-
- } else {
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip5;
- } else {
- hardclip_low = hardclip5;
- hardclip_high = 0;
- }
- /* mate_hardclip_low = hardclip3; */
- /* mate_hardclip_high = 0; */
- /* fprintf(stderr,"first read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip3;
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip3;
- }
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = hardclip5; */
- /* fprintf(stderr,"second read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- }
- }
- /* printf("clipdir is %d, hardclip_low %d, hardclip_high %d\n",clipdir,hardclip_low,hardclip_high); */
-
+ debug(printf("clipdir is %d, hardclip_low %d, hardclip_high %d\n",clipdir,hardclip_low,hardclip_high));
/* 1. QNAME */
if (acc2 == NULL) {
@@ -1375,7 +1682,7 @@ print_single (FILE *fp, char *abbrev, Hittype_T hittype, Stage3end_T this, Stage
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,chromosome_iit);
+ print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,Stage3end_chrlength(this),chromosome_iit);
/* 5. MAPQ: Mapping quality */
@@ -1425,7 +1732,8 @@ print_single (FILE *fp, char *abbrev, Hittype_T hittype, Stage3end_T this, Stage
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
+ print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),
Stage3end_chrnum(this),chrpos,chromosome_iit);
@@ -1589,12 +1897,72 @@ print_single (FILE *fp, char *abbrev, Hittype_T hittype, Stage3end_T this, Stage
}
+static bool /* Builds the CIGAR op-type list for a hit with one substring and returns check_cigar_types() of that list */
+check_cigar_single (Hittype_T hittype, Stage3end_T this,
+ int querylength, int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, bool circularp) { /* first_read_p and circularp are not read in this body; clipdir appears only in the debug1 trace */
+ bool result;
+ Intlist_T cigar_types = NULL;
+ Substring_T substring;
+ bool plusp;
+
+ plusp = Stage3end_plusp(this);
+ substring = Stage3end_substring1(this);
+
+ debug1(printf("clipdir is %d, hardclip_low %d, hardclip_high %d\n",clipdir,hardclip_low,hardclip_high));
+
+ if (plusp == true) {
+ if (hide_soft_clips_p == true && hittype != TERMINAL) { /* hide_soft_clips_p is not declared in this function; one 'M' run replaces the S/M/S triple of the else branch */
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_querystart(substring) + Substring_match_length(substring) +
+ (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else { /* three pieces in query order: leading 'S', 'M', trailing 'S' */
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+
+ } else { /* minus strand: the same pieces are issued in the opposite order, starting at querypos == querylength */
+ if (hide_soft_clips_p == true && hittype != TERMINAL) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ (querylength - Substring_queryend(substring)) +
+ Substring_match_length(substring) + Substring_querystart(substring),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ }
+ }
+
+ result = check_cigar_types(cigar_types);
+
+ Intlist_free(&cigar_types); /* the list is scratch storage for the check only */
+ return result;
+}
+
+
+
static void
print_insertion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip5, int hardclip3,
+ Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
Resulttype_T resulttype, bool first_read_p, int npaths_mate,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
bool circularp) {
@@ -1603,8 +1971,6 @@ print_insertion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
int substring1_start, substring2_start, substring1_length, substring2_length, matchlength, nindels;
- int hardclip_low, hardclip_high;
- /* int mate_hardclip_low, mate_hardclip_high; */
bool plusp, printp;
List_T cigar_tokens = NULL;
@@ -1614,45 +1980,6 @@ print_insertion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
substring1 = Stage3end_substring1(this);
substring2 = Stage3end_substring2(this);
- if (circularp == true) {
- /* clipdir should be +1 */
- if (1 || plusp == true) {
- hardclip_low = hardclip5;
- hardclip_high = hardclip3;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- } else {
- hardclip_low = hardclip3;
- hardclip_high = hardclip5;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- }
- } else {
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip5;
- } else {
- hardclip_low = hardclip5;
- hardclip_high = 0;
- }
- /* mate_hardclip_low = hardclip3; */
- /* mate_hardclip_high = 0; */
- /* fprintf(stderr,"first read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip3;
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip3;
- }
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = hardclip5; */
- /* fprintf(stderr,"second read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- }
- }
-
nindels = Stage3end_nindels(this);
/* 1. QNAME */
@@ -1670,7 +1997,7 @@ print_insertion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,chromosome_iit);
+ print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,Stage3end_chrlength(this),chromosome_iit);
/* 5. MAPQ: Mapping quality */
@@ -1754,7 +2081,8 @@ print_insertion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
+ print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),
Stage3end_chrnum(this),chrpos,chromosome_iit);
@@ -1948,13 +2276,104 @@ print_insertion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
return;
}
+static bool /* Builds the CIGAR op-type list for a two-substring hit joined by an 'I' piece and returns check_cigar_types() of that list */
+check_cigar_insertion (Stage3end_T this, int querylength, int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, bool circularp) { /* clipdir, first_read_p and circularp are not read in this body */
+ bool result;
+ Intlist_T cigar_types = NULL;
+ Substring_T substring1, substring2;
+ bool plusp;
+ int nindels;
+
+ plusp = Stage3end_plusp(this);
+
+ substring1 = Stage3end_substring1(this);
+ substring2 = Stage3end_substring2(this);
+
+ nindels = Stage3end_nindels(this);
+
+ if (plusp == true) {
+ if (hide_soft_clips_p == true) { /* hide_soft_clips_p is not declared in this function; the leading length is folded into the first 'M' */
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_querystart(substring1) + Substring_match_length(substring1),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
+ /*querypos*/Substring_querystart(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ }
+
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'I',nindels, /* 'I' piece of length nindels, placed at the end of substring1 */
+ /*querypos*/Substring_queryend(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(substring2) + (querylength - Substring_queryend(substring2)),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false); /* NOTE(review): this is the final piece yet lastp is false, while the minus-strand counterpart below passes true -- confirm intended */
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
+ /*querypos*/Substring_queryend(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+
+ } else { /* minus strand: substring2 pieces first, starting at querypos == querylength, then 'I', then substring1 pieces */
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ (querylength - Substring_queryend(substring2)) +
+ Substring_match_length(substring2),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
+ /*querypos*/Substring_queryend(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ }
+
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'I',nindels,
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(substring1) +
+ Substring_querystart(substring1),
+ /*querypos*/Substring_queryend(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
+ /*querypos*/Substring_queryend(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
+ /*querypos*/Substring_querystart(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ }
+ }
+
+ result = check_cigar_types(cigar_types);
+
+ Intlist_free(&cigar_types); /* the list is scratch storage for the check only */
+ return result;
+}
+
static void
print_deletion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip5, int hardclip3,
+ Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
Resulttype_T resulttype, bool first_read_p, int npaths_mate,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
bool circularp) {
@@ -1964,8 +2383,6 @@ print_deletion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicfwd_deletion,
*genomicdir_refdiff, *genomicdir_bothdiff;
int substring1_start, substring2_start, substring1_length, substring2_length, nindels;
- int hardclip_low, hardclip_high;
- /* int mate_hardclip_low, mate_hardclip_high; */
bool plusp, printp;
querylength = Shortread_fulllength(queryseq);
@@ -1974,44 +2391,6 @@ print_deletion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
substring1 = Stage3end_substring1(this);
substring2 = Stage3end_substring2(this);
- if (circularp == true) {
- /* clipdir should be +1 */
- if (1 || plusp == true) {
- hardclip_low = hardclip5;
- hardclip_high = hardclip3;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- } else {
- hardclip_low = hardclip3;
- hardclip_high = hardclip5;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- }
- } else {
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip5;
- } else {
- hardclip_low = hardclip5;
- hardclip_high = 0;
- }
- /* mate_hardclip_low = hardclip3; */
- /* mate_hardclip_high = 0; */
- /* fprintf(stderr,"first read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip3;
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip3;
- }
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = hardclip5; */
- /* fprintf(stderr,"second read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- }
- }
#if 0
/* These cases are checked below */
@@ -2042,7 +2421,7 @@ print_deletion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,chromosome_iit);
+ print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,Stage3end_chrlength(this),chromosome_iit);
/* 5. MAPQ: Mapping quality */
@@ -2158,7 +2537,8 @@ print_deletion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
+ print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),
Stage3end_chrnum(this),chrpos,chromosome_iit);
@@ -2361,6 +2741,131 @@ print_deletion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
return;
}
+static bool /* Builds the CIGAR op-type list for a two-substring hit with an optional 'D' entry between them and returns check_cigar_types() of that list */
+check_cigar_deletion (Stage3end_T this, int querylength, int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, bool circularp) { /* clipdir, first_read_p and circularp are not read in this body */
+ bool result;
+ Intlist_T cigar_types = NULL;
+ Substring_T substring1, substring2;
+ int substring1_start, substring2_start, substring1_length;
+ bool plusp;
+
+ plusp = Stage3end_plusp(this);
+
+ substring1 = Stage3end_substring1(this);
+ substring2 = Stage3end_substring2(this);
+
+ if (hide_soft_clips_p == true) { /* hide_soft_clips_p is not declared in this function; it selects the _orig accessors for the bounds used in the clip tests below */
+ substring1_start = Substring_querystart_orig(substring1);
+ substring1_length = Substring_match_length_orig(substring1);
+ substring2_start = Substring_querystart_orig(substring2);
+ /* substring2_length = Substring_match_length_orig(substring2); */
+ } else {
+ substring1_start = Substring_querystart(substring1);
+ substring1_length = Substring_match_length(substring1);
+ substring2_start = Substring_querystart(substring2);
+ /* substring2_length = Substring_match_length(substring2); */
+ }
+
+ if (plusp == true) {
+ if (hide_soft_clips_p == true) {
+ if (/*nindels > 0 &&*/ hardclip_low < substring1_start + substring1_length && hardclip_high < querylength - substring2_start) { /* both clip tests pass: emit M, D, M; otherwise the else branch emits one merged M */
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_querystart(substring1) +
+ Substring_match_length(substring1),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = Intlist_push(cigar_types,'D'); /* pushed directly rather than through compute_cigar_types_only */
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(substring2) +
+ (querylength - Substring_queryend(substring2)),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_querystart(substring1) +
+ (Substring_match_length(substring1) +
+ Substring_match_length(substring2)) +
+ (querylength - Substring_queryend(substring2)),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/true);
+ }
+
+
+ } else { /* soft clips shown: leading 'S', then M,D,M or one merged M, then trailing 'S' */
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ if (/*nindels > 0 &&*/ hardclip_low < substring1_start + substring1_length && hardclip_high < querylength - substring2_start) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
+ /*querypos*/Substring_querystart(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ cigar_types = Intlist_push(cigar_types,'D');
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1) + Substring_match_length(substring2),
+ /*querypos*/Substring_querystart(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ }
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
+ /*querypos*/Substring_queryend(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+
+ } else { /* minus strand: substring2 pieces come first, starting at querypos == querylength; the two clip tests swap which substring bound they use */
+ if (hide_soft_clips_p == true) {
+ if (/*nindels > 0 &&*/ hardclip_low < querylength - substring2_start && hardclip_high < substring1_start + substring1_length) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ (querylength - Substring_queryend(substring2)) +
+ Substring_match_length(substring2),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false);
+ cigar_types = Intlist_push(cigar_types,'D');
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(substring1) +
+ Substring_querystart(substring1),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ (querylength - Substring_queryend(substring2)) +
+ (Substring_match_length(substring2) + Substring_match_length(substring1)) +
+ Substring_querystart(substring1),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ }
+
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ if (/*nindels > 0 &&*/ hardclip_low < querylength - substring2_start && hardclip_high < substring1_start + substring1_length) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
+ /*querypos*/Substring_queryend(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = Intlist_push(cigar_types,'D');
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2) + Substring_match_length(substring1),
+ /*querypos*/Substring_queryend(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ }
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
+ /*querypos*/Substring_querystart(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ }
+ }
+
+ result = check_cigar_types(cigar_types);
+
+ Intlist_free(&cigar_types); /* the list is scratch storage for the check only */
+ return result;
+}
+
static void
halfdonor_dinucleotide (char *donor1, char *donor2, Substring_T donor) {
@@ -2419,7 +2924,7 @@ print_halfdonor (FILE *fp, char *abbrev, Substring_T donor, Stage3end_T this, St
char *acc1, char *acc2, int pathnum, int npaths, int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
Chrpos_T concordant_chrpos, Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos, Chrpos_T mate_chrpos,
- int clipdir, int hardclip5, int hardclip3, Resulttype_T resulttype, bool first_read_p, int npaths_mate,
+ int clipdir, int hardclip_low, int hardclip_high, Resulttype_T resulttype, bool first_read_p, int npaths_mate,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
bool use_hardclip_p, bool print_xt_p, char donor_strand, char acceptor_strand,
char *donor_chr, char *acceptor_chr, char donor1, char donor2, char acceptor2, char acceptor1,
@@ -2429,57 +2934,15 @@ print_halfdonor (FILE *fp, char *abbrev, Substring_T donor, Stage3end_T this, St
bool sensep;
char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
int substring_start, substring_length;
- int hardclip_low, hardclip_high;
- /* int mate_hardclip_low, mate_hardclip_high; */
int transloc_hardclip_low, transloc_hardclip_high;
bool plusp, printp;
bool start_ambig, end_ambig;
- int amb_nmatches_start, amb_nmatches_end;
+ int amb_length_start, amb_length_end;
querylength = Shortread_fulllength(queryseq);
plusp = Substring_plusp(donor);
- if (circularp == true) {
- /* clipdir should be +1 */
- if (1 || plusp == true) {
- hardclip_low = hardclip5;
- hardclip_high = hardclip3;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- } else {
- hardclip_low = hardclip3;
- hardclip_high = hardclip5;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- }
- } else {
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip5;
- } else {
- hardclip_low = hardclip5;
- hardclip_high = 0;
- }
- /* mate_hardclip_low = hardclip3; */
- /* mate_hardclip_high = 0; */
- /* fprintf(stderr,"first read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip3;
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip3;
- }
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = hardclip5; */
- /* fprintf(stderr,"second read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- }
- }
-
-
/* 1. QNAME */
if (acc2 == NULL) {
fprintf(fp,"%s",acc1);
@@ -2495,7 +2958,7 @@ print_halfdonor (FILE *fp, char *abbrev, Substring_T donor, Stage3end_T this, St
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Substring_chrnum(donor),donor_chrpos,chromosome_iit);
+ print_chromosomal_pos(fp,Substring_chrnum(donor),donor_chrpos,Substring_chrlength(donor),chromosome_iit);
/* 5. MAPQ: Mapping quality */
@@ -2642,7 +3105,8 @@ print_halfdonor (FILE *fp, char *abbrev, Substring_T donor, Stage3end_T this, St
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
/* For anchor_chrnum, previously used Stage3end_chrnum(this), but this is 0 */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
+ print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),
/*anchor_chrnum*/Substring_chrnum(donor),donor_chrpos,chromosome_iit);
@@ -2874,12 +3338,12 @@ print_halfdonor (FILE *fp, char *abbrev, Substring_T donor, Stage3end_T this, St
if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
(end_ambig = Stage3end_end_ambiguous_p(this)) == true) {
#if 1
- amb_nmatches_start = Stage3end_amb_nmatches_start(this);
- amb_nmatches_end = Stage3end_amb_nmatches_end(this);
+ amb_length_start = Stage3end_amb_length_start(this);
+ amb_length_end = Stage3end_amb_length_end(this);
if (plusp == true) {
- fprintf(fp,"\tXA:Z:%d,%d",amb_nmatches_start,amb_nmatches_end);
+ fprintf(fp,"\tXA:Z:%d,%d",amb_length_start,amb_length_end);
} else {
- fprintf(fp,"\tXA:Z:%d,%d",amb_nmatches_end,amb_nmatches_start);
+ fprintf(fp,"\tXA:Z:%d,%d",amb_length_end,amb_length_start);
}
#else
if (start_ambig == true && end_ambig == true) {
@@ -2921,12 +3385,165 @@ print_halfdonor (FILE *fp, char *abbrev, Substring_T donor, Stage3end_T this, St
}
+static bool /* Builds the CIGAR op-type list for the donor substring of a split hit and returns check_cigar_types() of that list */
+check_cigar_halfdonor (Substring_T donor, int querylength, int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, bool circularp) { /* clipdir, first_read_p and circularp are not read in this body */
+ bool result;
+ Intlist_T cigar_types = NULL;
+ bool plusp, sensep;
+ bool use_hardclip_p = false; /* local, fixed to false and never reassigned here: every "use_hardclip_p ? 'H' : 'S'" below yields 'S' */
+ int transloc_hardclip_low, transloc_hardclip_high; /* assigned and read only inside the use_hardclip_p block */
+
+ plusp = Substring_plusp(donor);
+
+ sensep = Substring_chimera_sensep(donor);
+
+ if (use_hardclip_p == true) { /* not entered while use_hardclip_p stays false; would raise hardclip_low/high to cover the part beyond the donor */
+ if (sensep == plusp) {
+ transloc_hardclip_low = 0;
+ if (plusp == true) {
+ /* sensep true */
+ transloc_hardclip_high = querylength - Substring_queryend(donor);
+
+ } else {
+ /* sensep false */
+ transloc_hardclip_high = Substring_querystart(donor);
+ }
+
+ } else { /* sensep != Substring_plusp(donor) */
+ transloc_hardclip_high = 0;
+ if (plusp == true) {
+ transloc_hardclip_low = Substring_querystart(donor);
+
+ } else {
+ transloc_hardclip_low = querylength - Substring_queryend(donor);
+ }
+ }
+
+ if (transloc_hardclip_low > hardclip_low) { /* only ever widens the caller-supplied clip */
+ hardclip_low = transloc_hardclip_low;
+ }
+ if (transloc_hardclip_high > hardclip_high) {
+ hardclip_high = transloc_hardclip_high;
+ }
+ }
+
+
+ if (sensep == plusp) { /* in this half the clipped piece (the 'H'/'S' selected by use_hardclip_p) is issued last */
+ if (plusp == true) {
+ /* sensep true */
+ assert(Substring_chimera_pos(donor) == Substring_queryend(donor));
+ if (hide_soft_clips_p == true) { /* hide_soft_clips_p is not declared in this function; the leading length is folded into the 'M' */
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_querystart(donor) +
+ Substring_match_length(donor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(donor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(donor),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+
+ } else {
+ /* sensep false */
+ assert(Substring_chimera_pos(donor) == Substring_querystart(donor));
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ (querylength - Substring_queryend(donor)) +
+ Substring_match_length(donor),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(donor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ }
+ }
+
+ } else { /* sensep != Substring_plusp(donor) */
+ if (plusp == true) { /* in this half the clipped piece is issued first and the trailing 'S' is always literal */
+ assert(Substring_chimera_pos(donor) == Substring_querystart(donor));
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(donor) +
+ (querylength - Substring_queryend(donor)),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(donor),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+
+ } else {
+ assert(Substring_chimera_pos(donor) == Substring_queryend(donor));
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(donor) +
+ Substring_querystart(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(donor),
+ /*querypos*/Substring_querystart(donor),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true);
+ }
+ }
+ }
+
+ result = check_cigar_types(cigar_types);
+
+ Intlist_free(&cigar_types); /* the list is scratch storage for the check only */
+ return result;
+}
+
+
+
static void
print_halfacceptor (FILE *fp, char *abbrev, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths, int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
Chrpos_T concordant_chrpos, Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos, Chrpos_T mate_chrpos,
- int clipdir, int hardclip5, int hardclip3, Resulttype_T resulttype, bool first_read_p, int npaths_mate,
+ int clipdir, int hardclip_low, int hardclip_high, Resulttype_T resulttype, bool first_read_p, int npaths_mate,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
bool use_hardclip_p, bool print_xt_p, char donor_strand, char acceptor_strand,
char *donor_chr, char *acceptor_chr, char donor1, char donor2, char acceptor2, char acceptor1,
@@ -2936,57 +3553,15 @@ print_halfacceptor (FILE *fp, char *abbrev, Substring_T acceptor, Stage3end_T th
bool sensep;
char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
int substring_start, substring_length;
- int hardclip_low, hardclip_high;
- /* int mate_hardclip_low, mate_hardclip_high; */
int transloc_hardclip_low, transloc_hardclip_high;
bool plusp, printp;
bool start_ambig, end_ambig;
- int amb_nmatches_start, amb_nmatches_end;
+ int amb_length_start, amb_length_end;
querylength = Shortread_fulllength(queryseq);
plusp = Substring_plusp(acceptor);
- if (circularp == true) {
- /* clipdir should be +1 */
- if (1 || plusp == true) {
- hardclip_low = hardclip5;
- hardclip_high = hardclip3;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- } else {
- hardclip_low = hardclip3;
- hardclip_high = hardclip5;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- }
- } else {
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip5;
- } else {
- hardclip_low = hardclip5;
- hardclip_high = 0;
- }
- /* mate_hardclip_low = hardclip3; */
- /* mate_hardclip_high = 0; */
- /* fprintf(stderr,"first read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip3;
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip3;
- }
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = hardclip5; */
- /* fprintf(stderr,"second read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- }
- }
-
-
/* 1. QNAME */
if (acc2 == NULL) {
fprintf(fp,"%s",acc1);
@@ -3002,7 +3577,7 @@ print_halfacceptor (FILE *fp, char *abbrev, Substring_T acceptor, Stage3end_T th
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Substring_chrnum(acceptor),acceptor_chrpos,chromosome_iit);
+ print_chromosomal_pos(fp,Substring_chrnum(acceptor),acceptor_chrpos,Substring_chrlength(acceptor),chromosome_iit);
/* 5. MAPQ: Mapping quality */
@@ -3147,7 +3722,8 @@ print_halfacceptor (FILE *fp, char *abbrev, Substring_T acceptor, Stage3end_T th
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
/* For anchor_chrnum, previously used Stage3end_chrnum(this), but this is 0 */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
+ print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),
/*anchor_chrnum*/Substring_chrnum(acceptor),acceptor_chrpos,chromosome_iit);
@@ -3378,12 +3954,12 @@ print_halfacceptor (FILE *fp, char *abbrev, Substring_T acceptor, Stage3end_T th
if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
(end_ambig = Stage3end_end_ambiguous_p(this)) == true) {
#if 1
- amb_nmatches_start = Stage3end_amb_nmatches_start(this);
- amb_nmatches_end = Stage3end_amb_nmatches_end(this);
+ amb_length_start = Stage3end_amb_length_start(this);
+ amb_length_end = Stage3end_amb_length_end(this);
if (plusp == true) {
- fprintf(fp,"\tXA:Z:%d,%d",amb_nmatches_start,amb_nmatches_end);
+ fprintf(fp,"\tXA:Z:%d,%d",amb_length_start,amb_length_end);
} else {
- fprintf(fp,"\tXA:Z:%d,%d",amb_nmatches_end,amb_nmatches_start);
+ fprintf(fp,"\tXA:Z:%d,%d",amb_length_end,amb_length_start);
}
#else
if (start_ambig == true && end_ambig == true) {
@@ -3425,13 +4001,161 @@ print_halfacceptor (FILE *fp, char *abbrev, Substring_T acceptor, Stage3end_T th
}
+static bool
+check_cigar_halfacceptor (Substring_T acceptor, int querylength, int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, bool circularp) {
+ bool result;
+ Intlist_T cigar_types = NULL;
+ bool plusp, sensep;
+ bool use_hardclip_p = false; /* fixed to false in this function: the block guarded by it below is never entered, and every "use_hardclip_p ? 'H' : 'S'" yields 'S' */
+ int transloc_hardclip_low, transloc_hardclip_high;
+ /* Collects the CIGAR operation types for the acceptor substring via compute_cigar_types_only() and returns the verdict of check_cigar_types() on that list.  clipdir, first_read_p and circularp are accepted but not read in this function. */
+ plusp = Substring_plusp(acceptor);
+
+ sensep = Substring_chimera_sensep(acceptor);
+ /* Not executed while use_hardclip_p is false.  If enabled, it raises hardclip_low/hardclip_high to at least the unaligned length on one end of the acceptor (querystart, or querylength - queryend). */
+ if (use_hardclip_p == true) {
+ if (sensep != plusp) {
+ transloc_hardclip_low = 0;
+ if (plusp == true) {
+ /* sensep false */
+ transloc_hardclip_high = querylength - Substring_queryend(acceptor);
+
+ } else {
+ /* sensep true */
+ transloc_hardclip_high = Substring_querystart(acceptor);
+ }
+
+ } else { /* sensep == Substring_plusp(acceptor) */
+ transloc_hardclip_high = 0;
+ if (plusp == true) {
+ transloc_hardclip_low = Substring_querystart(acceptor);
+
+ } else {
+ transloc_hardclip_low = querylength - Substring_queryend(acceptor);
+ }
+ }
+
+ if (transloc_hardclip_low > hardclip_low) {
+ hardclip_low = transloc_hardclip_low;
+ }
+ if (transloc_hardclip_high > hardclip_high) {
+ hardclip_high = transloc_hardclip_high;
+ }
+ }
+ /* Four layouts follow, chosen by (sensep != plusp) and plusp.  With plusp true the first op starts at querypos 0; with plusp false it starts at querypos querylength.  When hide_soft_clips_p (defined outside this block) is true, one clip length is added to the 'M' length instead of being emitted as its own op. */
+ if (sensep != plusp) {
+ if (plusp == true) {
+ /* sensep false */
+ assert(Substring_chimera_pos(acceptor) == Substring_queryend(acceptor));
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_querystart(acceptor) +
+ Substring_match_length(acceptor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(acceptor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+
+ } else {
+ /* sensep true */
+ assert(Substring_chimera_pos(acceptor) == Substring_querystart(acceptor));
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ (querylength - Substring_queryend(acceptor)) +
+ Substring_match_length(acceptor),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(acceptor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true);
+ }
+ }
+
+ } else { /* sensep == Substring_plusp(acceptor) */
+ if (plusp == true) {
+ assert(Substring_chimera_pos(acceptor) == Substring_querystart(acceptor));
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(acceptor) +
+ (querylength - Substring_queryend(acceptor)),
+ /*querypos*/Substring_querystart(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+
+ } else {
+ assert(Substring_chimera_pos(acceptor) == Substring_queryend(acceptor));
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(acceptor) +
+ Substring_querystart(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true);
+ }
+ }
+ }
+ /* Evaluate the collected op types, then release the list before returning the verdict. */
+ result = check_cigar_types(cigar_types);
+
+ Intlist_free(&cigar_types);
+ return result;
+}
+
static void
print_localsplice (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip5, int hardclip3,
+ Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
Resulttype_T resulttype, bool first_read_p, int npaths_mate,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
bool circularp) {
@@ -3442,8 +4166,6 @@ print_localsplice (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
bool sensep;
char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
int substring1_start, substring2_start, substring1_length, substring2_length, matchlength;
- int hardclip_low, hardclip_high;
- /* int mate_hardclip_low, mate_hardclip_high; */
bool plusp, printp;
querylength = Shortread_fulllength(queryseq);
@@ -3454,46 +4176,6 @@ print_localsplice (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
}
sensep = (sensedir == SENSE_FORWARD);
- if (circularp == true) {
- /* clipdir should be +1 */
- if (1 || plusp == true) {
- hardclip_low = hardclip5;
- hardclip_high = hardclip3;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- } else {
- hardclip_low = hardclip3;
- hardclip_high = hardclip5;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- }
- } else {
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip5;
- } else {
- hardclip_low = hardclip5;
- hardclip_high = 0;
- }
- /* mate_hardclip_low = hardclip3; */
- /* mate_hardclip_high = 0; */
- /* fprintf(stderr,"first read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip3;
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip3;
- }
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = hardclip5; */
- /* fprintf(stderr,"second read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- }
- }
-
-
/* 1. QNAME */
if (acc2 == NULL) {
fprintf(fp,"%s",acc1);
@@ -3509,7 +4191,7 @@ print_localsplice (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,chromosome_iit);
+ print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,Stage3end_chrlength(this),chromosome_iit);
/* 5. MAPQ: Mapping quality */
@@ -3607,7 +4289,8 @@ print_localsplice (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
+ print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),
Stage3end_chrnum(this),chrpos,chromosome_iit);
@@ -3826,12 +4509,122 @@ print_localsplice (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
}
+static bool
+check_cigar_localsplice (Stage3end_T this, Stage3end_T mate, int querylength, int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, bool circularp) {
+ bool result;
+ Intlist_T cigar_types = NULL;
+ Substring_T substring1, substring2;
+ bool plusp, sensep;
+ int sensedir;
+ /* Collects the CIGAR operation types for a two-substring (donor/acceptor) alignment and returns the verdict of check_cigar_types() on that list.  clipdir, first_read_p and circularp are accepted but not read in this function. */
+ plusp = Stage3end_plusp(this);
+ /* When this end reports SENSE_NULL, the mate's sense direction is used instead. */
+ if ((sensedir = Stage3end_sensedir(this)) == SENSE_NULL) {
+ sensedir = Stage3end_sensedir(mate);
+ }
+ sensep = (sensedir == SENSE_FORWARD);
+ /* substring1 is the piece whose ops are emitted first: the donor when sensep == plusp, otherwise the acceptor. */
+ if (sensep == plusp) {
+ substring1 = /* donor */ Stage3end_substring_donor(this);
+ substring2 = /* acceptor */ Stage3end_substring_acceptor(this);
+ } else {
+ substring1 = /* acceptor */ Stage3end_substring_acceptor(this);
+ substring2 = /* donor */ Stage3end_substring_donor(this);
+ }
+ /* In each branch an 'N' is pushed between the two 'M' pieces only when the hardclip tests written out below hold.  When hide_soft_clips_p (defined outside this block) is true, the end clip lengths are added to the 'M' lengths instead of being emitted as 'S' ops. */
+ if (plusp == true) {
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_querystart(substring1) +
+ Substring_match_length(substring1),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ if (hardclip_low < Substring_queryend(substring1) &&
+ querylength - hardclip_high > Substring_querystart(substring2)) {
+ debug1(printf("\ncase 1: hardclip_low %d < queryend(substring1) %d && querylength %d - hardclip_high %d > querystart(substring2) %d\n",
+ hardclip_low,Substring_queryend(substring1),querylength,hardclip_high,Substring_querystart(substring2)));
+ cigar_types = Intlist_push(cigar_types,'N');
+ }
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(substring2) +
+ (querylength - Substring_queryend(substring2)),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
+ /*querypos*/Substring_querystart(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ if (hardclip_low < Substring_queryend(substring1) &&
+ querylength - hardclip_high > Substring_querystart(substring2)) {
+ debug1(printf("\ncase 1: hardclip_low %d < queryend(substring1) %d && querylength %d - hardclip_high %d > querystart(substring2) %d\n",
+ hardclip_low,Substring_queryend(substring1),querylength,hardclip_high,Substring_querystart(substring2)));
+ cigar_types = Intlist_push(cigar_types,'N');
+ }
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
+ /*querypos*/Substring_queryend(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+ /* Minus-strand layout: the first op starts at querypos querylength.  NOTE(review): the second 'M' piece uses querystart(substring1) as its querypos, whereas check_cigar_shortexon uses queryend(substring2) in the matching spot; presumably the two are equal here -- confirm. */
+ } else {
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ (querylength - Substring_queryend(substring1)) +
+ Substring_match_length(substring1),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false);
+ if (querylength - hardclip_low > Substring_queryend(substring2) &&
+ hardclip_high < Substring_querystart(substring1)) {
+ debug1(printf("\ncase 2: querylength %d - hardclip_low %d > queryend(substring2) %d && hardclip_high %d < querystart(substring1) %d\n",
+ querylength,hardclip_low,Substring_queryend(substring2),hardclip_high,Substring_querystart(substring1)));
+ cigar_types = Intlist_push(cigar_types,'N');
+ }
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(substring2) +
+ Substring_querystart(substring2),
+ /*querypos*/Substring_querystart(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring1),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
+ /*querypos*/Substring_queryend(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ if (querylength - hardclip_low > Substring_queryend(substring2) &&
+ hardclip_high < Substring_querystart(substring1)) {
+ debug1(printf("\ncase 2: querylength %d - hardclip_low %d > queryend(substring2) %d && hardclip_high %d < querystart(substring1) %d\n",
+ querylength,hardclip_low,Substring_queryend(substring2),hardclip_high,Substring_querystart(substring1)));
+ cigar_types = Intlist_push(cigar_types,'N');
+ }
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
+ /*querypos*/Substring_querystart(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring2),
+ /*querypos*/Substring_querystart(substring2),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true);
+ }
+ }
+ /* Evaluate the collected op types, then release the list before returning the verdict. */
+ result = check_cigar_types(cigar_types);
+
+ Intlist_free(&cigar_types);
+ return result;
+}
+
+
static void
print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip5, int hardclip3,
+ Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
Resulttype_T resulttype, bool first_read_p, int npaths_mate,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
bool circularp) {
@@ -3845,11 +4638,9 @@ print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate
char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
int substring1_start, substring2_start, substringM_start,
substring1_length, substring2_length, substringM_length, matchlength;
- int hardclip_low, hardclip_high;
- /* int mate_hardclip_low, mate_hardclip_high; */
bool plusp, printp;
bool start_ambig, end_ambig;
- int amb_nmatches_start, amb_nmatches_end;
+ int amb_length_start, amb_length_end;
querylength = Shortread_fulllength(queryseq);
@@ -3860,46 +4651,6 @@ print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate
}
sensep = (sensedir == SENSE_FORWARD);
- if (circularp == true) {
- /* clipdir should be +1 */
- if (1 || plusp == true) {
- hardclip_low = hardclip5;
- hardclip_high = hardclip3;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- } else {
- hardclip_low = hardclip3;
- hardclip_high = hardclip5;
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = 0; */
- }
- } else {
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip5;
- } else {
- hardclip_low = hardclip5;
- hardclip_high = 0;
- }
- /* mate_hardclip_low = hardclip3; */
- /* mate_hardclip_high = 0; */
- /* fprintf(stderr,"first read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip3;
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip3;
- }
- /* mate_hardclip_low = 0; */
- /* mate_hardclip_high = hardclip5; */
- /* fprintf(stderr,"second read: hardclip_low = %d, hardclip_high = %d\n",hardclip_low,hardclip_high); */
- }
- }
-
-
/* 1. QNAME */
if (acc2 == NULL) {
fprintf(fp,"%s",acc1);
@@ -3915,7 +4666,7 @@ print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(shortexon),chrpos,chromosome_iit);
+ print_chromosomal_pos(fp,Stage3end_chrnum(shortexon),chrpos,Stage3end_chrlength(shortexon),chromosome_iit);
/* 5. MAPQ: Mapping quality */
@@ -4062,7 +4813,8 @@ print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
+ print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),
Stage3end_chrnum(shortexon),chrpos,chromosome_iit);
@@ -4349,12 +5101,12 @@ print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate
if ((start_ambig = Stage3end_start_ambiguous_p(shortexon)) == true ||
(end_ambig = Stage3end_end_ambiguous_p(shortexon)) == true) {
#if 1
- amb_nmatches_start = Stage3end_amb_nmatches_start(shortexon);
- amb_nmatches_end = Stage3end_amb_nmatches_end(shortexon);
+ amb_length_start = Stage3end_amb_length_start(shortexon);
+ amb_length_end = Stage3end_amb_length_end(shortexon);
if (plusp == true) {
- fprintf(fp,"\tXA:Z:%d,%d",amb_nmatches_start,amb_nmatches_end);
+ fprintf(fp,"\tXA:Z:%d,%d",amb_length_start,amb_length_end);
} else {
- fprintf(fp,"\tXA:Z:%d,%d",amb_nmatches_end,amb_nmatches_start);
+ fprintf(fp,"\tXA:Z:%d,%d",amb_length_end,amb_length_start);
}
#else
if (start_ambig == true && end_ambig == true) {
@@ -4390,6 +5142,161 @@ print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate
}
+static bool
+check_cigar_shortexon (Stage3end_T shortexon, Stage3end_T mate, int querylength, int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, bool circularp) {
+ bool result;
+ Intlist_T cigar_types = NULL;
+ Substring_T substring1, substring2, substringM;
+ bool plusp, sensep;
+ int sensedir;
+ /* Collects the CIGAR operation types for a short-exon alignment (substring1, substringM, substring2, where either outer substring may be NULL) and returns the verdict of check_cigar_types() on that list.  clipdir, first_read_p and circularp are accepted but not read in this function. */
+ plusp = Stage3end_plusp(shortexon);
+ /* When this end reports SENSE_NULL, the mate's sense direction is used instead. */
+ if ((sensedir = Stage3end_sensedir(shortexon)) == SENSE_NULL) {
+ sensedir = Stage3end_sensedir(mate);
+ }
+ sensep = (sensedir == SENSE_FORWARD);
+
+ substringM = Stage3end_substring1(shortexon);
+ /* substring1 is the piece whose ops are emitted before substringM: the donor side when sensep == plusp, otherwise the acceptor side. */
+ if (sensep == plusp) {
+ substring1 = /* donor */ Stage3end_substringD(shortexon);
+ substring2 = /* acceptor */ Stage3end_substringA(shortexon);
+ } else {
+ substring1 = /* acceptor */ Stage3end_substringA(shortexon);
+ substring2 = /* donor */ Stage3end_substringD(shortexon);
+ }
+ /* Part 1: ops before substringM.  With no substring1, only a leading 'S' up to substringM is emitted. */
+ if (substring1 == NULL) {
+ if (plusp == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substringM),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substringM),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ }
+
+ } else if (plusp == true) {
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_querystart(substring1) +
+ Substring_match_length(substring1),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
+ /*querypos*/Substring_querystart(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ }
+ if (hardclip_low < Substring_queryend(substring1) &&
+ querylength - hardclip_high > Substring_querystart(substringM)) {
+ debug1(printf("\ncase 3: hardclip_low %d < queryend(substring1) %d && querylength %d - hardclip_high %d > querystart(substringM) %d\n",
+ hardclip_low,Substring_queryend(substring1),querylength,hardclip_high,Substring_querystart(substringM)));
+ cigar_types = Intlist_push(cigar_types,'N');
+ }
+
+ } else {
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ (querylength - Substring_queryend(substring1)) +
+ Substring_match_length(substring1),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring1),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
+ /*querypos*/Substring_queryend(substring1),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ }
+ if (querylength - hardclip_low > Substring_queryend(substringM) &&
+ hardclip_high < Substring_querystart(substring1)) {
+ debug1(printf("\ncase 4: querylength %d - hardclip_low %d > queryend(substringM) %d && hardclip_high %d < querystart(substring1) %d\n",
+ querylength,hardclip_low,Substring_queryend(substringM),hardclip_high,Substring_querystart(substring1)));
+ cigar_types = Intlist_push(cigar_types,'N');
+ }
+ }
+ /* Part 2: the 'M' op for substringM itself; querypos is its querystart on the plus strand and its queryend on the minus strand. */
+ if (plusp == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substringM),
+ /*querypos*/Substring_querystart(substringM),querylength,
+ hardclip_low,hardclip_high,plusp,/*lastp*/false);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substringM),
+ /*querypos*/Substring_queryend(substringM),querylength,
+ hardclip_low,hardclip_high,plusp,/*lastp*/false);
+ }
+ /* Part 3: ops after substringM.  With no substring2, only a trailing 'S' is emitted (lastp true). */
+ if (substring2 == NULL) {
+ if (plusp == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substringM),
+ /*querypos*/Substring_queryend(substringM),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substringM),
+ /*querypos*/Substring_querystart(substringM),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ }
+
+ } else if (plusp == true) {
+ if (hardclip_low < Substring_queryend(substringM) &&
+ querylength - hardclip_high > Substring_querystart(substring2)) {
+ debug1(printf("\ncase 5: hardclip_low %d < queryend(substringM) %d && querylength %d - hardclip_high %d > querystart(substring2) %d\n",
+ hardclip_low,Substring_queryend(substringM),querylength,hardclip_high,Substring_querystart(substring2)));
+ cigar_types = Intlist_push(cigar_types,'N');
+ }
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(substring2) +
+ (querylength - Substring_queryend(substring2)),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
+ /*querypos*/Substring_querystart(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
+ /*querypos*/Substring_queryend(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+ /* The debug trace below prints hardclip_high for its "hardclip_high %d" placeholder, matching the condition being tested and the other debug cases. */
+ } else {
+ if (querylength - hardclip_low > Substring_queryend(substring2) &&
+ hardclip_high < Substring_querystart(substringM)) {
+ debug1(printf("\ncase 6: querylength %d - hardclip_low %d > queryend(substring2) %d && hardclip_high %d < querystart(substringM) %d\n",
+ querylength,hardclip_low,Substring_queryend(substring2),hardclip_high,Substring_querystart(substringM)));
+ cigar_types = Intlist_push(cigar_types,'N');
+ }
+ if (hide_soft_clips_p == true) {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
+ Substring_match_length(substring2) +
+ Substring_querystart(substring2),
+ /*querypos*/Substring_queryend(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else {
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
+ /*querypos*/Substring_queryend(substring2),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring2),
+ /*querypos*/Substring_querystart(substring2),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true);
+ }
+ }
+ /* Evaluate the collected op types, then release the list before returning the verdict. */
+ result = check_cigar_types(cigar_types);
+
+ Intlist_free(&cigar_types);
+ return result;
+}
+
+
/* Distant splicing, including scramble, inversion, translocation */
static void
@@ -4397,12 +5304,11 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T mate_chrpos, int clipdir, int hardclip5, int hardclip3,
+ Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
Resulttype_T resulttype, bool first_read_p, int npaths_mate,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
Chrpos_T donor_chrpos, acceptor_chrpos, concordant_chrpos;
Substring_T donor, acceptor;
- int hardclip_low, hardclip_high;
char *donor_chr, *acceptor_chr;
char donor1, donor2, acceptor2, acceptor1;
double donor_prob, acceptor_prob;
@@ -4495,8 +5401,8 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,resulttype,first_read_p,
- npaths_mate,quality_shift,sam_read_group_id,
+ /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
@@ -4505,8 +5411,8 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,/*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,resulttype,first_read_p,
- npaths_mate,quality_shift,sam_read_group_id,
+ /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
@@ -4516,7 +5422,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- clipdir,hardclip5,hardclip3,resulttype,first_read_p,
+ clipdir,hardclip_low,hardclip_high,resulttype,first_read_p,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4529,7 +5435,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,
+ /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4539,7 +5445,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,
+ /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4550,7 +5456,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- clipdir,hardclip5,hardclip3,resulttype,first_read_p,
+ clipdir,hardclip_low,hardclip_high,resulttype,first_read_p,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4564,7 +5470,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,
+ /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4574,7 +5480,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,
+ /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4585,7 +5491,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- clipdir,hardclip5,hardclip3,resulttype,first_read_p,
+ clipdir,hardclip_low,hardclip_high,resulttype,first_read_p,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4598,7 +5504,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,
+ /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4608,7 +5514,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,/*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,
+ /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4619,7 +5525,7 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- clipdir,hardclip5,hardclip3,resulttype,first_read_p,
+ clipdir,hardclip_low,hardclip_high,resulttype,first_read_p,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
donor_strand,acceptor_strand,donor_chr,acceptor_chr,
@@ -4646,14 +5552,14 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
Shortread_T queryseq_mate, int pairedlength, Chrpos_T chrpos, Chrpos_T mate_chrpos,
- int clipdir, int hardclip5, int hardclip3, Resulttype_T resulttype, bool first_read_p,
+ int clipdir, int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high,
+ Resulttype_T resulttype, bool first_read_p,
int npaths_mate, int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
bool merge_samechr_p) {
Hittype_T hittype;
Substring_T donor, acceptor;
bool sensep, normalp;
unsigned int flag;
- int ignore = 0;
int circularpos, querylength;
char donor_strand, acceptor_strand;
int sensedir;
@@ -4683,8 +5589,12 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
}
} else if (hittype == EXACT || hittype == SUB || hittype == TERMINAL) {
- if ((circularpos = Stage3end_circularpos(this)) > 0) {
- querylength = Shortread_fulllength(queryseq);
+ querylength = Shortread_fulllength(queryseq);
+ if ((circularpos = Stage3end_circularpos(this)) > 0 &&
+ check_cigar_single(hittype,this,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ first_read_p,/*circularp*/true) == true &&
+ check_cigar_single(hittype,this,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ first_read_p,/*circularp*/true) == true) {
#ifdef CHECK_ASSERTIONS
if (Stage3end_plusp(this) == true) {
assert(chrpos-Stage3end_trim_left(this)+circularpos-Stage3end_chrlength(this) == 1);
@@ -4694,60 +5604,86 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
#endif
print_single(fp,abbrev,hittype,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,resulttype,first_read_p,
- npaths_mate,quality_shift,sam_read_group_id,
+ chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
print_single(fp,abbrev,hittype,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,resulttype,first_read_p,
- npaths_mate,quality_shift,sam_read_group_id,
+ /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
+ } else if (first_read_p == true) {
+ print_single(fp,abbrev,hittype,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*circularp*/false);
} else {
print_single(fp,abbrev,hittype,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5,hardclip3,resulttype,first_read_p,
+ chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/false);
}
} else if (hittype == INSERTION) {
- if ((circularpos = Stage3end_circularpos(this)) > 0) {
- querylength = Shortread_fulllength(queryseq);
+ querylength = Shortread_fulllength(queryseq);
+ if ((circularpos = Stage3end_circularpos(this)) > 0 &&
+ check_cigar_insertion(this,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ first_read_p,/*circularp*/true) == true &&
+ check_cigar_insertion(this,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ first_read_p,/*circularp*/true) == true) {
print_insertion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,
+ chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
print_insertion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,
+ /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
+ } else if (first_read_p == true) {
+ print_insertion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*circularp*/false);
} else {
print_insertion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5,hardclip3,resulttype,first_read_p,
+ chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/false);
}
} else if (hittype == DELETION) {
- if ((circularpos = Stage3end_circularpos(this)) > 0) {
- querylength = Shortread_fulllength(queryseq);
+ querylength = Shortread_fulllength(queryseq);
+ if ((circularpos = Stage3end_circularpos(this)) > 0 &&
+ check_cigar_deletion(this,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ first_read_p,/*circularp*/true) == true &&
+ check_cigar_deletion(this,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ first_read_p,/*circularp*/true) == true) {
print_deletion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,
+ chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
print_deletion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,
+ /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
+ } else if (first_read_p == true) {
+ print_deletion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*circularp*/false);
} else {
print_deletion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5,hardclip3,resulttype,first_read_p,
+ chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/false);
}
@@ -4774,12 +5710,16 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
donor_strand = '?';
}
- if ((circularpos = Stage3end_circularpos(this)) > 0) {
- querylength = Shortread_fulllength(queryseq);
+ querylength = Shortread_fulllength(queryseq);
+ if ((circularpos = Stage3end_circularpos(this)) > 0 &&
+ check_cigar_halfdonor(donor,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ first_read_p,/*circularp*/true) == true &&
+ check_cigar_halfdonor(donor,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ first_read_p,/*circularp*/true) == true) {
print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/chrpos,chrpos,/*acceptor_chrpos*/-1U,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,
+ /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
donor_strand,/*acceptor_strand*/'\0',/*donor_chr*/NULL,/*acceptor_chr*/NULL,
@@ -4788,17 +5728,27 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,/*chrpos*/1,/*acceptor_chrpos*/-1U,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,
+ /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
donor_strand,/*acceptor_strand*/'\0',/*donor_chr*/NULL,/*acceptor_chr*/NULL,
/*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
/*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/true);
+ } else if (first_read_p == true) {
+ print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ /*concordant_chrpos*/chrpos,chrpos,/*acceptor_chrpos*/-1U,mate_chrpos,
+ clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
+ donor_strand,/*acceptor_strand*/'\0',/*donor_chr*/NULL,/*acceptor_chr*/NULL,
+ /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
+ /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/false);
} else {
print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/chrpos,chrpos,/*acceptor_chrpos*/-1U,mate_chrpos,
- clipdir,hardclip5,hardclip3,resulttype,first_read_p,
+ clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
donor_strand,/*acceptor_strand*/'\0',/*donor_chr*/NULL,/*acceptor_chr*/NULL,
@@ -4828,12 +5778,16 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
acceptor_strand = '?';
}
- if ((circularpos = Stage3end_circularpos(this)) > 0) {
- querylength = Shortread_fulllength(queryseq);
+ querylength = Shortread_fulllength(queryseq);
+ if ((circularpos = Stage3end_circularpos(this)) > 0 &&
+ check_cigar_halfacceptor(acceptor,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ first_read_p,/*circularp*/true) == true &&
+ check_cigar_halfacceptor(acceptor,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ first_read_p,/*circularp*/true) == true) {
print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/chrpos,/*donor_chrpos*/-1U,chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,
+ /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
/*donor_strand*/'\0',acceptor_strand,/*donor_chr*/NULL,/*acceptor_chr*/NULL,
@@ -4842,17 +5796,27 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,/*donor_chrpos*/-1U,/*chrpos*/1,mate_chrpos,
- /*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,
+ /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
/*donor_strand*/'\0',acceptor_strand,/*donor_chr*/NULL,/*acceptor_chr*/NULL,
/*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
/*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/true);
+ } else if (first_read_p == true) {
+ print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ /*concordant_chrpos*/chrpos,/*donor_chrpos*/-1U,chrpos,mate_chrpos,
+ clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
+ /*donor_strand*/'\0',acceptor_strand,/*donor_chr*/NULL,/*acceptor_chr*/NULL,
+ /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
+ /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/false);
} else {
print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/chrpos,/*donor_chrpos*/-1U,chrpos,mate_chrpos,
- clipdir,hardclip5,hardclip3,resulttype,first_read_p,
+ clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
/*donor_strand*/'\0',acceptor_strand,/*donor_chr*/NULL,/*acceptor_chr*/NULL,
@@ -4870,11 +5834,19 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
} else if (hittype == TRANSLOC_SPLICE || (hittype == SAMECHR_SPLICE && merge_samechr_p == false)) {
/* Stage3end_chrnum(this) == 0 || Stage3end_distance(this) == 0U */
/* distant splice */
- print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- mate_chrpos,clipdir,hardclip5,hardclip3,resulttype,first_read_p,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p);
+ if (first_read_p == true) {
+ print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ mate_chrpos,clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p);
+ } else {
+ print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ mate_chrpos,clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p);
+ }
} else {
normalp = true;
sensep = (Stage3end_sensedir(this) == SENSE_FORWARD);
@@ -4908,52 +5880,81 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
}
}
if (normalp == true) {
- if ((circularpos = Stage3end_circularpos(this)) > 0) {
- querylength = Shortread_fulllength(queryseq);
+ querylength = Shortread_fulllength(queryseq);
+ if ((circularpos = Stage3end_circularpos(this)) > 0 &&
+ check_cigar_localsplice(this,mate,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ first_read_p,/*circularp*/true) == true &&
+ check_cigar_localsplice(this,mate,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ first_read_p,/*circularp*/true) == true) {
print_localsplice(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,
+ chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
print_localsplice(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,
+ /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
+ } else if (first_read_p == true) {
+ print_localsplice(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,
+ resulttype,/*first_read_p*/true,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*circularp*/false);
} else {
print_localsplice(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5,hardclip3,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
+ chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/false,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/false);
}
+
} else {
- print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- mate_chrpos,clipdir,hardclip5,hardclip3,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p);
+ if (first_read_p == true) {
+ print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ mate_chrpos,clipdir,hardclip5_low,hardclip5_high,
+ resulttype,/*first_read_p*/true,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p);
+ } else {
+ print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ mate_chrpos,clipdir,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/false,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p);
+ }
}
}
} else if (hittype == ONE_THIRD_SHORTEXON || hittype == TWO_THIRDS_SHORTEXON || hittype == SHORTEXON) {
- if ((circularpos = Stage3end_circularpos(this)) > 0) {
- querylength = Shortread_fulllength(queryseq);
+ querylength = Shortread_fulllength(queryseq);
+ if ((circularpos = Stage3end_circularpos(this)) > 0 &&
+ check_cigar_shortexon(this,mate,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ first_read_p,/*circularp*/true) == true &&
+ check_cigar_shortexon(this,mate,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ first_read_p,/*circularp*/true) == true) {
print_shortexon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,
+ chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
print_shortexon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,
+ /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/true);
+ } else if (first_read_p == true) {
+ print_shortexon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,
+ resulttype,/*first_read_p*/true,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*circularp*/false);
} else {
print_shortexon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5,hardclip3,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
+ chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/false,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*circularp*/false);
}
@@ -4961,77 +5962,82 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
/* Note: sam_paired_p must be true because we are calling GMAP only on halfmapping uniq */
if (mate == NULL) {
- chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,
+ chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,
this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
mate_chrpos = 0U;
- hardclip3 = 0;
+ hardclip3_low = hardclip3_high = 0;
} else if (first_read_p == true) {
- if (clipdir >= 0) {
- chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,
- this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
- mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,
- mate,Stage3end_substring_low(mate),Shortread_fulllength(queryseq_mate));
- } else {
- chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5,/*hardclip_high*/0,
- this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
- mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip3,
- mate,Stage3end_substring_low(mate),Shortread_fulllength(queryseq_mate));
- }
+ chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
+ this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
+ mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
+ mate,Stage3end_substring_low(mate),Shortread_fulllength(queryseq_mate));
} else {
- if (clipdir >= 0) {
- chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,
- this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
- mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,
- mate,Stage3end_substring_low(mate),Shortread_fulllength(queryseq_mate));
- } else {
- chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip3,
- this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
- mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5,/*hardclip_high*/0,
- mate,Stage3end_substring_low(mate),Shortread_fulllength(queryseq_mate));
- }
+ chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
+ this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
+ mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
+ mate,Stage3end_substring_low(mate),Shortread_fulllength(queryseq_mate));
}
- assert(ignore == 0);
flag = SAM_compute_flag(Stage3end_plusp(this),mate,resulttype,first_read_p,
pathnum,npaths,npaths_mate,absmq_score,first_absmq,
invertp,invert_mate_p);
- if ((circularpos = Stage3end_circularpos(this)) > 0) {
- querylength = Shortread_fulllength(queryseq);
+ querylength = Shortread_fulllength(queryseq);
+ if ((circularpos = Stage3end_circularpos(this)) > 0 &&
+ Pair_check_cigar(Stage3end_pairarray(this),Stage3end_npairs(this),querylength,
+ /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ /*watsonp*/Stage3end_plusp(this),Stage3end_cdna_direction(this),
+ first_read_p,/*circularp*/true) == true &&
+ Pair_check_cigar(Stage3end_pairarray(this),Stage3end_npairs(this),querylength,
+ /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ /*watsonp*/Stage3end_plusp(this),Stage3end_cdna_direction(this),
+ first_read_p,/*circularp*/true) == true) {
Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
- /*clipdir*/+1,/*hardclip5*/0,/*hardclip3*/querylength-circularpos,Shortread_fulllength(queryseq),
- Stage3end_plusp(this),Stage3end_cdna_direction(this),
+ /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,Shortread_fulllength(queryseq),
+ /*watsonp*/Stage3end_plusp(this),Stage3end_cdna_direction(this),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,first_read_p,
- pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,
+ pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,Stage3end_chrlength(this),
queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
- Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
- /*mate_cdna_direction*/Stage3end_cdna_direction(mate),
+ Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),/*mate_cdna_direction*/Stage3end_cdna_direction(mate),
pairedlength,sam_read_group_id,invertp,/*circularp*/true,/*merged_overlap_p*/false);
Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
- /*clipdir*/+1,/*hardclip5*/circularpos,/*hardclip3*/0,Shortread_fulllength(queryseq),
- Stage3end_plusp(this),Stage3end_cdna_direction(this),
+ /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,Shortread_fulllength(queryseq),
+ /*watsonp*/Stage3end_plusp(this),Stage3end_cdna_direction(this),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,first_read_p,
- pathnum,npaths,absmq_score,first_absmq,second_absmq,/*chrpos*/1,
+ pathnum,npaths,absmq_score,first_absmq,second_absmq,/*chrpos*/1,Stage3end_chrlength(this),
queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
- Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
- /*mate_cdna_direction*/Stage3end_cdna_direction(mate),
+ Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),/*mate_cdna_direction*/Stage3end_cdna_direction(mate),
pairedlength,sam_read_group_id,invertp,/*circularp*/true,/*merged_overlap_p*/false);
+ } else if (first_read_p == true) {
+ Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
+ acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
+ Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
+ clipdir,hardclip5_low,hardclip5_high,Shortread_fulllength(queryseq),
+ Stage3end_plusp(this),Stage3end_cdna_direction(this),
+ /*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,
+ pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,Stage3end_chrlength(this),
+ queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
+ Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),/*mate_cdna_direction*/Stage3end_cdna_direction(mate),
+ pairedlength,sam_read_group_id,invertp,/*circularp*/false,/*merged_overlap_p*/false);
} else {
Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
- clipdir,hardclip5,hardclip3,Shortread_fulllength(queryseq),
+ clipdir,hardclip3_low,hardclip3_high,Shortread_fulllength(queryseq),
Stage3end_plusp(this),Stage3end_cdna_direction(this),
- /*chimera_part*/0,/*chimera*/NULL,quality_shift,first_read_p,
- pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,
+ /*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/false,
+ pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,Stage3end_chrlength(this),
queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
- Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),mate_chrpos,
- /*mate_cdna_direction*/Stage3end_cdna_direction(mate),
+ Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
+ mate_chrpos,Stage3end_chrlength(mate),/*mate_cdna_direction*/Stage3end_cdna_direction(mate),
pairedlength,sam_read_group_id,invertp,/*circularp*/false,/*merged_overlap_p*/false);
}
} else {
@@ -5054,7 +6060,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Chrpos_T chrpos, chrpos5, chrpos3;
int npaths, npaths1, npaths2, pathnum;
int first_absmq, second_absmq, first_absmq1, second_absmq1, first_absmq2, second_absmq2;
- int hardclip5 = 0, hardclip3 = 0, clipdir;
+ int hardclip5_low = 0, hardclip5_high = 0, hardclip3_low = 0, hardclip3_high = 0, clipdir;
char *acc1, *acc2;
Pairtype_T pairtype;
FILE *fp, *fp_xs;
@@ -5110,19 +6116,20 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
if (Stage3pair_circularp(stage3pair) == true) {
/* Don't resolve overlaps on a circular alignment */
clipdir = 0;
- hardclip5 = hardclip3 = 0;
+ hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
fp = fp_concordant_circular;
abbrev = ABBREV_CONCORDANT_CIRCULAR;
} else if (clip_overlap_p == false && merge_overlap_p == false) {
clipdir = 0;
- hardclip5 = hardclip3 = 0;
+ hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
fp = fp_concordant_uniq;
abbrev = ABBREV_CONCORDANT_UNIQ;
} else {
- clipdir = Stage3pair_overlap(&hardclip5,&hardclip3,stage3pair);
- debug3(printf("clipdir %d with hardclip5 = %d, hardclip3 = %d\n",clipdir,hardclip5,hardclip3));
+ clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
+ debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
fp = fp_concordant_uniq;
abbrev = ABBREV_CONCORDANT_UNIQ;
}
@@ -5133,22 +6140,21 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,hit3,
Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else if (clipdir > 0) {
- debug3(printf("clipping %d on hit5 high and %d on hit3 low\n",hardclip5,hardclip3));
- adjust_hardclips(/*hardclip_low*/&hardclip3,/*hit_low*/hit3,/*low_querylength*/Shortread_fulllength(queryseq2),
- /*hardclip_high*/&hardclip5,/*hit_high*/hit5,/*high_querylength*/Shortread_fulllength(queryseq1));
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ } else if (hardclip3_high == 0 && hardclip5_low == 0) {
+ debug3(printf("clipping %d on hit5 high and %d on hit3 low\n",hardclip5_high,hardclip3_low));
+ adjust_hardclips(/*hardclip_low*/&hardclip3_low,/*hit_low*/hit3,/*low_querylength*/Shortread_fulllength(queryseq2),
+ /*hardclip_high*/&hardclip5_high,/*hit_high*/hit5,/*high_querylength*/Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ } else if (hardclip5_high == 0 && hardclip3_low == 0) {
+ debug3(printf("clipping %d on hit5 low and %d on hit3 high\n",hardclip5_low,hardclip3_high));
+ adjust_hardclips(/*hardclip_low*/&hardclip5_low,/*hit_low*/hit5,/*low_querylength*/Shortread_fulllength(queryseq1),
+ /*hardclip_high*/&hardclip3_high,/*hit_high*/hit3,/*high_querylength*/Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
} else {
- debug3(printf("clipping %d on hit5 low and %d on hit3 high\n",hardclip5,hardclip3));
- adjust_hardclips(/*hardclip_low*/&hardclip5,/*hit_low*/hit5,/*low_querylength*/Shortread_fulllength(queryseq1),
- /*hardclip_high*/&hardclip3,/*hit_high*/hit3,/*high_querylength*/Shortread_fulllength(queryseq2));
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/hardclip5,/*hardclip_high*/0,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip3,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ fprintf(stderr,"1 Problem: clipdir %d, but hardclip5 %d..%d and hardclip3 %d..%d\n",clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
+ abort();
}
if (merge_overlap_p == false || clipdir == 0) {
@@ -5158,7 +6164,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- clipdir,hardclip5,hardclip3,resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
merge_samechr_p);
@@ -5168,7 +6175,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- clipdir,hardclip5,hardclip3,resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
merge_samechr_p);
@@ -5178,7 +6186,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
stage3pair,queryseq1,queryseq2,
/*querylength5*/Shortread_fulllength(queryseq1),
/*querylength3*/Shortread_fulllength(queryseq2),
- clipdir,hardclip5,hardclip3);
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
/* printf("queryseq_merged: %s\n",queryseq_merged); */
if (clipdir >= 0) {
chrpos = chrpos5;
@@ -5193,7 +6201,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Pair_print_sam(fp_unpaired_uniq,/*abbrev*/ABBREV_UNPAIRED_UNIQ,pairarray,npairs,
acc1,/*acc2*/NULL,Stage3end_chrnum(hit5),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
/*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,/*querylength*/querylength_merged,
+ /*clipdir*/0,/*hardclip_low*/0,/*hardclip_high*/0,/*querylength*/querylength_merged,
Stage3end_plusp(hit5),Stage3end_cdna_direction(hit5),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,
/*pathnum*/1,/*npaths*/1,
@@ -5202,9 +6210,9 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
#else
/*absmq_score*/MAX_QUALITY_SCORE,/*first_absmq*/MAX_QUALITY_SCORE,/*second_absmq*/0,
#endif
- chrpos,/*queryseq*/NULL,resulttype,flag,
+ chrpos,Stage3end_chrlength(hit5),/*queryseq*/NULL,resulttype,flag,
/*pair_mapq_score*/MAX_QUALITY_SCORE,/*end_mapq_score*/MAX_QUALITY_SCORE,
- /*mate_chrnum*/0,/*mate_effective_chrnum*/0,/*mate_chrpos*/0,
+ /*mate_chrnum*/0,/*mate_effective_chrnum*/0,/*mate_chrpos*/0,/*mate_chrlength*/0,
/*mate_cdna_direction*/0,/*pairedlength*/0,
sam_read_group_id,/*invertp*/false,/*circularp*/false,/*merged_overlap_p*/true);
if (quality_merged != NULL) {
@@ -5247,15 +6255,16 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
if (Stage3pair_circularp(stage3pair) == true) {
/* Don't resolve overlaps on a circular alignment */
clipdir = 0;
- hardclip5 = hardclip3 = 0;
+ hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
} else if (clip_overlap_p == false) {
clipdir = 0;
- hardclip5 = hardclip3 = 0;
+ hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
} else {
- clipdir = Stage3pair_overlap(&hardclip5,&hardclip3,stage3pair);
- debug3(printf("clipdir %d with hardclip5 = %d, hardclip3 = %d\n",clipdir,hardclip5,hardclip3));
+ clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
+ debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
}
if (clipdir == 0) {
@@ -5263,22 +6272,21 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,hit3,
Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else if (clipdir > 0) {
- debug3(printf("clipping %d on hit5 high and %d on hit3 low\n",hardclip5,hardclip3));
- adjust_hardclips(/*hardclip_low*/&hardclip3,/*hit_low*/hit3,/*low_querylength*/Shortread_fulllength(queryseq2),
- /*hardclip_high*/&hardclip5,/*hit_high*/hit5,/*high_querylength*/Shortread_fulllength(queryseq1));
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ } else if (hardclip3_high == 0 && hardclip5_low == 0) {
+ debug3(printf("clipping %d on hit5 high and %d on hit3 low\n",hardclip5_high,hardclip3_low));
+ adjust_hardclips(/*hardclip_low*/&hardclip3_low,/*hit_low*/hit3,/*low_querylength*/Shortread_fulllength(queryseq2),
+ /*hardclip_high*/&hardclip5_high,/*hit_high*/hit5,/*high_querylength*/Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ } else if (hardclip5_high == 0 && hardclip3_low == 0) {
+ debug3(printf("clipping %d on hit5 low and %d on hit3 high\n",hardclip5_low,hardclip3_high));
+ adjust_hardclips(/*hardclip_low*/&hardclip5_low,/*hit_low*/hit5,/*low_querylength*/Shortread_fulllength(queryseq1),
+ /*hardclip_high*/&hardclip3_high,/*hit_high*/hit3,/*high_querylength*/Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
} else {
- debug3(printf("clipping %d on hit5 low and %d on hit3 high\n",hardclip5,hardclip3));
- adjust_hardclips(/*hardclip_low*/&hardclip5,/*hit_low*/hit5,/*low_querylength*/Shortread_fulllength(queryseq1),
- /*hardclip_high*/&hardclip3,/*hit_high*/hit3,/*high_querylength*/Shortread_fulllength(queryseq2));
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/hardclip5,/*hardclip_high*/0,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip3,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ fprintf(stderr,"2 Problem: clipdir %d, but hardclip5 %d..%d and hardclip3 %d..%d\n",clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
+ abort();
}
/* print first end */
@@ -5288,7 +6296,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- clipdir,hardclip5,hardclip3,resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
merge_samechr_p);
@@ -5299,7 +6308,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- clipdir,hardclip5,hardclip3,resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
merge_samechr_p);
}
@@ -5343,15 +6353,16 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
if (Stage3pair_circularp(stage3pair) == true) {
/* Don't resolve overlaps on a circular alignment */
clipdir = 0;
- hardclip5 = hardclip3 = 0;
+ hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
} else if (clip_overlap_p == false && merge_overlap_p == false) {
clipdir = 0;
- hardclip5 = hardclip3 = 0;
+ hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
} else {
- clipdir = Stage3pair_overlap(&hardclip5,&hardclip3,stage3pair);
- debug3(printf("clipdir %d with hardclip5 = %d, hardclip3 = %d\n",clipdir,hardclip5,hardclip3));
+ clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
+ debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
}
if (clipdir == 0) {
@@ -5359,20 +6370,25 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,hit3,
Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else if (clipdir > 0) {
- adjust_hardclips(/*hardclip_low*/&hardclip3,/*hit_low*/hit3,/*low_querylength*/Shortread_fulllength(queryseq2),
- /*hardclip_high*/&hardclip5,/*hit_high*/hit5,/*high_querylength*/Shortread_fulllength(queryseq1));
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,hit5,
+ } else if (hardclip3_high == 0 && hardclip5_low == 0) {
+ debug3(printf("clipping %d on hit5 high and %d on hit3 low\n",hardclip5_high,hardclip3_low));
+ adjust_hardclips(/*hardclip_low*/&hardclip3_low,/*hit_low*/hit3,/*low_querylength*/Shortread_fulllength(queryseq2),
+ /*hardclip_high*/&hardclip5_high,/*hit_high*/hit5,/*high_querylength*/Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,hit5,
Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,hit3,
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,hit3,
Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else {
- adjust_hardclips(/*hardclip_low*/&hardclip5,/*hit_low*/hit5,/*low_querylength*/Shortread_fulllength(queryseq1),
- /*hardclip_high*/&hardclip3,/*hit_high*/hit3,/*high_querylength*/Shortread_fulllength(queryseq2));
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/hardclip5,/*hardclip_high*/0,hit5,
+ } else if (hardclip5_high == 0 && hardclip3_low == 0) {
+ debug3(printf("clipping %d on hit5 low and %d on hit3 high\n",hardclip5_low,hardclip3_high));
+ adjust_hardclips(/*hardclip_low*/&hardclip5_low,/*hit_low*/hit5,/*low_querylength*/Shortread_fulllength(queryseq1),
+ /*hardclip_high*/&hardclip3_high,/*hit_high*/hit3,/*high_querylength*/Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/0,hit5,
Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip3,hit3,
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip3_high,hit3,
Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ } else {
+ fprintf(stderr,"3 Problem: clipdir %d, but hardclip5 %d..%d and hardclip3 %d..%d\n",clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
+ abort();
}
if (merge_overlap_p == false || clipdir == 0) {
@@ -5383,7 +6399,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- clipdir,hardclip5,hardclip3,resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
merge_samechr_p);
@@ -5394,7 +6411,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- clipdir,hardclip5,hardclip3,resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
merge_samechr_p);
@@ -5404,7 +6422,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
stage3pair,queryseq1,queryseq2,
/*querylength5*/Shortread_fulllength(queryseq1),
/*querylength3*/Shortread_fulllength(queryseq2),
- clipdir,hardclip5,hardclip3);
+ clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
/* printf("queryseq_merged: %s\n",queryseq_merged); */
if (clipdir >= 0) {
chrpos = chrpos5;
@@ -5420,7 +6438,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
acc1,/*acc2*/NULL,Stage3end_chrnum(hit5),chromosome_iit,
/*usersegment*/(Sequence_T) NULL,
/*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,/*querylength*/querylength_merged,
+ /*clipdir*/0,/*hardclip_low*/0,/*hardclip_high*/0,/*querylength*/querylength_merged,
Stage3end_plusp(hit5),Stage3end_cdna_direction(hit5),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,pathnum,npaths,
#if 0
@@ -5428,9 +6446,9 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
#else
/*absmq_score*/MAX_QUALITY_SCORE,/*first_absmq*/MAX_QUALITY_SCORE,/*second_absmq*/0,
#endif
- chrpos,/*queryseq*/NULL,resulttype,flag,
+ chrpos,Stage3end_chrlength(hit5),/*queryseq*/NULL,resulttype,flag,
/*pair_mapq_score*/MAX_QUALITY_SCORE,/*end_mapq_score*/MAX_QUALITY_SCORE,
- /*mate_chrnum*/0,/*mate_effective_chrnum*/0,/*mate_chrpos*/0,
+ /*mate_chrnum*/0,/*mate_effective_chrnum*/0,/*mate_chrpos*/0,/*mate_chrlength*/0,
/*mate_cdna_direction*/0,/*pairedlength*/0,
sam_read_group_id,/*invertp*/false,/*circularp*/false,/*merged_overlap_p*/true);
if (quality_merged != NULL) {
@@ -5464,13 +6482,13 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
abort();
}
- hardclip5 = hardclip3 = 0;
+ hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
hit5 = Stage3pair_hit5(stage3pair);
hit3 = Stage3pair_hit3(stage3pair);
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,hit5,
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,hit5,
Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,hit3,
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,hit3,
Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
/* print first end */
@@ -5479,7 +6497,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- /*clipdir*/0,hardclip5,hardclip3,resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
+ /*clipdir*/0,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
merge_samechr_p);
@@ -5489,7 +6508,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- /*clipdir*/0,hardclip5,hardclip3,resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
+ /*clipdir*/0,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
merge_samechr_p);
@@ -5526,13 +6546,13 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths_report; pathnum++) {
stage3pair = stage3pairarray[pathnum-1];
- hardclip5 = hardclip3 = 0;
+ hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
hit5 = Stage3pair_hit5(stage3pair);
hit3 = Stage3pair_hit3(stage3pair);
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,hit5,
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,hit5,
Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,hit3,
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,hit3,
Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
/* print first end */
@@ -5542,7 +6562,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- /*clipdir*/0,hardclip5,hardclip3,resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
+ /*clipdir*/0,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
merge_samechr_p);
@@ -5553,7 +6574,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- /*clipdir*/0,hardclip5,hardclip3,resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
+ /*clipdir*/0,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths,
quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
merge_samechr_p);
}
@@ -5564,13 +6586,13 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
stage3array1 = (Stage3end_T *) Result_array(&npaths1,&first_absmq1,&second_absmq1,result);
stage3array2 = (Stage3end_T *) Result_array2(&npaths2,&first_absmq2,&second_absmq2,result);
- hardclip5 = hardclip3 = 0;
+ hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
hit5 = stage3array1[0];
hit3 = stage3array2[0];
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,hit5,
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,hit5,
Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,hit3,
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,hit3,
Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
if (Stage3end_circularpos(hit5) > 0 || Stage3end_circularpos(hit3) > 0) {
@@ -5588,9 +6610,9 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3array1[0]),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
/*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/true,
- /*npaths_mate*/1,quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
- merge_samechr_p);
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/1,quality_shift,sam_read_group_id,
+ invert_first_p,invert_second_p,merge_samechr_p);
/* print second end */
/* Stage3end_eval_and_sort(stage3array2,npaths2,maxpaths_report,queryseq2); */
@@ -5599,9 +6621,9 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3array2[0]),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
/*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/false,
- /*npaths_mate*/1,quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
- merge_samechr_p);
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/1,quality_shift,sam_read_group_id,
+ invert_second_p,invert_first_p,merge_samechr_p);
} else if (resulttype == UNPAIRED_MULT || resulttype == UNPAIRED_TRANSLOC) {
if (resulttype == UNPAIRED_MULT) {
@@ -5647,15 +6669,15 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
chrpos3 = 0U;
} else {
mate = stage3array2[0];
- hardclip3 = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,mate,
+ hardclip3_low = hardclip3_high = 0;
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,mate,
Stage3end_substring_low(mate),Shortread_fulllength(queryseq2));
}
if (npaths1 == 1) {
stage3 = stage3array1[0];
- hardclip5 = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,stage3,
+ hardclip5_low = hardclip5_high = 0;
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,
Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
@@ -5663,8 +6685,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
/*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/true,
- /*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
@@ -5679,8 +6701,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
for (pathnum = 1; pathnum <= npaths1 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array1[pathnum-1];
- hardclip5 = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,stage3,
+ hardclip5_low = hardclip5_high = 0;
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,
Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
@@ -5688,8 +6710,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
/*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/true,
- /*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
}
}
@@ -5703,15 +6725,15 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
chrpos5 = 0U;
} else {
mate = stage3array1[0];
- hardclip5 = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,mate,
+ hardclip5_low = hardclip5_high = 0;
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,mate,
Stage3end_substring_low(mate),Shortread_fulllength(queryseq1));
}
if (npaths2 == 1) {
stage3 = stage3array2[0];
- hardclip3 = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,stage3,
+ hardclip3_low = hardclip3_high = 0;
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,stage3,
Stage3end_substring_low(stage3),Shortread_fulllength(queryseq2));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
@@ -5719,8 +6741,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
/*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/false,
- /*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
invert_second_p,invert_first_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
@@ -5735,8 +6757,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
for (pathnum = 1; pathnum <= npaths2 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array2[pathnum-1];
- hardclip3 = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,stage3,
+ hardclip3_low = hardclip3_high = 0;
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,stage3,
Stage3end_substring_low(stage3),Shortread_fulllength(queryseq2));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
@@ -5744,8 +6766,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
/*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/false,
- /*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
invert_second_p,invert_first_p,merge_samechr_p);
}
}
@@ -5812,8 +6834,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
chrpos3 = 0U;
} else {
mate = stage3array2[0];
- hardclip3 = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,mate,
+ hardclip3_low = hardclip3_high = 0;
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,mate,
Stage3end_substring_low(mate),Shortread_fulllength(queryseq2));
}
@@ -5831,8 +6853,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/* mate should be NULL here */
stage3 = stage3array1[0];
- hardclip5 = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,stage3,
+ hardclip5_low = hardclip5_high = 0;
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,
Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
@@ -5840,8 +6862,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
/*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/true,
- /*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
@@ -5858,8 +6880,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/* mate should be NULL here */
for (pathnum = 1; pathnum <= npaths1 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array1[pathnum-1];
- hardclip5 = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,stage3,
+ hardclip5_low = hardclip5_high = 0;
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,
Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
@@ -5867,8 +6889,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
/*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/true,
- /*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
}
}
@@ -5882,8 +6904,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
chrpos5 = 0U;
} else {
mate = stage3array1[0];
- hardclip5 = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5,mate,
+ hardclip5_low = hardclip5_high = 0;
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,mate,
Stage3end_substring_low(mate),Shortread_fulllength(queryseq1));
}
@@ -5901,8 +6923,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/* mate should be NULL here */
stage3 = stage3array2[0];
- hardclip3 = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,stage3,
+ hardclip3_low = hardclip3_high = 0;
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,stage3,
Stage3end_substring_low(stage3),Shortread_fulllength(queryseq2));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
@@ -5910,8 +6932,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
/*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/false,
- /*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
invert_second_p,invert_first_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
@@ -5928,8 +6950,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/* mate should be NULL here */
for (pathnum = 1; pathnum <= npaths2 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array2[pathnum-1];
- hardclip3 = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3,/*hardclip_high*/0,stage3,
+ hardclip3_low = hardclip3_high = 0;
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,stage3,
Stage3end_substring_low(stage3),Shortread_fulllength(queryseq2));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
@@ -5937,8 +6959,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
/*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- /*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,resulttype,/*first_read_p*/false,
- /*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
invert_second_p,invert_first_p,merge_samechr_p);
}
}
diff --git a/src/samprint.h b/src/samprint.h
index 8de963c..d9cb728 100644
--- a/src/samprint.h
+++ b/src/samprint.h
@@ -1,4 +1,4 @@
-/* $Id: samprint.h 149571 2014-10-01 19:22:17Z twu $ */
+/* $Id: samprint.h 154023 2014-11-25 03:45:18Z twu $ */
#ifndef SAMPRINT_INCLUDED
#define SAMPRINT_INCLUDED
@@ -16,6 +16,7 @@
#include "types.h"
#include "substring.h"
#include "bool.h"
+#include "intlist.h"
extern void
SAM_setup (bool quiet_if_excessive_p_in, int maxpaths_report_in,
@@ -67,9 +68,9 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
Shortread_T queryseq2, int pairedlength, Chrpos_T chrpos, Chrpos_T mate_chrpos,
- int clipdir, int hardclip_low, int hardclip_high, Resulttype_T resulttype, bool first_read_p,
- int npaths_mate, int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
- bool merge_samechr_p);
+ int clipdir, int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high,
+ Resulttype_T resulttype, bool first_read_p, int npaths_mate, int quality_shift,
+ char *sam_read_group_id, bool invertp, bool invert_mate_p, bool merge_samechr_p);
extern void
SAM_print_paired (Result_T result, Resulttype_T resulttype,
diff --git a/src/samread.c b/src/samread.c
index 75dfe14..bcd6f07 100644
--- a/src/samread.c
+++ b/src/samread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samread.c 149421 2014-09-30 17:54:10Z twu $";
+static char rcsid[] = "$Id: samread.c 154089 2014-11-25 21:03:16Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -160,9 +160,181 @@ Samread_get_acc_fromfile (int *acclength, FILE *fp, int linelength) {
}
+/* Scans the tab-delimited fields that follow the current stream position,
+ * looking for an "XO" tag, and maps its two-letter value to the matching
+ * OUTPUT_* code.
+ *
+ *   fp: stream to read from.  The scan starts by skipping to the next tab,
+ *       so the field the stream is positioned in on entry is not examined.
+ *
+ * Returns the decoded output type, or OUTPUT_NONE when the record ends
+ * (newline, NUL, or end of file) before an XO tag is found, or when the
+ * XO value is not one of the recognized abbreviations (a message is also
+ * printed to stderr in that case). */
+static SAM_split_output_type
+parse_XO_fromfile (FILE *fp) {
+  /* fgetc() results are kept in ints so that EOF can be told apart from
+     data; with a char the end of file was never detected and the scan
+     could spin forever. */
+  int c, c0, c1;
+  int abbrev0, abbrev1;
+
+  for (;;) {
+    /* Advance to the start of the next field, but never beyond the end of
+       the current record: otherwise the XO tag of a following record could
+       be attributed to this one. */
+    while ((c = fgetc(fp)) != '\t') {
+      if (c == EOF || c == '\n' || c == '\0') {
+        return OUTPUT_NONE;
+      }
+    }
+
+    /* Two-character tag name */
+    if ((c0 = fgetc(fp)) == EOF || c0 == '\n') {
+      return OUTPUT_NONE;
+    }
+    if ((c1 = fgetc(fp)) == EOF || c1 == '\n') {
+      return OUTPUT_NONE;
+    }
+
+    if (c0 == 'X' && c1 == 'O') {
+      fgetc(fp);                /* : */
+      fgetc(fp);                /* type */
+      fgetc(fp);                /* : */
+      abbrev0 = fgetc(fp);
+      abbrev1 = fgetc(fp);
+      switch (abbrev0) {
+      case 'N':
+        if (abbrev1 == 'M') {
+          return OUTPUT_NM;
+        } else {
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          return OUTPUT_NONE;
+        }
+      case 'C':
+        switch (abbrev1) {
+        case 'U': return OUTPUT_CU;
+        case 'C': return OUTPUT_CC;
+        case 'T': return OUTPUT_CT;
+        case 'M': return OUTPUT_CM;
+        case 'X': return OUTPUT_CX;
+        default:
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          return OUTPUT_NONE;
+        }
+      case 'H':
+        switch (abbrev1) {
+        case 'U': return OUTPUT_HU;
+        case 'C': return OUTPUT_HC;
+        case 'T': return OUTPUT_HT;
+        case 'M': return OUTPUT_HM;
+        case 'X': return OUTPUT_HX;
+        default:
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          return OUTPUT_NONE;
+        }
+      case 'U':
+        switch (abbrev1) {
+        case 'U': return OUTPUT_UU;
+        case 'C': return OUTPUT_UC;
+        case 'T': return OUTPUT_UT;
+        case 'M': return OUTPUT_UM;
+        case 'X': return OUTPUT_UX;
+        default:
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          return OUTPUT_NONE;
+        }
+      case 'P':
+        switch (abbrev1) {
+        case 'C': return OUTPUT_PC;
+        case 'I': return OUTPUT_PI;
+        case 'S': return OUTPUT_PS;
+        case 'L': return OUTPUT_PL;
+        case 'M': return OUTPUT_PM;
+        case 'X': return OUTPUT_PX;
+        default:
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          return OUTPUT_NONE;
+        }
+      default:
+        fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+        return OUTPUT_NONE;
+      }
+    }
+  }
+}
+
+
+#define HITI_MAXDIGITS 10
+
+/* Scans the tab-delimited fields that follow the current stream position
+ * and extracts two tags from them: the value of "HI" (copied as text) and
+ * the value of "XO" (decoded into an OUTPUT_* code).
+ *
+ *   hiti: out.  Receives a buffer obtained from MALLOC that holds the HI
+ *         value as a NUL-terminated string; it is the empty string when the
+ *         record has no HI field.  At most HITI_MAXDIGITS characters are
+ *         stored; any excess is read and discarded.  Releasing the buffer
+ *         is left to the caller.
+ *   fp:   stream to read from.  The scan starts by skipping to the next
+ *         tab, so the field the stream is positioned in on entry is not
+ *         examined.
+ *
+ * Returns the decoded XO value, or OUTPUT_NONE when the record (terminated
+ * by newline, NUL, or end of file) has no XO field or its value is not a
+ * recognized abbreviation (a message is printed to stderr in that case). */
+static SAM_split_output_type
+parse_XO_and_HI_fromfile (char **hiti, FILE *fp) {
+  SAM_split_output_type split_output = OUTPUT_NONE;
+  char *p;
+  /* fgetc() results are kept in ints so that EOF can be told apart from
+     data. */
+  int c, c0, c1;
+  int abbrev0, abbrev1;
+  int nstored;
+
+  *hiti = MALLOC((HITI_MAXDIGITS + 1) * sizeof(char));
+  (*hiti)[0] = '\0';            /* well-defined result when HI is absent */
+
+  for (;;) {
+    /* Advance to the start of the next field, but never beyond the end of
+       the current record. */
+    while ((c = fgetc(fp)) != '\t') {
+      if (c == EOF || c == '\n' || c == '\0') {
+        return split_output;
+      }
+    }
+
+    /* Two-character tag name */
+    if ((c0 = fgetc(fp)) == EOF || c0 == '\n') {
+      return split_output;
+    }
+    if ((c1 = fgetc(fp)) == EOF || c1 == '\n') {
+      return split_output;
+    }
+
+    if (c0 == 'H' && c1 == 'I') {
+      fgetc(fp);                /* : */
+      fgetc(fp);                /* type */
+      fgetc(fp);                /* : */
+
+      /* Copy the value, consuming the whole field but storing no more than
+         the buffer can hold. */
+      p = *hiti;
+      nstored = 0;
+      while ((c = fgetc(fp)) != EOF && c != '\n' && c != '\0' && c != '\t') {
+        if (nstored < HITI_MAXDIGITS) {
+          *p++ = (char) c;
+          nstored++;
+        }
+      }
+      *p = '\0';                /* terminating char */
+
+      if (c != '\t') {
+        /* HI was the last field of the record */
+        return split_output;
+      }
+      /* Give the delimiter back so that the field directly after HI is
+         inspected by the next iteration instead of being skipped. */
+      ungetc(c,fp);
+
+    } else if (c0 == 'X' && c1 == 'O') {
+      fgetc(fp);                /* : */
+      fgetc(fp);                /* type */
+      fgetc(fp);                /* : */
+      abbrev0 = fgetc(fp);
+      abbrev1 = fgetc(fp);
+
+      /* Each outer case must end in a break: without it a valid value fell
+         through every later case into the default branch, which reported
+         an error and reset the result to OUTPUT_NONE. */
+      switch (abbrev0) {
+      case 'N':
+        if (abbrev1 == 'M') {
+          split_output = OUTPUT_NM;
+        } else {
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          split_output = OUTPUT_NONE;
+        }
+        break;
+      case 'C':
+        switch (abbrev1) {
+        case 'U': split_output = OUTPUT_CU; break;
+        case 'C': split_output = OUTPUT_CC; break;
+        case 'T': split_output = OUTPUT_CT; break;
+        case 'M': split_output = OUTPUT_CM; break;
+        case 'X': split_output = OUTPUT_CX; break;
+        default:
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          split_output = OUTPUT_NONE;
+        }
+        break;
+      case 'H':
+        switch (abbrev1) {
+        case 'U': split_output = OUTPUT_HU; break;
+        case 'C': split_output = OUTPUT_HC; break;
+        case 'T': split_output = OUTPUT_HT; break;
+        case 'M': split_output = OUTPUT_HM; break;
+        case 'X': split_output = OUTPUT_HX; break;
+        default:
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          split_output = OUTPUT_NONE;
+        }
+        break;
+      case 'U':
+        switch (abbrev1) {
+        case 'U': split_output = OUTPUT_UU; break;
+        case 'C': split_output = OUTPUT_UC; break;
+        case 'T': split_output = OUTPUT_UT; break;
+        case 'M': split_output = OUTPUT_UM; break;
+        case 'X': split_output = OUTPUT_UX; break;
+        default:
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          split_output = OUTPUT_NONE;
+        }
+        break;
+      case 'P':
+        switch (abbrev1) {
+        case 'C': split_output = OUTPUT_PC; break;
+        case 'I': split_output = OUTPUT_PI; break;
+        case 'S': split_output = OUTPUT_PS; break;
+        case 'L': split_output = OUTPUT_PL; break;
+        case 'M': split_output = OUTPUT_PM; break;
+        case 'X': split_output = OUTPUT_PX; break;
+        default:
+          fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+          split_output = OUTPUT_NONE;
+        }
+        break;
+      default:
+        fprintf(stderr,"Unexpected output type %c%c\n",abbrev0,abbrev1);
+        split_output = OUTPUT_NONE;
+      }
+    }
+  }
+}
+
+
+
+/* Main parser for processing with dups */
char *
-Samread_get_acc_and_softclip_fromfile (int *acclength, unsigned int *flag, Univcoord_T *genomicpos, int *initial_softclip, bool *query_lowp,
- FILE *fp, Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets, int linelength) {
+Samread_parse_acc_and_softclip_fromfile (int *acclength, unsigned int *flag, SAM_split_output_type *split_output,
+ char **hiti, Univcoord_T *genomicpos, int *initial_softclip, bool *query_lowp,
+ FILE *fp, Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets, int linelength) {
char *acc, *p;
char *substring;
Chrnum_T chrnum, mate_chrnum;
@@ -285,6 +457,17 @@ Samread_get_acc_and_softclip_fromfile (int *acclength, unsigned int *flag, Univc
FREEA(substring);
+ /* 9. ISIZE: Insert size. Skip. */
+ while (fgetc(fp) != '\t') ;
+
+ /* 10. SEQ: queryseq. Skip. */
+ while (fgetc(fp) != '\t') ;
+
+ /* 11. QUAL: quality scores. Skip. */
+ while (fgetc(fp) != '\t') ;
+
+ *split_output = parse_XO_and_HI_fromfile(&(*hiti),fp);
+
return acc;
}
@@ -588,10 +771,10 @@ Samread_parse_linelen_fromfile (FILE *fp) {
}
-
+/* Main parser for processing without dups */
Univcoord_T
-Samread_parse_genomicpos_fromfile (FILE *fp, unsigned int *flag, Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets,
- int linelength) {
+Samread_parse_genomicpos_fromfile (FILE *fp, unsigned int *flag, SAM_split_output_type *split_output,
+ Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets, int linelength) {
Univcoord_T genomicpos;
Chrnum_T chrnum;
Chrpos_T chrpos;
@@ -642,6 +825,29 @@ Samread_parse_genomicpos_fromfile (FILE *fp, unsigned int *flag, Univ_IIT_T chro
FREEA(substring);
+ /* 5. MAPQ: Mapping quality. Skip */
+ while (fgetc(fp) != '\t') ;
+
+ /* 6. CIGAR. Skip */
+ while (fgetc(fp) != '\t') ;
+
+ /* 7. MRNM: Mate chr. Skip */
+ while (fgetc(fp) != '\t') ;
+
+ /* 8. MPOS: Mate chrpos. Skip */
+ while (fgetc(fp) != '\t') ;
+
+ /* 9. ISIZE: Insert size. Skip. */
+ while (fgetc(fp) != '\t') ;
+
+ /* 10. SEQ: queryseq. Skip. */
+ while (fgetc(fp) != '\t') ;
+
+ /* 11. QUAL: quality scores. Skip. */
+ while (fgetc(fp) != '\t') ;
+
+ *split_output = parse_XO_fromfile(fp);
+
return genomicpos;
}
@@ -724,7 +930,7 @@ Samread_parse_read_and_mateinfo_fromfile (FILE *fp, unsigned int *flag, char **m
*--p = '\0';
/* 8. MPOS: Mate chrpos */
- p = substring;
+w p = substring;
while ((*p++ = fgetc(fp)) != '\t') ;
*--p = '\0';
diff --git a/src/samread.h b/src/samread.h
index 4d159aa..e22dbd3 100644
--- a/src/samread.h
+++ b/src/samread.h
@@ -1,25 +1,28 @@
-/* $Id: samread.h 149421 2014-09-30 17:54:10Z twu $ */
+/* $Id: samread.h 154089 2014-11-25 21:03:16Z twu $ */
#ifndef SAMREAD_INCLUDED
#define SAMREAD_INCLUDED
#include <stdio.h>
+#include "samflags.h"
#include "genomicpos.h"
#include "intlist.h"
#include "uintlist.h"
#include "iit-read-univ.h"
+
extern char *
Samread_get_acc_fromfile (int *acclength, FILE *fp, int linelength);
extern char *
-Samread_get_acc_and_softclip_fromfile (int *acclength, unsigned int *flag, Univcoord_T *genomicpos, int *initial_softclip, bool *query_lowp,
- FILE *fp, Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets, int linelength);
+Samread_parse_acc_and_softclip_fromfile (int *acclength, unsigned int *flag, SAM_split_output_type *split_output,
+ char **hiti, Univcoord_T *genomicpos, int *initial_softclip, bool *query_lowp,
+ FILE *fp, Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets, int linelength);
extern int
Samread_parse_linelen_fromfile (FILE *fp);
extern Univcoord_T
-Samread_parse_genomicpos_fromfile (FILE *fp, unsigned int *flag, Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets,
- int linelength);
+Samread_parse_genomicpos_fromfile (FILE *fp, unsigned int *flag, SAM_split_output_type *split_output,
+ Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets, int linelength);
extern void
Samread_parse_read_fromfile (FILE *fp, unsigned int *flag, int *readlength, char **read, int linelength);
diff --git a/src/sarray-read.c b/src/sarray-read.c
index 69638d0..5cd2da2 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 151053 2014-10-16 19:57:23Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 154021 2014-11-25 03:44:23Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2705,6 +2705,41 @@ extend_leftward (Univcoord_T goal, Univcoord_T chroffset, Univcoord_T chrhigh,
+/* qsort() comparator over an array of Stage3end_T: orders hits by the
+ * original match length of their donor substring, shortest first.  Hits
+ * with equal donor lengths compare as equal. */
+static int
+donor_match_length_cmp (const void *a, const void *b) {
+  Stage3end_T hit_a = * (Stage3end_T *) a;
+  Stage3end_T hit_b = * (Stage3end_T *) b;
+  int length_a = Substring_match_length_orig(Stage3end_substring_donor(hit_a));
+  int length_b = Substring_match_length_orig(Stage3end_substring_donor(hit_b));
+
+  /* Each comparison is 0 or 1, so the difference is -1, 0, or +1 */
+  return (length_a > length_b) - (length_a < length_b);
+}
+
+/* qsort() comparator over an array of Stage3end_T: orders hits by the
+ * original match length of their acceptor substring, shortest first.  Hits
+ * with equal acceptor lengths compare as equal. */
+static int
+acceptor_match_length_cmp (const void *a, const void *b) {
+  Stage3end_T hit_a = * (Stage3end_T *) a;
+  Stage3end_T hit_b = * (Stage3end_T *) b;
+  int length_a = Substring_match_length_orig(Stage3end_substring_acceptor(hit_a));
+  int length_b = Substring_match_length_orig(Stage3end_substring_acceptor(hit_b));
+
+  /* Each comparison is 0 or 1, so the difference is -1, 0, or +1 */
+  return (length_a > length_b) - (length_a < length_b);
+}
+
+
static void
collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous, List_T *singlesplicing,
List_T *doublesplicing, int querystart_same, int queryend_same,
@@ -2713,7 +2748,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
List_T rightward_set, List_T leftward_set, int querylength, Compress_T query_compress,
bool plusp, int genestrand, bool first_read_p, int nmisses_allowed) {
List_T set, p;
- Stage3end_T hit;
+ Stage3end_T hit, *hitarray;
Elt_T elt;
Univcoord_T left, left1, left2, *array;
Uintlist_T difflist = NULL; /* Won't work with LARGE_GENOMES */
@@ -2726,18 +2761,20 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
#endif
List_T spliceends_sense, spliceends_antisense, lowprob;
+ List_T donor_hits, acceptor_hits;
+ int donor_length, acceptor_length;
int nhits, nspliceends_sense, nspliceends_antisense, n_good_spliceends;
int best_nmismatches, nmismatches_donor, nmismatches_acceptor;
double best_prob, prob;
Substring_T donor, acceptor;
int sensedir;
- Uintlist_T ambcoords, ambcoords_left, ambcoords_right;
+ Uintlist_T ambcoords;
Intlist_T amb_knowni, amb_nmismatches;
int segmenti_donor_nknown, segmentj_acceptor_nknown,
segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
- int j, i, n;
+ int k, j, i, n;
bool segmenti_usedp, segmentj_usedp;
bool foundp;
@@ -3087,82 +3124,151 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
List_free(&spliceends_sense);
} else {
- /* Create ambiguous, sense */
- hit = (Stage3end_T) List_head(spliceends_sense);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
- sensedir = Stage3end_sensedir(hit);
-
- ambcoords = (Uintlist_T) NULL;
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
-
- if (Substring_left_genomicseg(donor) == left1) {
+ /* 1. Multiple hits, sense, left1 */
+ debug7(printf("multiple hits with best prob, sense\n"));
+ donor_hits = acceptor_hits = (List_T) NULL;
+ if (plusp == true) {
for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
acceptor = Stage3end_substring_acceptor(hit);
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
-#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
-#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ if (Substring_genomicstart(donor) == left1) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicstart(acceptor) == left1) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
+ } else {
+ Stage3end_free(&hit);
+ }
}
-
- nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- sensedir,/*sarrayp*/true));
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
- Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
- } else if (Substring_left_genomicseg(acceptor) == left1) {
+ } else {
for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Substring_genomicend(donor) == left1) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicend(acceptor) == left1) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
+ } else {
+ Stage3end_free(&hit);
+ }
+ }
+ }
+
+ if (donor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ donor = Stage3end_substring_donor(hit);
+ donor_length = Substring_match_length_orig(donor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ } else {
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ for (k = i; k < j; k++) {
+ acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ }
- nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- sensedir,/*sarrayp*/true));
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
- Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_match_length_orig(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donort*/NULL,amb_nmismatches,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
+ }
- } else {
- fprintf(stderr,"Unexpected: Neither donor left %u nor acceptor left %u equals left1 %u\n",
- Substring_left_genomicseg(donor),Substring_left_genomicseg(acceptor),left1);
- abort();
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&donor_hits);
}
- for (p = spliceends_sense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- Stage3end_free(&hit);
+ if (acceptor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ acceptor = Stage3end_substring_acceptor(hit);
+ acceptor_length = Substring_match_length_orig(acceptor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ } else {
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ for (k = i; k < j; k++) {
+ donor = Stage3end_substring_donor(hitarray[k]);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ }
+
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_match_length_orig(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
+ }
+
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&acceptor_hits);
}
+
List_free(&spliceends_sense);
}
}
@@ -3175,8 +3281,12 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
best_prob = 0.0;
for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- debug7(printf("analyzing distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
+ Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
+ Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
+ Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
best_prob = prob;
@@ -3187,8 +3297,10 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP) {
- debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
+ Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
}
@@ -3198,8 +3310,10 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_chimera_prob(hit) == best_prob) {
- debug7(printf("pushing distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ debug7(printf("pushing distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
+ Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
*singlesplicing = List_push(*singlesplicing,(void *) hit);
nhits += 1;
@@ -3210,82 +3324,151 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
List_free(&spliceends_antisense);
} else {
- /* Create ambiguous, antisense */
- hit = (Stage3end_T) List_head(spliceends_antisense);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
- sensedir = Stage3end_sensedir(hit);
-
- ambcoords = (Uintlist_T) NULL;
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
-
- if (Substring_left_genomicseg(donor) == left1) {
+ /* 2. Multiple hits, antisense, left1 */
+ debug7(printf("multiple hits with best prob, antisense\n"));
+ donor_hits = acceptor_hits = (List_T) NULL;
+ if (plusp == true) {
for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
acceptor = Stage3end_substring_acceptor(hit);
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
-#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
-#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ if (Substring_genomicstart(donor) == left1) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicstart(acceptor) == left1) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
+ } else {
+ Stage3end_free(&hit);
+ }
}
-
- nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- sensedir,/*sarrayp*/true));
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
- Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
- } else if (Substring_left_genomicseg(acceptor) == left1) {
+ } else {
for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Substring_genomicend(donor) == left1) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicend(acceptor) == left1) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
+ } else {
+ Stage3end_free(&hit);
+ }
+ }
+ }
+
+ if (donor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ donor = Stage3end_substring_donor(hit);
+ donor_length = Substring_match_length_orig(donor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ } else {
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ for (k = i; k < j; k++) {
+ acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ }
+
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_match_length_orig(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
+ }
+
+ i = j;
}
+ FREE(hitarray);
+ List_free(&donor_hits);
+ }
- nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- sensedir,/*sarrayp*/true));
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
- Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+ if (acceptor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ acceptor = Stage3end_substring_acceptor(hit);
+ acceptor_length = Substring_match_length_orig(acceptor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ } else {
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
- } else {
- fprintf(stderr,"Unexpected: Neither donor left %u nor acceptor left %u equals left1 %u\n",
- Substring_left_genomicseg(donor),Substring_left_genomicseg(acceptor),left1);
- abort();
- }
+ for (k = i; k < j; k++) {
+ donor = Stage3end_substring_donor(hitarray[k]);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ }
+
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_match_length_orig(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
+ }
- for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- Stage3end_free(&hit);
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&acceptor_hits);
}
+
List_free(&spliceends_antisense);
}
}
@@ -3525,82 +3708,152 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
List_free(&spliceends_sense);
} else {
- /* Create ambiguous, sense */
- hit = (Stage3end_T) List_head(spliceends_sense);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
- sensedir = Stage3end_sensedir(hit);
-
- ambcoords = (Uintlist_T) NULL;
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
-
- if (Substring_left_genomicseg(donor) == left2) {
+ /* 3. Multiple hits, sense, left2 */
+ debug7(printf("multiple hits with best prob, sense\n"));
+ donor_hits = acceptor_hits = (List_T) NULL;
+ if (plusp == true) {
for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
acceptor = Stage3end_substring_acceptor(hit);
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
-#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
-#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ if (Substring_genomicstart(donor) == left2) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicstart(acceptor) == left2) { /* anchor is left2 in this section, as for the donor test */
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
+ } else {
+ Stage3end_free(&hit);
+ }
}
-
- nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- sensedir,/*sarrayp*/true));
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
- Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
- } else if (Substring_left_genomicseg(acceptor) == left2) {
+ } else {
for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Substring_genomicend(donor) == left2) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicend(acceptor) == left2) { /* anchor is left2 in this section, as for the donor test */
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
+ } else {
+ Stage3end_free(&hit);
+ }
+ }
+ }
+
+ if (donor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ donor = Stage3end_substring_donor(hit);
+ donor_length = Substring_match_length_orig(donor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ } else {
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ for (k = i; k < j; k++) {
+ acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ }
- nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_known_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- sensedir,/*sarrayp*/true));
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
- Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_match_length_orig(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
+ }
- } else {
- fprintf(stderr,"Unexpected: Neither donor left %u nor acceptor left %u equals left2 %u\n",
- Substring_left_genomicseg(donor),Substring_left_genomicseg(acceptor),left2);
- abort();
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&donor_hits);
}
- for (p = spliceends_sense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- Stage3end_free(&hit);
+ if (acceptor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ acceptor = Stage3end_substring_acceptor(hit);
+ acceptor_length = Substring_match_length_orig(acceptor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ } else {
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ for (k = i; k < j; k++) {
+ donor = Stage3end_substring_donor(hitarray[k]);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ }
+
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_match_length_orig(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
+ }
+
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&acceptor_hits);
}
+
List_free(&spliceends_sense);
}
}
@@ -3648,82 +3901,151 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
List_free(&spliceends_antisense);
} else {
- /* Create ambiguous, antisense */
- hit = (Stage3end_T) List_head(spliceends_antisense);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
- sensedir = Stage3end_sensedir(hit);
-
- ambcoords = (Uintlist_T) NULL;
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
-
- if (Substring_left_genomicseg(donor) == left2) {
+ /* 4. Multiple hits, antisense, left2 */
+ debug7(printf("multiple hits with best prob, antisense\n"));
+ donor_hits = acceptor_hits = (List_T) NULL;
+ if (plusp == true) {
for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
acceptor = Stage3end_substring_acceptor(hit);
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
-#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
-#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ if (Substring_genomicstart(donor) == left2) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicstart(acceptor) == left2) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
+ } else {
+ Stage3end_free(&hit);
+ }
}
-
- nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- sensedir,/*sarrayp*/true));
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
- Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
- } else if (Substring_left_genomicseg(acceptor) == left2) {
+ } else {
for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Substring_genomicend(donor) == left2) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicend(acceptor) == left2) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
+ } else {
+ Stage3end_free(&hit);
+ }
+ }
+ }
+
+ if (donor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ donor = Stage3end_substring_donor(hit);
+ donor_length = Substring_match_length_orig(donor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ } else {
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ for (k = i; k < j; k++) { /* bound on k: walk only the tied group [i,j) of hitarray */
+ acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ }
- nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_known_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- sensedir,/*sarrayp*/true));
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
- Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_match_length_orig(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
+ }
- } else {
- fprintf(stderr,"Unexpected: Neither donor left %u nor acceptor left %u equals left2 %u\n",
- Substring_left_genomicseg(donor),Substring_left_genomicseg(acceptor),left2);
- abort();
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&donor_hits);
}
- for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- Stage3end_free(&hit);
+ if (acceptor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ acceptor = Stage3end_substring_acceptor(hit);
+ acceptor_length = Substring_match_length_orig(acceptor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ } else {
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ for (k = i; k < j; k++) {
+ donor = Stage3end_substring_donor(hitarray[k]);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ }
+
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_match_length_orig(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
+ }
+
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&acceptor_hits);
}
+
List_free(&spliceends_antisense);
}
}
diff --git a/src/snpindex.c b/src/snpindex.c
index efe99f8..8734d46 100644
--- a/src/snpindex.c
+++ b/src/snpindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: snpindex.c 133555 2014-04-17 23:06:39Z twu $";
+static char rcsid[] = "$Id: snpindex.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -504,7 +504,7 @@ process_snp_block (int *nwarnings, Positionsptr_T *offsets, UINT4 *positions4, U
}
if (nsnps == 0) {
/* no snps */
- /* fprintf(stderr,"\nNo snps at position %lu, %s:%u",position,divstring,Interval_low(interval)); */
+ /* fprintf(stderr,"\nNo snps at position %llu, %s:%u",(unsigned long long) position,divstring,Interval_low(interval)); */
} else if (nsnps > 4) {
/* too many snps */
} else if (badcharp == true) {
@@ -551,8 +551,8 @@ process_snp_block (int *nwarnings, Positionsptr_T *offsets, UINT4 *positions4, U
oligo = Uintlist_head(p);
nt = shortoligo_nt(oligo,index1part);
if (samep(nt,&(refstring[starti]),index1part) == true) {
- fprintf(stderr,"Storing oligomer %s that is the same as the reference at %lu (%s:%u)\n",
- nt,position,divstring,chrpos+1U);
+ fprintf(stderr,"Storing oligomer %s that is the same as the reference at %llu (%s:%u)\n",
+ nt,(unsigned long long) position,divstring,chrpos+1U);
abort();
}
FREE(nt);
@@ -566,7 +566,7 @@ process_snp_block (int *nwarnings, Positionsptr_T *offsets, UINT4 *positions4, U
oligo = Uintlist_head(p);
offsets[oligo + 1U] += 1;
debug1(nt = shortoligo_nt(oligo,index1part);
- printf("Storing %s at %lu (%s:%u)\n",nt,position,divstring,chrpos+1U);
+ printf("Storing %s at %llu (%s:%u)\n",nt,(unsigned long long) position,divstring,chrpos+1U);
FREE(nt));
}
@@ -577,7 +577,7 @@ process_snp_block (int *nwarnings, Positionsptr_T *offsets, UINT4 *positions4, U
oligo = Uintlist_head(p);
positions8[offsets[oligo]++] = position;
debug1(nt = shortoligo_nt(oligo,index1part);
- printf("Storing %s at %lu (%s:%u)\n",nt,position,divstring,chrpos+1U);
+ printf("Storing %s at %llu (%s:%u)\n",nt,(unsigned long long) position,divstring,chrpos+1U);
FREE(nt));
}
@@ -586,7 +586,7 @@ process_snp_block (int *nwarnings, Positionsptr_T *offsets, UINT4 *positions4, U
oligo = Uintlist_head(p);
positions4[offsets[oligo]++] = (UINT4) position;
debug1(nt = shortoligo_nt(oligo,index1part);
- printf("Storing %s at %lu (%s:%u)\n",nt,position,divstring,chrpos+1U);
+ printf("Storing %s at %llu (%s:%u)\n",nt,(unsigned long long) position,divstring,chrpos+1U);
FREE(nt));
}
}
@@ -896,7 +896,7 @@ merge_positions8 (FILE *positions_high_fp, FILE *positions_low_fp,
ptr2++;
} else {
nt = shortoligo_nt(oligo,index1part);
- fprintf(stderr,"Problem: saw duplicate positions %lu in oligo %s\n",*ptr1,nt);
+ fprintf(stderr,"Problem: saw duplicate positions %llu in oligo %s\n",(unsigned long long) *ptr1,nt);
FREE(nt);
abort();
/*
@@ -1158,7 +1158,8 @@ main (int argc, char *argv[]) {
/* Copy genome */
nblocks = Genome_totallength(genome)/32U;
snp_blocks = (Genomecomp_T *) CALLOC(nblocks*3,sizeof(Genomecomp_T));
- fprintf(stderr,"Allocating %lu*3*%lu bytes for compressed genome\n",nblocks,sizeof(Genomecomp_T));
+ fprintf(stderr,"Allocating %llu*3*%d bytes for compressed genome\n",
+ (unsigned long long) nblocks,(int) sizeof(Genomecomp_T));
memcpy(snp_blocks,Genome_blocks(genome),nblocks*3*sizeof(Genomecomp_T));
/* Prepare for write */
@@ -1281,7 +1282,8 @@ main (int argc, char *argv[]) {
sprintf(filename2,"%s/%s.%s",destdir,filenames->offsets_basename_ptr,snps_root);
- fprintf(stderr,"Writing %lu offsets with %u total positions\n",oligospace+1,offsets[oligospace]);
+ fprintf(stderr,"Writing %llu offsets with %llu total positions\n",
+ (unsigned long long) oligospace+1,(unsigned long long) offsets[oligospace]);
if (compression_type == BITPACK64_COMPRESSION) {
Bitpack64_write_differential(/*ptrsfile*/filename1,/*compfile*/filename2,offsets,oligospace);
} else {
diff --git a/src/splice.c b/src/splice.c
index 20d2567..19ed48d 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 148672 2014-09-23 18:51:53Z twu $";
+static char rcsid[] = "$Id: splice.c 153224 2014-11-13 23:02:28Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -416,7 +416,7 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -434,7 +434,7 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -480,7 +480,7 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -498,7 +498,7 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -858,7 +858,7 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -876,7 +876,7 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -922,7 +922,7 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -940,7 +940,7 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -1470,7 +1470,7 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) {
*nhits += 1;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
+ /*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -1488,7 +1488,7 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
} else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
+ /*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -1543,7 +1543,7 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) {
*nhits += 1;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
+ /*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -1561,7 +1561,7 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
} else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
+ /*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -1694,7 +1694,8 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -1726,7 +1727,8 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -1937,7 +1939,8 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -1969,7 +1972,8 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
diff --git a/src/stage1hr.c b/src/stage1hr.c
index f4e4209..7f65f45 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 151054 2014-10-16 19:58:21Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 154087 2014-11-25 21:02:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1539,7 +1539,8 @@ Batch_init (Batch_T batch, int querypos, int diagterm,
#ifdef NO_EXTENSIONS_BEFORE_ZERO
/* This prevents us from finding insertions at the beginning of the genome */
while (batch->npositions > 0 && batch->diagonal < (unsigned int) querylength) {
- debug11(printf("Eliminating diagonal %lu as straddling beginning of genome (Batch_init)\n",batch->diagonal));
+ debug11(printf("Eliminating diagonal %llu as straddling beginning of genome (Batch_init)\n",
+ (unsigned long long) batch->diagonal));
batch->npositions--;
if (batch->npositions > 0) {
#ifdef LARGE_GENOMES
@@ -1573,7 +1574,8 @@ Batch_init_simple (Batch_T batch, Univcoord_T *diagonals, int ndiagonals, int qu
batch->npositions = ndiagonals;
while (batch->npositions > 0 && batch->diagonal < (unsigned int) querylength) {
- debug11(printf("Eliminating diagonal %lu as straddling beginning of genome (Batch_init)\n",batch->diagonal));
+ debug11(printf("Eliminating diagonal %llu as straddling beginning of genome (Batch_init)\n",
+ (unsigned long long) batch->diagonal));
batch->npositions--;
if (batch->npositions > 0) {
/* positions are really diagonals, already in correct endianness */
@@ -1744,13 +1746,14 @@ static int
binary_search_bigendian (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) {
int middlei;
- debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%lu\n",lowi,highi,goal));
+ debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%llu\n",lowi,highi,(unsigned long long) goal));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
- debug10(printf(" binary: %d:%lu %d:%lu %d:%lu vs. %lu\n",
- lowi,Bigendian_convert_univcoord(positions[lowi]),middlei,Bigendian_convert_univcoord(positions[middlei]),
- highi,Bigendian_convert_univcoord(positions[highi]),goal));
+ debug10(printf(" binary: %d:%llu %d:%llu %d:%llu vs. %llu\n",
+ lowi,(unsigned long long) Bigendian_convert_univcoord(positions[lowi]),
+ middlei,(unsigned long long) Bigendian_convert_univcoord(positions[middlei]),
+ highi,(unsigned long long) Bigendian_convert_univcoord(positions[highi]),goal));
if (goal < Bigendian_convert_univcoord(positions[middlei])) {
highi = middlei;
} else if (goal > Bigendian_convert_univcoord(positions[middlei])) {
@@ -1773,15 +1776,17 @@ binary_search_large (int lowi, int highi, unsigned char *positions_high, UINT4 *
int middlei;
Univcoord_T position;
- debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%lu\n",lowi,highi,goal));
+ debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%llu\n",
+ lowi,highi,(unsigned long long) goal));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
position = ((Univcoord_T) positions_high[middlei] << 32) + positions_low[middlei];
- debug10(printf(" binary: %d:%lu %d:%lu %d:%lu vs. %lu\n",
- lowi,(positions_high[lowi] << 32) + positions_low[lowi],
- middlei,position,
- highi,(positions_high[highi] << 32) + positions_low[highi],goal));
+ debug10(printf(" binary: %d:%llu %d:%llu %d:%llu vs. %llu\n",
+ lowi,(unsigned long long) ((positions_high[lowi] << 32) + positions_low[lowi]),
+ middlei,(unsigned long long) position,
+ highi,(unsigned long long) ((positions_high[highi] << 32) + positions_low[highi]),
+ (unsigned long long) goal));
if (goal < position) {
highi = middlei;
} else if (goal > position) {
@@ -1802,13 +1807,16 @@ static int
binary_search (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) {
int middlei;
- debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%lu\n",lowi,highi,goal));
+ debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%llu\n",
+ lowi,highi,(unsigned long long) goal));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
- debug10(printf(" binary: %d:%lu %d:%lu %d:%lu vs. %lu\n",
- lowi,positions[lowi],middlei,positions[middlei],
- highi,positions[highi],goal));
+ debug10(printf(" binary: %d:%llu %d:%llu %d:%llu vs. %llu\n",
+ lowi,(unsigned long long) positions[lowi],
+ middlei,(unsigned long long) positions[middlei],
+ highi,(unsigned long long) positions[highi],
+ (unsigned long long) goal));
if (goal < positions[middlei]) {
highi = middlei;
} else if (goal > positions[middlei]) {
@@ -1828,7 +1836,8 @@ static int
binary_search_segments (int lowi, int highi, struct Segment_T *segments, Univcoord_T goal) {
int middlei, middlei_up, middlei_down;
- debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%lu\n",lowi,highi,goal));
+ debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%llu\n",
+ lowi,highi,(unsigned long long) goal));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
@@ -1838,9 +1847,11 @@ binary_search_segments (int lowi, int highi, struct Segment_T *segments, Univcoo
} else {
middlei_up = middlei_down = middlei;
}
- debug10(printf(" binary: %d:%lu %d:%lu %d:%lu vs. %lu\n",
- lowi,segments[lowi].diagonal,middlei,segments[middlei].diagonal,
- highi,segments[highi].diagonal,goal));
+ debug10(printf(" binary: %d:%llu %d:%llu %d:%llu vs. %llu\n",
+ lowi,(unsigned long long) segments[lowi].diagonal,
+ middlei,(unsigned long long) segments[middlei].diagonal,
+ highi,(unsigned long long) segments[highi].diagonal,
+ (unsigned long long) goal));
if (goal < segments[middlei_down].diagonal) {
highi = middlei_down;
} else if (goal > segments[middlei_up].diagonal) {
@@ -1873,8 +1884,8 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
int nmismatches, j;
- debug7(printf("identify_multimiss_iter on diagonal %lu with spanningset of length %d and %d misses seen initially\n",
- goal,List_length(prev),nmisses_seen));
+ debug7(printf("identify_multimiss_iter on diagonal %llu with spanningset of length %d and %d misses seen initially\n",
+ (unsigned long long) goal,List_length(prev),nmisses_seen));
if (nmisses_seen > nmisses_allowed) {
debug7(printf("Result: skipping because %d misses seen > %d allowed\n",nmisses_seen,nmisses_allowed));
@@ -1883,8 +1894,8 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
for (spanningset = prev->rest; spanningset /* != NULL */; prev = spanningset, spanningset = spanningset->rest) {
elt = (Spanningelt_T) spanningset->first;
- debug7(printf("nmisses seen %d, allowed %d, remaining %d, goal %lu: ",
- nmisses_seen,nmisses_allowed,List_length(prev->rest),goal));
+ debug7(printf("nmisses seen %d, allowed %d, remaining %d, goal %llu: ",
+ nmisses_seen,nmisses_allowed,List_length(prev->rest),(unsigned long long) goal));
if (elt->intersection_diagonals != NULL) {
/* Intersection diagonals already computed */
@@ -2095,7 +2106,7 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
}
} else if (position > local_goal) {
/* Advanced past goal. Continue with one more miss seen. */
- debug7(printf(" compoundpos failed %lu > %lu --",position,local_goal));
+ debug7(printf(" compoundpos failed %llu > %llu --",(unsigned long long) position,(unsigned long long) local_goal));
if (++nmisses_seen > nmisses_allowed) {
debug7(printf(" nmisses seen %d > allowed %d, so returning\n",nmisses_seen,nmisses_allowed));
return hits;
@@ -2107,7 +2118,7 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
}
} else {
/* Found goal. Advance past goal and continue with loop. */
- debug7(printf(" found %lu, advancing...",local_goal));
+ debug7(printf(" found %llu, advancing...",(unsigned long long) local_goal));
/* continue; -- Naturally falls to end of loop */
}
@@ -2189,7 +2200,9 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
#ifdef LARGE_GENOMES
} else if ((((Univcoord_T) *elt->positions_high) << 32) + (*elt->positions_low) > local_goal) {
/* Already advanced past goal, so continue with one more miss seen. */
- debug7(printf(" one miss %lu > %lu --",(((Univcoord_T) *elt->positions_high) << 32) + (*elt->positions_low),local_goal));
+ debug7(printf(" one miss %llu > %llu --",
+ (unsigned long long) ((((Univcoord_T) *elt->positions_high) << 32) + (*elt->positions_low)),
+ (unsigned long long) local_goal));
if (++nmisses_seen > nmisses_allowed) {
debug7(printf(" nmisses seen %d > allowed %d, so returning\n",nmisses_seen,nmisses_allowed));
return hits;
@@ -2202,7 +2215,8 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
#elif defined(WORDS_BIGENDIAN)
} else if (Bigendian_convert_univcoord(*elt->positions) > local_goal) {
/* Already advanced past goal, so continue with one more miss seen. */
- debug7(printf(" one miss %lu > %lu --",Bigendian_convert_univcoord(*elt->positions),local_goal));
+ debug7(printf(" one miss %llu > %llu --",
+ (unsigned long long) Bigendian_convert_univcoord(*elt->positions),(unsigned long long) local_goal));
if (++nmisses_seen > nmisses_allowed) {
debug7(printf(" nmisses seen %d > allowed %d, so returning\n",nmisses_seen,nmisses_allowed));
return hits;
@@ -2215,7 +2229,8 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
#else
} else if (*elt->positions > local_goal) {
/* Already advanced past goal, so continue with one more miss seen. */
- debug7(printf(" one miss %lu > %lu --",*elt->positions,local_goal));
+ debug7(printf(" one miss %llu > %llu --",
+ (unsigned long long) *elt->positions,(unsigned long long) local_goal));
if (++nmisses_seen > nmisses_allowed) {
debug7(printf(" nmisses seen %d > allowed %d, so returning\n",nmisses_seen,nmisses_allowed));
return hits;
@@ -2252,8 +2267,8 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
Univ_IIT_interval_bounds(&(*chroffset),&(*chrhigh),&(*chrlength),chromosome_iit,*chrnum,circular_typeint);
/* *chrhigh += 1; */
}
- debug(printf("Reporting perfect segment at left %lu and diagonal %lu, with chroffset %lu and chrhigh %lu\n",
- left,goal,*chroffset,*chrhigh));
+ debug(printf("Reporting perfect segment at left %llu and diagonal %llu, with chroffset %llu and chrhigh %llu\n",
+ (unsigned long long) left,(unsigned long long) goal,(unsigned long long) *chroffset,(unsigned long long) *chrhigh));
if (goal > *chrhigh) {
/* Query goes over end of chromosome */
debug(printf(" Ignore: goes over end of chromosome\n"));
@@ -2678,13 +2693,14 @@ find_spanning_exact_matches (int *found_score, int *nhits, List_T hits, T this,
while (--npositions0 >= 0 && nempty == 0 && *nhits <= maxpaths_search) {
#ifdef LARGE_GENOMES
- debug7(printf("diag0 %d:%lu+%d advancing\n",npositions0,(((Univcoord_T) *positions0_high++) << 32) + (*positions0_low++),diagterm0));
+ debug7(printf("diag0 %d:%llu+%d advancing\n",
+ npositions0,(unsigned long long) ((((Univcoord_T) *positions0_high++) << 32) + (*positions0_low++)),diagterm0));
diagonal0 = (((Univcoord_T) *positions0_high++) << 32) + (*positions0_low++) + diagterm0;
#elif defined(WORDS_BIGENDIAN)
- debug7(printf("diag0 %d:%lu+%d advancing\n",npositions0,Bigendian_convert_univcoord(*positions0),diagterm0));
+ debug7(printf("diag0 %d:%u+%d advancing\n",npositions0,Bigendian_convert_univcoord(*positions0),diagterm0));
diagonal0 = Bigendian_convert_univcoord(*positions0++) + diagterm0;
#else
- debug7(printf("diag0 %d:%lu+%d advancing\n",npositions0,(*positions0),diagterm0));
+ debug7(printf("diag0 %d:%u+%d advancing\n",npositions0,(*positions0),diagterm0));
diagonal0 = (*positions0++) + diagterm0;
#endif
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal0,
@@ -2720,7 +2736,7 @@ find_spanning_exact_matches (int *found_score, int *nhits, List_T hits, T this,
global_miss_querypos3 = 0;
while (--ndiagonals0 >= 0 && nempty == 0 && *nhits <= maxpaths_search) {
- debug7(printf("diag0 %d:%lu advancing\n",ndiagonals0,(*diagonals0)));
+ debug7(printf("diag0 %d:%llu advancing\n",ndiagonals0,(unsigned long long) (*diagonals0)));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,*diagonals0++,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_fwd,
@@ -2794,13 +2810,14 @@ find_spanning_exact_matches (int *found_score, int *nhits, List_T hits, T this,
while (--npositions0 >= 0 && nempty == 0 && *nhits <= maxpaths_search) {
#ifdef LARGE_GENOMES
- debug7(printf("diag0 %d:%lu+%d advancing\n",npositions0,(((Univcoord_T) *positions0_high++) << 32) + (*positions0_low++),diagterm0));
+ debug7(printf("diag0 %d:%llu+%d advancing\n",
+ npositions0,(unsigned long long) ((((Univcoord_T) *positions0_high++) << 32) + (*positions0_low++)),diagterm0));
diagonal0 = (((Univcoord_T) *positions0_high++) << 32) + (*positions0_low++) + diagterm0;
#elif defined(WORDS_BIGENDIAN)
- debug7(printf("diag0 %d:%lu+%d advancing\n",npositions0,Bigendian_convert_univcoord(*positions0),diagterm0));
+ debug7(printf("diag0 %d:%u+%d advancing\n",npositions0,Bigendian_convert_univcoord(*positions0),diagterm0));
diagonal0 = Bigendian_convert_univcoord(*positions0++) + diagterm0;
#else
- debug7(printf("diag0 %d:%lu+%d advancing\n",npositions0,(*positions0),diagterm0));
+ debug7(printf("diag0 %d:%u+%d advancing\n",npositions0,(*positions0),diagterm0));
diagonal0 = (*positions0++) + diagterm0;
#endif
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal0,
@@ -2836,7 +2853,7 @@ find_spanning_exact_matches (int *found_score, int *nhits, List_T hits, T this,
global_miss_querypos3 = 0;
while (--ndiagonals0 >= 0 && nempty == 0 && *nhits <= maxpaths_search) {
- debug7(printf("diag0 %d:%lu advancing\n",ndiagonals0,(*diagonals0)));
+ debug7(printf("diag0 %d:%llu advancing\n",ndiagonals0,(unsigned long long) (*diagonals0)));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,*diagonals0++,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_rev,
@@ -2905,7 +2922,7 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
while (ndiagonals0 > 0 && ndiagonals1 > 0 && nempty <= 1 && *nhits <= maxpaths_search) {
if ((diagonal0 = (*diagonals0)) < (diagonal1 = (*diagonals1))) {
- debug7(printf("diag0 %d:%lu advancing\n",ndiagonals0,diagonal0));
+ debug7(printf("diag0 %d:%llu advancing\n",ndiagonals0,(unsigned long long) diagonal0));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal0,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_fwd,
@@ -2915,7 +2932,7 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
--ndiagonals0;
} else if (diagonal1 < diagonal0) {
- debug7(printf("diag1 %d:%lu advancing\n",ndiagonals1,diagonal1));
+ debug7(printf("diag1 %d:%llu advancing\n",ndiagonals1,(unsigned long long) diagonal1));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal1,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_fwd,
@@ -2925,7 +2942,8 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
--ndiagonals1;
} else {
- debug7(printf("diag0&1 %d:%lu == %d:%lu advancing\n",ndiagonals0,diagonal0,ndiagonals1,diagonal1));
+ debug7(printf("diag0&1 %d:%llu == %d:%llu advancing\n",
+ ndiagonals0,(unsigned long long) diagonal0,ndiagonals1,(unsigned long long) diagonal1));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal0,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_fwd,
@@ -2939,7 +2957,7 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
}
while (--ndiagonals0 >= 0 && nempty == 0 && *nhits <= maxpaths_search) {
- debug7(printf("diag0 %d:%lu advancing\n",ndiagonals0+1,(*diagonals0)));
+ debug7(printf("diag0 %d:%llu advancing\n",ndiagonals0+1,(unsigned long long) (*diagonals0)));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,*diagonals0++,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_fwd,
@@ -2948,7 +2966,7 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
}
while (--ndiagonals1 >= 0 && nempty == 0 && *nhits <= maxpaths_search) {
- debug7(printf("diag1 %d:%lu advancing\n",ndiagonals1+1,(*diagonals1)));
+ debug7(printf("diag1 %d:%llu advancing\n",ndiagonals1+1,(unsigned long long) (*diagonals1)));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,*diagonals1++,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_fwd,
@@ -2995,7 +3013,7 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
while (ndiagonals0 > 0 && ndiagonals1 > 0 && nempty <= 1 && *nhits <= maxpaths_search) {
if ((diagonal0 = (*diagonals0)) < (diagonal1 = (*diagonals1))) {
- debug7(printf("diag0 %d:%lu advancing\n",ndiagonals0,(*diagonals0)));
+ debug7(printf("diag0 %d:%llu advancing\n",ndiagonals0,(unsigned long long) (*diagonals0)));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal0,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_rev,
@@ -3005,7 +3023,7 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
--ndiagonals0;
} else if (diagonal1 < diagonal0) {
- debug7(printf("diag1 %d:%lu advancing\n",ndiagonals1,(*diagonals1)));
+ debug7(printf("diag1 %d:%llu advancing\n",ndiagonals1,(unsigned long long) (*diagonals1)));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal1,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_rev,
@@ -3015,7 +3033,8 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
--ndiagonals1;
} else {
- debug7(printf("diag0&1 %d:%lu == %d:%lu advancing\n",ndiagonals0,diagonal0,ndiagonals1,diagonal1));
+ debug7(printf("diag0&1 %d:%llu == %d:%llu advancing\n",
+ ndiagonals0,(unsigned long long) diagonal0,ndiagonals1,(unsigned long long) diagonal1));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal0,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_rev,
@@ -3029,7 +3048,7 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
}
while (--ndiagonals0 >= 0 && nempty == 0 && *nhits <= maxpaths_search) {
- debug7(printf("diag0 %d:%lu advancing\n",ndiagonals0+1,(*diagonals0)));
+ debug7(printf("diag0 %d:%llu advancing\n",ndiagonals0+1,(unsigned long long) (*diagonals0)));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,*diagonals0++,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_rev,
@@ -3038,7 +3057,7 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
}
while (--ndiagonals1 >= 0 && nempty == 0 && *nhits <= maxpaths_search) {
- debug7(printf("diag1 %d:%lu advancing\n",ndiagonals1+1,(*diagonals1)));
+ debug7(printf("diag1 %d:%llu advancing\n",ndiagonals1+1,(unsigned long long) (*diagonals1)));
hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,*diagonals1++,
prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
querylength,/*query_compress*/query_compress_rev,
@@ -3139,7 +3158,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
batch = heap[1];
diagonal = batch->diagonal;
count = 1;
- debug7(printf("at #%d, initial diagonal is %lu\n",batch->querypos,diagonal));
+ debug7(printf("at #%d, initial diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
/* Update batch */
if (--batch->npositions <= 0) {
@@ -3153,17 +3172,19 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
}
/* Heapify down */
- debug6(printf("Starting heapify with %lu\n",diagonal));
+ debug6(printf("Starting heapify with %llu\n",(unsigned long long) diagonal));
parenti = 1;
smallesti = (heap[3]->diagonal < heap[2]->diagonal) ? 3 : 2;
- debug6(printf("Comparing left %d/right %d: %lu and %lu\n",2,3,heap[2]->diagonal,heap[3]->diagonal));
+ debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+ 2,3,(unsigned long long) heap[2]->diagonal,(unsigned long long)heap[3]->diagonal));
while (batch->diagonal > heap[smallesti]->diagonal) {
heap[parenti] = heap[smallesti];
parenti = smallesti;
smallesti = LEFT(parenti);
righti = smallesti+1;
- debug6(printf("Comparing left %d/right %d: %lu and %lu\n",
- smallesti,righti,heap[smallesti]->diagonal,heap[righti]->diagonal));
+ debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+ smallesti,righti,(unsigned long long) heap[smallesti]->diagonal,
+ (unsigned long long) heap[righti]->diagonal));
if (heap[righti]->diagonal < heap[smallesti]->diagonal) {
smallesti = righti;
}
@@ -3178,7 +3199,8 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
if (batch->diagonal == diagonal) {
count++;
- debug7(printf("at #%d, incrementing diagonal %lu to count %d\n",batch->querypos,diagonal,count));
+ debug7(printf("at #%d, incrementing diagonal %llu to count %d\n",
+ batch->querypos,(unsigned long long) diagonal,count));
} else {
/* End of diagonal */
if (count >= nrequired) {
@@ -3191,7 +3213,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
}
diagonal = batch->diagonal;
count = 1;
- debug7(printf("at #%d, next diagonal is %lu\n",batch->querypos,diagonal));
+ debug7(printf("at #%d, next diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
}
/* Update batch */
@@ -3206,17 +3228,19 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
}
/* Heapify down */
- debug6(printf("Starting heapify with %lu\n",diagonal));
+ debug6(printf("Starting heapify with %llu\n",(unsigned long long) diagonal));
parenti = 1;
smallesti = (heap[3]->diagonal < heap[2]->diagonal) ? 3 : 2;
- debug6(printf("Comparing left %d/right %d: %lu and %lu\n",2,3,heap[2]->diagonal,heap[3]->diagonal));
+ debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+ 2,3,(unsigned long long) heap[2]->diagonal,(unsigned long long) heap[3]->diagonal));
while (batch->diagonal > heap[smallesti]->diagonal) {
heap[parenti] = heap[smallesti];
parenti = smallesti;
smallesti = LEFT(parenti);
righti = smallesti+1;
- debug6(printf("Comparing left %d/right %d: %lu and %lu\n",
- smallesti,righti,heap[smallesti]->diagonal,heap[righti]->diagonal));
+ debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+ smallesti,righti,(unsigned long long) heap[smallesti]->diagonal,
+ (unsigned long long) heap[righti]->diagonal));
if (heap[righti]->diagonal < heap[smallesti]->diagonal) {
smallesti = righti;
}
@@ -3298,7 +3322,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
batch = heap[1];
diagonal = batch->diagonal;
count = 1;
- debug7(printf("at #%d, initial diagonal is %lu\n",batch->querypos,diagonal));
+ debug7(printf("at #%d, initial diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
/* Update batch */
if (--batch->npositions <= 0) {
@@ -3312,17 +3336,19 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
}
/* Heapify down */
- debug6(printf("Starting heapify with %lu\n",diagonal));
+ debug6(printf("Starting heapify with %llu\n",(unsigned long long) diagonal));
parenti = 1;
smallesti = (heap[3]->diagonal < heap[2]->diagonal) ? 3 : 2;
- debug6(printf("Comparing left %d/right %d: %lu and %lu\n",2,3,heap[2]->diagonal,heap[3]->diagonal));
+ debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+ 2,3,(unsigned long long) heap[2]->diagonal,(unsigned long long) heap[3]->diagonal));
while (batch->diagonal > heap[smallesti]->diagonal) {
heap[parenti] = heap[smallesti];
parenti = smallesti;
smallesti = LEFT(parenti);
righti = smallesti+1;
- debug6(printf("Comparing left %d/right %d: %lu and %lu\n",
- smallesti,righti,heap[smallesti]->diagonal,heap[righti]->diagonal));
+ debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+ smallesti,righti,(unsigned long long) heap[smallesti]->diagonal,
+ (unsigned long long) heap[righti]->diagonal));
if (heap[righti]->diagonal < heap[smallesti]->diagonal) {
smallesti = righti;
}
@@ -3337,7 +3363,8 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
if (batch->diagonal == diagonal) {
count++;
- debug7(printf("at #%d, incrementing diagonal %lu to count %d\n",batch->querypos,diagonal,count));
+ debug7(printf("at #%d, incrementing diagonal %llu to count %d\n",
+ batch->querypos,(unsigned long long) diagonal,count));
} else {
/* End of diagonal */
if (count >= nrequired) {
@@ -3349,7 +3376,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
}
diagonal = batch->diagonal;
count = 1;
- debug7(printf("at #%d, next diagonal is %lu\n",batch->querypos,diagonal));
+ debug7(printf("at #%d, next diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
}
/* Update batch */
@@ -3364,17 +3391,19 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
}
/* Heapify down */
- debug6(printf("Starting heapify with %lu\n",diagonal));
+ debug6(printf("Starting heapify with %llu\n",(unsigned long long) diagonal));
parenti = 1;
smallesti = (heap[3]->diagonal < heap[2]->diagonal) ? 3 : 2;
- debug6(printf("Comparing left %d/right %d: %lu and %lu\n",2,3,heap[2]->diagonal,heap[3]->diagonal));
+ debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+ 2,3,(unsigned long long) heap[2]->diagonal,(unsigned long long) heap[3]->diagonal));
while (batch->diagonal > heap[smallesti]->diagonal) {
heap[parenti] = heap[smallesti];
parenti = smallesti;
smallesti = LEFT(parenti);
righti = smallesti+1;
- debug6(printf("Comparing left %d/right %d: %lu and %lu\n",
- smallesti,righti,heap[smallesti]->diagonal,heap[righti]->diagonal));
+ debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+ smallesti,righti,(unsigned long long) heap[smallesti]->diagonal,
+ (unsigned long long) heap[righti]->diagonal));
if (heap[righti]->diagonal < heap[smallesti]->diagonal) {
smallesti = righti;
}
@@ -3700,7 +3729,8 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
#endif
- debug1(printf("multiple_mm_%s, diagonal %lu, querypos %d\n",plusp ? "plus" : "minus",diagonal,querypos));
+ debug1(printf("multiple_mm_%s, diagonal %llu, querypos %d\n",
+ plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
debug1(printf("first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
first_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
@@ -3795,10 +3825,11 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
}
#endif
- debug1(printf("diagonal %lu unchanged: last_querypos = %d, querypos = %d => floor increments by %d\n",
- diagonal,last_querypos,querypos,floor_incr));
- debug1(printf("*multiple_mm_%s, diagonal %lu, querypos %d, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
- plusp ? "plus" : "minus",diagonal,querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+ debug1(printf("diagonal %llu unchanged: last_querypos = %d, querypos = %d => floor increments by %d\n",
+ (unsigned long long) diagonal,last_querypos,querypos,floor_incr));
+ debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
} else {
/* End of diagonal */
floor_incr = floors_to_pos3[last_querypos] /* floors->score[last_querypos][query_lastpos+index1interval] */;
@@ -3818,8 +3849,9 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
floor_right = floor_incr;
#endif
- debug1(printf("new diagonal %lu > last diagonal %lu: last_querypos = %d => final values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
- diagonal,last_diagonal,last_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+ debug1(printf("new diagonal %llu > last diagonal %llu: last_querypos = %d => final values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ (unsigned long long) diagonal,(unsigned long long) last_diagonal,last_querypos,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
if (last_diagonal > chrhigh) {
if (ptr > ptr_chrstart) {
@@ -3866,8 +3898,10 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
}
if (last_diagonal <= chrhigh) { /* FORMULA for high position */
/* position of high end is within current chromosome */
- debug1(printf(" => multiple_mm, diagonal %lu, query %d..%d, chrbounds %lu..%lu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
- last_diagonal,first_querypos,last_querypos,chroffset,chrhigh,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+ debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ (unsigned long long) last_diagonal,first_querypos,last_querypos,
+ (unsigned long long) chroffset,(unsigned long long) chrhigh,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
/* Save segment, but first advance splicesites past segment_left */
segment_left = last_diagonal - querylength;
@@ -3982,9 +4016,11 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
#endif
#endif
- debug1(printf("*multiple_mm_%s, diagonal %lu, querypos %d\n",plusp ? "plus" : "minus",diagonal,querypos));
- debug1(printf("start of diagonal %lu, first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
- diagonal,first_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+ debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d\n",
+ plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
+ debug1(printf("start of diagonal %llu, first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ (unsigned long long) diagonal,first_querypos,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
}
last_querypos = querypos;
@@ -4119,8 +4155,10 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
debug1(printf("last_diagonal %u vs chrhigh %u (looking for <=)\n",last_diagonal,chrhigh));
if (last_diagonal <= chrhigh) { /* FORMULA for high position */
/* position of high end is within current chromosome */
- debug1(printf(" => multiple_mm, diagonal %lu, query %d..%d, chrbounds %lu..%lu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
- last_diagonal,first_querypos,last_querypos,chroffset,chrhigh,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+ debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ (unsigned long long) last_diagonal,first_querypos,last_querypos,
+ (unsigned long long) chroffset,(unsigned long long) chrhigh,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
/* Save segment, but first advance splicesites past segment_left */
segment_left = last_diagonal - querylength;
@@ -4195,7 +4233,7 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
#ifdef DEBUG19
for (k = 0, ptr0 = segments; ptr0 < ptr; k++, ptr0++) {
- printf("%d %lu\n",k,ptr0->diagonal);
+ printf("%d %llu\n",k,(unsigned long long) ptr0->diagonal);
}
printf("total_npositions = %d, nchromosomes = %d\n",total_npositions,nchromosomes);
#endif
@@ -4407,7 +4445,8 @@ identify_all_segments_for_terminals (int *nsegments,
#endif
- debug1(printf("multiple_mm_%s, diagonal %lu, querypos %d\n",plusp ? "plus" : "minus",diagonal,querypos));
+ debug1(printf("multiple_mm_%s, diagonal %llu, querypos %d\n",
+ plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
debug1(printf("first_querypos = %d => initial values: floor_left %d, floor_right %d\n",
first_querypos,floor_left,floor_right));
@@ -4503,10 +4542,10 @@ identify_all_segments_for_terminals (int *nsegments,
}
#endif
- debug1(printf("diagonal %lu unchanged: last_querypos = %d, querypos = %d => floor increments by %d\n",
- diagonal,last_querypos,querypos,floor_incr));
- debug1(printf("*multiple_mm_%s, diagonal %lu, querypos %d, floor_left %d, floor_right %d\n",
- plusp ? "plus" : "minus",diagonal,querypos,floor_left,floor_right));
+ debug1(printf("diagonal %llu unchanged: last_querypos = %d, querypos = %d => floor increments by %d\n",
+ (unsigned long long) diagonal,last_querypos,querypos,floor_incr));
+ debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d, floor_left %d, floor_right %d\n",
+ plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos,floor_left,floor_right));
} else {
/* End of diagonal */
floor_incr = floors_to_pos3[last_querypos] /* floors->score[last_querypos][query_lastpos+index1interval] */;
@@ -4528,8 +4567,9 @@ identify_all_segments_for_terminals (int *nsegments,
floor_right = floor_incr;
#endif
- debug1(printf("new diagonal %lu > last diagonal %lu: last_querypos = %d => final values: floor_left %d, floor_right %d, chrhigh %u\n",
- diagonal,last_diagonal,last_querypos,floor_left,floor_right,chrhigh));
+ debug1(printf("new diagonal %llu > last diagonal %llu: last_querypos = %d => final values: floor_left %d, floor_right %d, chrhigh %u\n",
+ (unsigned long long) diagonal,(unsigned long long) last_diagonal,
+ last_querypos,floor_left,floor_right,chrhigh));
if (last_diagonal > chrhigh) {
if (ptr > ptr_chrstart) {
@@ -4576,8 +4616,9 @@ identify_all_segments_for_terminals (int *nsegments,
}
if (last_diagonal <= chrhigh) { /* FORMULA for high position */
/* position of high end is within current chromosome */
- debug1(printf(" => multiple_mm, diagonal %lu, query %d..%d, chrbounds %lu..%lu, floor_left %d, floor_right %d\n",
- last_diagonal,first_querypos,last_querypos,chroffset,chrhigh,floor_left,floor_right));
+ debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor_left %d, floor_right %d\n",
+ (unsigned long long) last_diagonal,first_querypos,last_querypos,
+ (unsigned long long) chroffset,(unsigned long long) chrhigh,floor_left,floor_right));
if (floor_left <= max_mismatches_allowed || floor_right <= max_mismatches_allowed) {
/* Save segment */
ptr->diagonal = last_diagonal;
@@ -4632,9 +4673,10 @@ identify_all_segments_for_terminals (int *nsegments,
#endif
#endif
- debug1(printf("*multiple_mm_%s, diagonal %lu, querypos %d\n",plusp ? "plus" : "minus",diagonal,querypos));
- debug1(printf("start of diagonal %lu, first_querypos = %d => initial values: floor_left %d, floor_right %d\n",
- diagonal,first_querypos,floor_left,floor_right));
+ debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d\n",
+ plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
+ debug1(printf("start of diagonal %llu, first_querypos = %d => initial values: floor_left %d, floor_right %d\n",
+ (unsigned long long) diagonal,first_querypos,floor_left,floor_right));
}
last_querypos = querypos;
@@ -4767,8 +4809,9 @@ identify_all_segments_for_terminals (int *nsegments,
}
if (last_diagonal <= chrhigh) { /* FORMULA for high position */
/* position of high end is within current chromosome */
- debug1(printf(" => multiple_mm, diagonal %lu, query %d..%d, chrbounds %lu..%lu, floor_left %d, floor_right %d\n",
- last_diagonal,first_querypos,last_querypos,chroffset,chrhigh,floor_left,floor_right));
+ debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor_left %d, floor_right %d\n",
+ (unsigned long long) last_diagonal,first_querypos,last_querypos,
+ (unsigned long long) chroffset,(unsigned long long) chrhigh,floor_left,floor_right));
if (floor_left <= max_mismatches_allowed || floor_right <= max_mismatches_allowed) {
/* Save segment */
ptr->diagonal = last_diagonal;
@@ -4886,8 +4929,8 @@ find_middle_indels (int *found_score, int *nhits, List_T hits,
debug2(printf("plus_nspliceable = %d\n",plus_nspliceable));
for (ptr = plus_spliceable; ptr < &(plus_spliceable[plus_nspliceable]); ptr++) {
segmenti = *ptr;
- debug2(printf("\nplus segmenti: diagonal %lu, querypos %d..%d\n",
- segmenti->diagonal,segmenti->querypos5,segmenti->querypos3));
+ debug2(printf("\nplus segmenti: diagonal %llu, querypos %d..%d\n",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3));
if (1 || segmenti->diagonal < (Univcoord_T) -1) { /* No markers were stored in spliceable */
/* Identify potential segmentj for segmenti */
segmentj_end = segmenti+1;
@@ -4900,9 +4943,9 @@ find_middle_indels (int *found_score, int *nhits, List_T hits,
}
for (segmentj = segmenti+1; segmentj < segmentj_end; segmentj++) {
- debug2(printf("plus insertion? diagonal %lu, querypos %d..%d => diagonal %lu, querypos %d..%d => ",
- segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
+ debug2(printf("plus insertion? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
+ (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
/* j5 j3 i5 i3 */
if (segmentj->querypos3 < segmenti->querypos5) {
indels = segmentj->diagonal - segmenti->diagonal; /* positive */
@@ -4964,9 +5007,9 @@ find_middle_indels (int *found_score, int *nhits, List_T hits,
}
for (segmentj = segmenti+1; segmentj < segmentj_end; segmentj++) {
- debug2(printf("plus deletion? diagonal %lu, querypos %d..%d => diagonal %lu, querypos %d..%d => ",
- segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
+ debug2(printf("plus deletion? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
+ (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
/* i5 i3 j5 j3 */
if (segmenti->querypos3 < segmentj->querypos5) {
indels = segmenti->diagonal - segmentj->diagonal; /* negative */
@@ -5026,8 +5069,8 @@ find_middle_indels (int *found_score, int *nhits, List_T hits,
debug2(printf("minus_nspliceable = %d\n",minus_nspliceable));
for (ptr = minus_spliceable; ptr < &(minus_spliceable[minus_nspliceable]); ptr++) {
segmenti = *ptr;
- debug2(printf("\nminus segmenti: diagonal %lu, querypos %d..%d\n",
- segmenti->diagonal,segmenti->querypos5,segmenti->querypos3));
+ debug2(printf("\nminus segmenti: diagonal %llu, querypos %d..%d\n",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3));
if (1 || segmenti->diagonal < (Univcoord_T) -1) { /* No markers were stored in spliceable */
/* Identify potential segmentj for segmenti */
segmentj_end = segmenti+1;
@@ -5040,9 +5083,9 @@ find_middle_indels (int *found_score, int *nhits, List_T hits,
}
for (segmentj = segmenti+1; segmentj < segmentj_end; segmentj++) {
- debug2(printf("minus deletion? diagonal %lu, querypos %d..%d => diagonal %lu, querypos %d..%d => ",
- segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
+ debug2(printf("minus deletion? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
+ (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
/* j5 j3 i5 i3 */
if (segmentj->querypos3 < segmenti->querypos5) {
indels = segmenti->diagonal - segmentj->diagonal; /* negative */
@@ -5105,9 +5148,9 @@ find_middle_indels (int *found_score, int *nhits, List_T hits,
}
for (segmentj = segmenti+1; segmentj < segmentj_end; segmentj++) {
- debug2(printf("minus insertion? diagonal %lu, querypos %d..%d => diagonal %lu, querypos %d..%d => ",
- segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
+ debug2(printf("minus insertion? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
+ (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
/* i5 i3 j5 j3 */
if (segmenti->querypos3 < segmentj->querypos5) {
indels = segmentj->diagonal - segmenti->diagonal; /* positive */
@@ -5411,7 +5454,8 @@ compute_end_indels_right (int *indels, int *nmismatches_longcont, int *nmismatch
if (max_end_insertions > 0) {
if (left < (unsigned int) max_end_insertions) {
- debug2e(printf("left %lu < max_end_insertions %d, so end = left\n",left,max_end_insertions));
+ debug2e(printf("left %llu < max_end_insertions %d, so end = left\n",
+ (unsigned long long) left,max_end_insertions));
end = left;
} else {
end = max_end_insertions;
@@ -5855,7 +5899,8 @@ compute_end_indels_left (int *indels, int *nmismatches_longcont, int *nmismatche
if (max_end_deletions > 0) {
if (left < (unsigned int) max_end_deletions) {
- debug2e(printf("left %lu < max_end_deletions %d, so start = left\n",left,max_end_deletions));
+ debug2e(printf("left %llu < max_end_deletions %d, so start = left\n",
+ (unsigned long long) left,max_end_deletions));
start = left;
} else {
start = 1;
@@ -6094,11 +6139,11 @@ solve_end_indel_low (int *found_score, int *nhits, List_T hits, Segment_T ptr,
debug2e(
if (plusp == true) {
- printf("\nsolve_end_indel_low: Getting genome at diagonal %lu - querylength %d - max_end_deletions %d = %lu.\n",
- diagonal,querylength,max_end_deletions,left-max_end_deletions);
+ printf("\nsolve_end_indel_low: Getting genome at diagonal %llu - querylength %d - max_end_deletions %d = %llu.\n",
+ (unsigned long long) diagonal,querylength,max_end_deletions,(unsigned long long) (left-max_end_deletions));
} else {
- printf("\nsolve_end_indel_low: Getting genome at diagonal %lu + 12 - querylength %d = %lu, max_end_deletions = %d.\n",
- diagonal,querylength,left,max_end_deletions);
+ printf("\nsolve_end_indel_low: Getting genome at diagonal %llu + 12 - querylength %d = %llu, max_end_deletions = %d.\n",
+ (unsigned long long) diagonal,querylength,(unsigned long long) left,max_end_deletions);
});
debug2e(gbuffer = (char *) CALLOC(querylength+max_end_deletions+1,sizeof(char)));
@@ -6260,11 +6305,11 @@ solve_end_indel_high (int *found_score, int *nhits, List_T hits, Segment_T ptr,
debug2e(
if (plusp == true) {
- printf("\nsolve_end_indel_high: Getting genome at diagonal %lu - querylength %d + max_end_deletions %d = %lu.\n",
- diagonal,querylength,max_end_deletions,left+max_end_deletions);
+ printf("\nsolve_end_indel_high: Getting genome at diagonal %llu - querylength %d + max_end_deletions %d = %llu.\n",
+ (unsigned long long) diagonal,querylength,max_end_deletions,(unsigned long long) (left+max_end_deletions));
} else {
- printf("\nsolve_end_indel_high: Getting genome at diagonal %lu + 12 - querylength %d = %lu, max_end_deletions = %d.\n",
- diagonal,querylength,left,max_end_deletions);
+ printf("\nsolve_end_indel_high: Getting genome at diagonal %llu + 12 - querylength %d = %llu, max_end_deletions = %d.\n",
+ (unsigned long long) diagonal,querylength,(unsigned long long) left,max_end_deletions);
});
debug2e(gbuffer = (char *) CALLOC(querylength+max_end_deletions+1,sizeof(char)));
@@ -6643,11 +6688,11 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
/* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segmenti */
while (j < nsplicesites && splicesites[j] < segmenti->diagonal) {
if (splicetypes[j] == DONOR) {
- debug4s(printf("Setting known donor %d for segmenti at %lu\n",j,splicesites[j]));
+ debug4s(printf("Setting known donor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - segmenti_left;
segmenti_donor_knowni[segmenti_donor_nknown++] = j;
} else if (splicetypes[j] == ANTIACCEPTOR) {
- debug4s(printf("Setting known antiacceptor %d for segmenti at %lu\n",j,splicesites[j]));
+ debug4s(printf("Setting known antiacceptor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - segmenti_left;
segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
}
@@ -6685,9 +6730,9 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
segmentj = segmenti+1;
}
for ( ; segmentj < segmentj_end; segmentj++) {
- debug4s(printf("plus local? diagonal %lu, querypos %d..%d => diagonal %lu, querypos %d..%d => ",
- segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
+ debug4s(printf("plus local? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
+ (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
/* i5 i3 j5 j3 */
assert(segmenti->diagonal < segmentj->diagonal);
if (segmenti->querypos3 >= segmentj->querypos5) {
@@ -6750,11 +6795,11 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
/* Ends 2 (acceptor, plus) and 7 (antidonor, plus): mark known splice sites in segmentj */
while (j < nsplicesites && splicesites[j] < segmentj->diagonal) {
if (splicetypes[j] == ACCEPTOR) {
- debug4s(printf("Setting known acceptor %d for segmentj at %lu\n",j,splicesites[j]));
+ debug4s(printf("Setting known acceptor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - segmentj_left;
segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
} else if (splicetypes[j] == ANTIDONOR) {
- debug4s(printf("Setting known antidonor %d for segmentj at %lu\n",j,splicesites[j]));
+ debug4s(printf("Setting known antidonor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - segmentj_left;
segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
}
@@ -6882,7 +6927,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -6914,7 +6960,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -7019,7 +7066,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -7051,7 +7099,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -7167,11 +7216,11 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
/* Ends 4 and 5: mark known splice sites in segmenti */
while (j < nsplicesites && splicesites[j] < segmenti->diagonal) {
if (splicetypes[j] == ANTIACCEPTOR) {
- debug4s(printf("Setting known antiacceptor %d for segmenti at %lu\n",j,splicesites[j]));
+ debug4s(printf("Setting known antiacceptor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - segmenti_left;
segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
} else if (splicetypes[j] == DONOR) {
- debug4s(printf("Setting known donor %d for segmenti at %lu\n",j,splicesites[j]));
+ debug4s(printf("Setting known donor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - segmenti_left;
segmenti_donor_knowni[segmenti_donor_nknown++] = j;
}
@@ -7209,9 +7258,9 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
segmentj = segmenti+1;
}
for ( ; segmentj < segmentj_end; segmentj++) {
- debug4s(printf("minus local? diagonal %lu, querypos %d..%d => diagonal %lu, querypos %d..%d => ",
- segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
+ debug4s(printf("minus local? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
+ (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
/* j5 j3 i5 i3 */
assert(segmenti->diagonal < segmentj->diagonal);
if (segmentj->querypos3 >= segmenti->querypos5) {
@@ -7274,11 +7323,11 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
/* Ends 3 and 6: mark known splice sites in segmentj */
while (j < nsplicesites && splicesites[j] < segmentj->diagonal) {
if (splicetypes[j] == ANTIDONOR) {
- debug4s(printf("Setting known antidonor %d for segmentj at %lu\n",j,splicesites[j]));
+ debug4s(printf("Setting known antidonor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - segmentj_left;
segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
} else if (splicetypes[j] == ACCEPTOR) {
- debug4s(printf("Setting known acceptor %d for segmentj at %lu\n",j,splicesites[j]));
+ debug4s(printf("Setting known acceptor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - segmentj_left;
segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
}
@@ -7405,7 +7454,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -7437,7 +7487,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -7542,7 +7593,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -7574,7 +7626,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -7698,7 +7751,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
int *floors_from_neg3, *floors_to_pos3;
int nmismatches_shortexon_left, nmismatches_shortexon_middle, nmismatches_shortexon_right;
- int amb_nmatches_donor, amb_nmatches_acceptor;
+ int amb_length_donor, amb_length_acceptor;
int best_left_j, best_right_j;
bool shortexon_orig_plusp, shortexon_orig_minusp, saw_antidonor_p, saw_acceptor_p;
int leftpos, rightpos;
@@ -7737,7 +7790,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
j = joffset;
while (j < nsplicesites && splicesites[j] < segmentm->diagonal) {
if (splicetypes[j] == DONOR) {
- debug4k(printf("Setting known donor %d for segmentm at %lu\n",j,splicesites[j]));
+ debug4k(printf("Setting known donor %d for segmentm at %llu\n",j,(unsigned long long) splicesites[j]));
segmentm_donor_knownpos[segmentm_donor_nknown] = splicesites[j] - segmentm_left;
segmentm_donor_knowni[segmentm_donor_nknown++] = j;
if (saw_acceptor_p == true) {
@@ -7745,7 +7798,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
shortexon_orig_plusp = true;
}
} else if (splicetypes[j] == ANTIACCEPTOR) {
- debug4k(printf("Setting known antiacceptor %d for segmentm at %lu\n",j,splicesites[j]));
+ debug4k(printf("Setting known antiacceptor %d for segmentm at %llu\n",j,(unsigned long long) splicesites[j]));
segmentm_antiacceptor_knownpos[segmentm_antiacceptor_nknown] = splicesites[j] - segmentm_left;
segmentm_antiacceptor_knowni[segmentm_antiacceptor_nknown++] = j;
if (saw_antidonor_p == true) {
@@ -7753,12 +7806,12 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
shortexon_orig_minusp = true;
}
} else if (splicetypes[j] == ACCEPTOR) {
- debug4k(printf("Saw known acceptor at %lu\n",splicesites[j]));
+ debug4k(printf("Saw known acceptor at %llu\n",(unsigned long long) splicesites[j]));
segmentm_acceptor_knownpos[segmentm_acceptor_nknown] = splicesites[j] - segmentm_left;
segmentm_acceptor_knowni[segmentm_acceptor_nknown++] = j;
saw_acceptor_p = true;
} else if (splicetypes[j] == ANTIDONOR) {
- debug4k(printf("Saw known antidonor at %lu\n",splicesites[j]));
+ debug4k(printf("Saw known antidonor at %llu\n",(unsigned long long) splicesites[j]));
segmentm_antidonor_knownpos[segmentm_antidonor_nknown] = splicesites[j] - segmentm_left;
segmentm_antidonor_knowni[segmentm_antidonor_nknown++] = j;
saw_antidonor_p = true;
@@ -7772,8 +7825,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (novelsplicingp &&
segmentm->querypos3 >= index1part && segmentm->querypos5 <= query_lastpos - index1part &&
segmentm->left_splice_p == true && segmentm->right_splice_p == true) {
- debug4d(printf("segment diagonal %lu, querypos %d..%d\n",
- segmentm->diagonal,segmentm->querypos5,segmentm->querypos3));
+ debug4d(printf("segment diagonal %llu, querypos %d..%d\n",
+ (unsigned long long) segmentm->diagonal,segmentm->querypos5,segmentm->querypos3));
spliceends = (List_T) NULL;
@@ -7814,9 +7867,9 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
}
for ( ; segmenti > segmenti_start; segmenti--) {
- debug4d(printf("local left? diagonal %lu, querypos %d..%d => diagonal %lu, querypos %d..%d\n",
- segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- segmentm->diagonal,segmentm->querypos5,segmentm->querypos3));
+ debug4d(printf("local left? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d\n",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
+ (unsigned long long) segmentm->diagonal,segmentm->querypos5,segmentm->querypos3));
/* i5 i3 m5 m3 */
assert(segmenti->diagonal < segmentm->diagonal);
if (segmenti->leftmost < 0) {
@@ -7830,14 +7883,14 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
} else {
potentiali = List_push(potentiali,(void *) segmenti);
npotential_left++;
- debug4d(printf("Potential left #%d: %lu\n",npotential_left,segmenti->diagonal));
+ debug4d(printf("Potential left #%d: %llu\n",npotential_left,(unsigned long long) segmenti->diagonal));
}
}
for ( ; segmentj < segmentj_end; segmentj++) {
- debug4d(printf("local right? diagonal %lu, querypos %d..%d => diagonal %lu, querypos %d..%d\n",
- segmentm->diagonal,segmentm->querypos5,segmentm->querypos3,
- segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
+ debug4d(printf("local right? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d\n",
+ (unsigned long long) segmentm->diagonal,segmentm->querypos5,segmentm->querypos3,
+ (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
/* m5 m3 j5 j3 */
assert(segmentm->diagonal < segmentj->diagonal);
if (segmentj->rightmost < 0) {
@@ -7851,7 +7904,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
} else {
potentialj = List_push(potentialj,(void *) segmentj);
npotential_right++;
- debug4d(printf("Potential right #%d: %lu\n",npotential_right,segmentj->diagonal));
+ debug4d(printf("Potential right #%d: %llu\n",npotential_right,(unsigned long long) segmentj->diagonal));
}
}
@@ -7871,11 +7924,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if ((jj = segmenti->splicesites_i) >= 0) {
while (jj < nsplicesites && splicesites[jj] < segmenti->diagonal) {
if (splicetypes[jj] == DONOR) {
- debug4d(printf("Setting known donor %d for segmenti at %lu\n",jj,splicesites[jj]));
+ debug4d(printf("Setting known donor %d for segmenti at %llu\n",jj,(unsigned long long) splicesites[jj]));
segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[jj] - segmenti_left;
segmenti_donor_knowni[segmenti_donor_nknown++] = jj;
} else if (splicetypes[jj] == ANTIACCEPTOR) {
- debug4d(printf("Setting known antiacceptor %d for segmenti at %lu\n",jj,splicesites[jj]));
+ debug4d(printf("Setting known antiacceptor %d for segmenti at %llu\n",jj,(unsigned long long) splicesites[jj]));
segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[jj] - segmenti_left;
segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = jj;
}
@@ -7912,11 +7965,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if ((jj = segmentj->splicesites_i) >= 0) {
while (jj < nsplicesites && splicesites[jj] < segmentj->diagonal) {
if (splicetypes[jj] == ACCEPTOR) {
- debug4d(printf("Setting known acceptor %d for segmentj at %lu\n",jj,splicesites[jj]));
+ debug4d(printf("Setting known acceptor %d for segmentj at %llu\n",jj,(unsigned long long) splicesites[jj]));
segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[jj] - segmentj_left;
segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = jj;
} else if (splicetypes[jj] == ANTIDONOR) {
- debug4d(printf("Setting known antidonor %d for segmentj at %lu\n",jj,splicesites[jj]));
+ debug4d(printf("Setting known antidonor %d for segmentj at %llu\n",jj,(unsigned long long) splicesites[jj]));
segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[jj] - segmentj_left;
segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = jj;
}
@@ -8070,7 +8123,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (Intlist_length(donor_amb_nmismatches) == 1 && Intlist_length(acceptor_amb_nmismatches) == 1) {
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
+ /*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -8079,7 +8132,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
} else if (Intlist_length(donor_amb_nmismatches) > 1 && Intlist_length(acceptor_amb_nmismatches) == 1) {
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- /*amb_nmatches_donor*/Substring_nmatches_posttrim(donor),/*amb_nmatches_acceptor*/0,
+ /*amb_length_donor*/Substring_match_length_orig(donor),/*amb_length_acceptor*/0,
donor_ambcoords,/*acceptor_ambcoords*/NULL,
donor_amb_knowni,/*amb_knowni_acceptor*/NULL,
donor_amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -8088,7 +8141,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
} else if (Intlist_length(donor_amb_nmismatches) == 1 && Intlist_length(acceptor_amb_nmismatches) > 1) {
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/Substring_nmatches_posttrim(acceptor),
+ /*amb_length_donor*/0,/*amb_length_acceptor*/Substring_match_length_orig(acceptor),
/*ambcoords_donor*/NULL,acceptor_ambcoords,
/*amb_knowni_donor*/NULL,acceptor_amb_knowni,
/*amb_nmismatches_donor*/NULL,acceptor_amb_nmismatches,
@@ -8096,8 +8149,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
} else {
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
- /*amb_nmatches_donor*/Substring_nmatches_posttrim(donor),
- /*amb_nmatches_acceptor*/Substring_nmatches_posttrim(acceptor),
+ /*amb_length_donor*/Substring_match_length_orig(donor),
+ /*amb_length_acceptor*/Substring_match_length_orig(acceptor),
donor_ambcoords,acceptor_ambcoords,
donor_amb_knowni,acceptor_amb_knowni,
donor_amb_nmismatches,acceptor_amb_nmismatches,
@@ -8160,8 +8213,9 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) {
ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false;
- debug4k(printf(" donor %s ... acceptor %d (%lu) ... donor %d (%lu) ... acceptor %s: %d + %d + %d mismatches\n",
- Intlist_to_string(splicesites_i_left),j1,splicesites[j1],j2,splicesites[j2],Intlist_to_string(splicesites_i_right),
+ debug4k(printf(" donor %s ... acceptor %d (%llu) ... donor %d (%llu) ... acceptor %s: %d + %d + %d mismatches\n",
+ Intlist_to_string(splicesites_i_left),j1,(unsigned long long) splicesites[j1],
+ j2,(unsigned long long) splicesites[j2],Intlist_to_string(splicesites_i_right),
nmismatches_shortexon_left,nmismatches_shortexon_middle,nmismatches_shortexon_right));
if (ambp_left == true && ambp_right == true) {
@@ -8175,14 +8229,14 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
sensep,/*acceptor_ambp*/true,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon != NULL) {
- debug4k(printf("New one-third shortexon at left %lu\n",segmentm_left));
+ debug4k(printf("New one-third shortexon at left %llu\n",(unsigned long long) segmentm_left));
ambcoords_donor = lookup_splicesites(splicesites_i_left,splicesites);
ambcoords_acceptor = lookup_splicesites(splicesites_i_right,splicesites);
- amb_nmatches_donor = leftpos - nmismatches_shortexon_left;
- amb_nmatches_acceptor = querylength - rightpos - nmismatches_shortexon_right;
+ amb_length_donor = leftpos /*- nmismatches_shortexon_left*/;
+ amb_length_acceptor = querylength - rightpos /*- nmismatches_shortexon_right*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
- amb_nmatches_donor,amb_nmatches_acceptor,
+ amb_length_donor,amb_length_acceptor,
ambcoords_donor,ambcoords_acceptor,
/*amb_knowni_donor*/splicesites_i_left,/*amb_knowni_acceptor*/splicesites_i_right,
/*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/nmismatches_list_right,
@@ -8201,8 +8255,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("ambp_left true, ambp_right false\n"));
best_right_j = Intlist_head(splicesites_i_right);
- debug4k(printf("shortexon with amb_acceptor at %d (%lu) ... donor at %d (%lu)\n",
- j1,splicesites[j1],j2,splicesites[j2]));
+ debug4k(printf("shortexon with amb_acceptor at %d (%llu) ... donor at %d (%llu)\n",
+ j1,(unsigned long long) splicesites[j1],j2,(unsigned long long) splicesites[j2]));
shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1,
/*donor_coord*/splicesites[j2],/*donor_knowni*/j2,
/*acceptor_pos*/leftpos,/*donor_pos*/rightpos,
@@ -8213,7 +8267,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
sensep,/*acceptor_ambp*/true,/*donor_ambp*/false,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
- debug4k(printf("acceptor at %d (%lu)\n",best_right_j,splicesites[best_right_j]));
+ debug4k(printf("acceptor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j,
/*splice_pos*/rightpos,nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
@@ -8224,12 +8278,13 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (shortexon != NULL) Substring_free(&shortexon);
if (acceptor != NULL) Substring_free(&acceptor);
} else {
- debug4k(printf("ambp_left true, ambp_right false: New two-thirds shortexon at left %lu\n",segmentm_left));
+ debug4k(printf("ambp_left true, ambp_right false: New two-thirds shortexon at left %llu\n",
+ (unsigned long long) segmentm_left));
ambcoords_donor = lookup_splicesites(splicesites_i_left,splicesites);
- amb_nmatches_donor = leftpos - nmismatches_shortexon_left;
+ amb_length_donor = leftpos /*- nmismatches_shortexon_left*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ amb_length_donor,/*amb_length_acceptor*/0,
ambcoords_donor,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i_left,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/NULL,
@@ -8246,15 +8301,15 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("ambp_left false, ambp_right true\n"));
best_left_j = Intlist_head(splicesites_i_left);
- debug4k(printf("donor at %d (%lu)\n",best_left_j,splicesites[best_left_j]));
+ debug4k(printf("donor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j]));
donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j,
/*splice_pos*/leftpos,nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
- debug4k(printf("shortexon with acceptor at %d (%lu) ... amb_donor %d (%lu)\n",
- j1,splicesites[j1],j2,splicesites[j2]));
+ debug4k(printf("shortexon with acceptor at %d (%llu) ... amb_donor %d (%llu)\n",
+ j1,(unsigned long long) splicesites[j1],j2,(unsigned long long) splicesites[j2]));
shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1,
/*donor_coord*/splicesites[j2],/*donor_knowni*/j2,
/*acceptor_pos*/leftpos,/*donor_pos*/rightpos,
@@ -8270,10 +8325,10 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (shortexon != NULL) Substring_free(&shortexon);
} else {
ambcoords_acceptor = lookup_splicesites(splicesites_i_right,splicesites);
- amb_nmatches_acceptor = querylength - rightpos - nmismatches_shortexon_right;
+ amb_length_acceptor = querylength - rightpos /*- nmismatches_shortexon_right*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- /*amb_nmatches_donor*/0,amb_nmatches_acceptor,
+ /*amb_length_donor*/0,amb_length_acceptor,
/*ambcoords_donor*/NULL,ambcoords_acceptor,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i_right,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_right,
@@ -8317,10 +8372,10 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (shortexon != NULL) Substring_free(&shortexon);
if (acceptor != NULL) Substring_free(&acceptor);
} else {
- debug4k(printf("New shortexon at left %lu\n",segmentm_left));
+ debug4k(printf("New shortexon at left %llu\n",(unsigned long long) segmentm_left));
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
+ /*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -8375,8 +8430,9 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) {
ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false;
- debug4k(printf(" antiacceptor %s ... antidonor %d (%lu) ... antiacceptor %d (%lu) ... antidonor %s: %d + %d + %d mismatches\n",
- Intlist_to_string(splicesites_i_left),j1,splicesites[j1],j2,splicesites[j2],Intlist_to_string(splicesites_i_right),
+ debug4k(printf(" antiacceptor %s ... antidonor %d (%llu) ... antiacceptor %d (%llu) ... antidonor %s: %d + %d + %d mismatches\n",
+ Intlist_to_string(splicesites_i_left),j1,(unsigned long long) splicesites[j1],
+ j2,(unsigned long long) splicesites[j2],Intlist_to_string(splicesites_i_right),
nmismatches_shortexon_left,nmismatches_shortexon_middle,nmismatches_shortexon_right));
if (ambp_left == true && ambp_right == true) {
@@ -8389,14 +8445,14 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
sensep,/*acceptor_ambp*/true,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon != NULL) {
- debug4k(printf("New one-third shortexon at left %lu\n",segmentm_left));
+ debug4k(printf("New one-third shortexon at left %llu\n",(unsigned long long) segmentm_left));
ambcoords_donor = lookup_splicesites(splicesites_i_right,splicesites);
ambcoords_acceptor = lookup_splicesites(splicesites_i_left,splicesites);
- amb_nmatches_donor = querylength - rightpos - nmismatches_shortexon_right;
- amb_nmatches_acceptor = leftpos - nmismatches_shortexon_left;
+ amb_length_donor = querylength - rightpos /*- nmismatches_shortexon_right*/;
+ amb_length_acceptor = leftpos /*- nmismatches_shortexon_left*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
- amb_nmatches_donor,amb_nmatches_acceptor,
+ amb_length_donor,amb_length_acceptor,
ambcoords_donor,ambcoords_acceptor,
/*amb_knowni_donor*/splicesites_i_right,/*amb_knowni_acceptor*/splicesites_i_left,
/*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/nmismatches_list_left,
@@ -8415,8 +8471,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("ambp_left true, ambp_right false\n"));
best_right_j = Intlist_head(splicesites_i_right);
- debug4k(printf("shortexon with amb_donor at %d (%lu) ... acceptor at %d (%lu)\n",
- j1,splicesites[j1],j2,splicesites[j2]));
+ debug4k(printf("shortexon with amb_donor at %d (%llu) ... acceptor at %d (%llu)\n",
+ j1,(unsigned long long) splicesites[j1],j2,(unsigned long long) splicesites[j2]));
shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2,
/*donor_coord*/splicesites[j1],/*donor_knowni*/j1,
/*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
@@ -8426,7 +8482,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
sensep,/*acceptor_ambp*/false,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
- debug4k(printf("donor at %d (%lu)\n",best_right_j,splicesites[best_right_j]));
+ debug4k(printf("donor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j,
/*splice_pos*/rightpos,nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
@@ -8438,10 +8494,10 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (shortexon != NULL) Substring_free(&shortexon);
} else {
ambcoords_acceptor = lookup_splicesites(splicesites_i_left,splicesites);
- amb_nmatches_acceptor = leftpos - nmismatches_shortexon_left;
+ amb_length_acceptor = leftpos /*- nmismatches_shortexon_left*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- /*amb_nmatches_donor*/0,amb_nmatches_acceptor,
+ /*amb_length_donor*/0,amb_length_acceptor,
/*ambcoords_donor*/NULL,ambcoords_acceptor,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i_left,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_left,
@@ -8458,15 +8514,15 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("ambp_left false, ambp_right true\n"));
best_left_j = Intlist_head(splicesites_i_left);
- debug4k(printf("acceptor at %d (%lu)\n",best_left_j,splicesites[best_left_j]));
+ debug4k(printf("acceptor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j]));
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j,
/*splice_pos*/leftpos,nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
- debug4k(printf("shortexon with donor at %d (%lu) ... amb_acceptor at %d (%lu)\n",
- j2,splicesites[j2],j1,splicesites[j1]));
+ debug4k(printf("shortexon with donor at %d (%llu) ... amb_acceptor at %d (%llu)\n",
+ j2,(unsigned long long) splicesites[j2],j1,(unsigned long long) splicesites[j1]));
shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2,
/*donor_coord*/splicesites[j1],/*donor_knowni*/j1,
/*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
@@ -8480,12 +8536,13 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (shortexon != NULL) Substring_free(&shortexon);
if (acceptor != NULL) Substring_free(&acceptor);
} else {
- debug4k(printf("ambp_left false, ambp_right true: New splice at left %lu\n",segmentm_left));
+ debug4k(printf("ambp_left false, ambp_right true: New splice at left %llu\n",
+ (unsigned long long) segmentm_left));
ambcoords_donor = lookup_splicesites(splicesites_i_right,splicesites);
- amb_nmatches_donor = querylength - rightpos - nmismatches_shortexon_right;
+ amb_length_donor = querylength - rightpos /*- nmismatches_shortexon_right*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ amb_length_donor,/*amb_length_acceptor*/0,
ambcoords_donor,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i_right,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/NULL,
@@ -8527,10 +8584,10 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (shortexon != NULL) Substring_free(&shortexon);
if (donor != NULL) Substring_free(&donor);
} else {
- debug4k(printf("New shortexon at left %lu\n",segmentm_left));
+ debug4k(printf("New shortexon at left %llu\n",(unsigned long long) segmentm_left));
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
+ /*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -8607,8 +8664,9 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
} else if (segment->splicesites_i >= 0) {
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- debug4e(printf("find_spliceends_shortend: Checking up to %d mismatches at diagonal %lu (querypos %d..%d) - querylength %d = %lu, floors %d and %d\n",
- max_mismatches_allowed,segment->diagonal,segment->querypos5,segment->querypos3,querylength,segment_left,
+ debug4e(printf("find_spliceends_shortend: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+ max_mismatches_allowed,(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
debug4e(
@@ -8645,8 +8703,9 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
}
splice_pos_end = querylength - 1;
- debug4e(printf("Search for splice sites from %d up (%lu) to %d (%lu)\n",
- splice_pos_start,segment_left+splice_pos_start,splice_pos_end,segment_left+splice_pos_end));
+ debug4e(printf("Search for splice sites from %d up (%llu) to %d (%llu)\n",
+ splice_pos_start,(unsigned long long) segment_left+splice_pos_start,
+ splice_pos_end,(unsigned long long) segment_left+splice_pos_end));
jstart = segment->splicesites_i;
while (jstart < nsplicesites && splicesites[jstart] < segment_left + splice_pos_start) {
@@ -8659,7 +8718,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
nmismatches = 0;
for (j = jstart; j < jend; j++) {
- debug4e(printf("splicesites_i #%d is at %lu\n",j,splicesites[j]));
+ debug4e(printf("splicesites_i #%d is at %llu\n",j,(unsigned long long) splicesites[j]));
splice_pos = splicesites[j] - segment_left;
while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
@@ -8673,8 +8732,8 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
debug4e(printf("nmismatches %d > max_mismatches_allowed %d\n",nmismatches,max_mismatches_allowed));
} else if (splicetypes[j] == DONOR) {
debug4e(printf("Known donor #%d at querypos %d\n",j,splicesites[j] - segment_left));
- debug4e(printf("Known donor for segment at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_end));
+ debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
sensep = (plusp == true) ? true : false;
if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,splice_pos,nmismatches,
/*prob*/2.0,/*left*/segment_left,query_compress,
@@ -8688,8 +8747,8 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
} else if (splicetypes[j] == ANTIACCEPTOR) {
debug4e(printf("Known antiacceptor #%d at querypos %d\n",j,splicesites[j] - segment_left));
- debug4e(printf("Known antiacceptor for segment at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_end));
+ debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
sensep = (plusp == true) ? false : true;
if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
@@ -8728,8 +8787,9 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
splice_pos_start = 1;
}
- debug4e(printf("Search for splice sites from %d (%lu) down to %d (%lu)\n",
- splice_pos_end,segment_left+splice_pos_end,splice_pos_start,segment_left+splice_pos_start));
+ debug4e(printf("Search for splice sites from %d (%llu) down to %d (%llu)\n",
+ splice_pos_end,(unsigned long long) segment_left+splice_pos_end,
+ splice_pos_start,(unsigned long long) segment_left+splice_pos_start));
jstart = segment->splicesites_i;
while (jstart < nsplicesites && splicesites[jstart] < segment_left + splice_pos_start) {
@@ -8742,7 +8802,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
nmismatches = 0;
for (j = jend - 1; j >= jstart; j--) {
- debug4e(printf("splicesites_i #%d is at %lu\n",j,splicesites[j]));
+ debug4e(printf("splicesites_i #%d is at %llu\n",j,(unsigned long long) splicesites[j]));
splice_pos = splicesites[j] - segment_left;
while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
@@ -8756,8 +8816,8 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
debug4e(printf("nmismatches %d > max_mismatches_allowed %d\n",nmismatches,max_mismatches_allowed));
} else if (splicetypes[j] == ACCEPTOR) {
debug4e(printf("Known acceptor #%d at querypos %d\n",j,splicesites[j] - segment_left));
- debug4e(printf("Known acceptor for segment at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_start));
+ debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
sensep = (plusp == true) ? true : false;
if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
@@ -8771,8 +8831,8 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
} else if (splicetypes[j] == ANTIDONOR) {
debug4e(printf("Known antidonor #%d at querypos %d\n",j,splicesites[j] - segment_left));
- debug4e(printf("Known antidonor for segmenti at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_start));
+ debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
sensep = (plusp == true) ? false : true;
if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,splice_pos,nmismatches,
/*prob*/2.0,/*left*/segment_left,query_compress,
@@ -8858,8 +8918,9 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
if (segment->diagonal < (Univcoord_T) -1) {
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %lu (querypos %d..%d) - querylength %d = %lu, floors %d and %d\n",
- max_mismatches_allowed,segment->diagonal,segment->querypos5,segment->querypos3,querylength,segment_left,
+ debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+ max_mismatches_allowed,(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
debug4e(
@@ -8953,8 +9014,8 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
#endif
if (nmismatches <= max_mismatches_allowed) {
if (donori_knowni[i] >= 0) {
- debug4e(printf("Known donor for segment at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_end));
+ debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
@@ -8973,8 +9034,8 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel donor for segment at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_end));
+ debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
@@ -9029,8 +9090,8 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
#endif
if (nmismatches <= max_mismatches_allowed) {
if (antiacceptori_knowni[i] >= 0) {
- debug4e(printf("Known antiacceptor for segment at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_end));
+ debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
@@ -9048,8 +9109,8 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel antiacceptor for segment at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_end));
+ debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
@@ -9103,11 +9164,11 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
/* Known splicing */
while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
if (splicetypes[j] == ACCEPTOR) {
- debug4k(printf("Setting known acceptor %d for segment at %lu\n",j,splicesites[j]));
+ debug4k(printf("Setting known acceptor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
segment_acceptor_knownpos[segment_acceptor_nknown] = splicesites[j] - segment_left;
segment_acceptor_knowni[segment_acceptor_nknown++] = j;
} else if (splicetypes[j] == ANTIDONOR) {
- debug4k(printf("Setting known antidonor %d for segment at %lu\n",j,splicesites[j]));
+ debug4k(printf("Setting known antidonor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
segment_antidonor_knownpos[segment_antidonor_nknown] = splicesites[j] - segment_left;
segment_antidonor_knowni[segment_antidonor_nknown++] = j;
}
@@ -9154,8 +9215,8 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
#endif
if (nmismatches <= max_mismatches_allowed) {
if (acceptorj_knowni[i] >= 0) {
- debug4e(printf("Known acceptor for segment at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_start));
+ debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
@@ -9173,8 +9234,8 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel acceptor for segment at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_start));
+ debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
@@ -9229,8 +9290,8 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
#endif
if (nmismatches <= max_mismatches_allowed) {
if (antidonorj_knowni[i] >= 0) {
- debug4e(printf("Known antidonor for segmenti at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_start));
+ debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
@@ -9248,8 +9309,8 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel antidonor for segmenti at %lu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,splice_pos,nmismatches,splice_pos_start));
+ debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
@@ -9337,10 +9398,11 @@ find_terminals (struct Segment_T *plus_segments, int plus_nsegments,
if (0 && segment->usedp == true) {
/* Previously skipped, but looks like a bad idea */
} else if (segment->diagonal < (Univcoord_T) -1) {
- debug4t(printf("plus: %lu, %d..%d\n",segment->diagonal,segment->querypos5,segment->querypos3));
+ debug4t(printf("plus: %llu, %d..%d\n",(unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3));
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- debug4t(printf("identify_terminals_plus: Checking up to %d mismatches at diagonal %lu (querypos %d..%d) - querylength %d = %lu\n",
- max_mismatches_allowed,segment->diagonal,segment->querypos5,segment->querypos3,querylength,segment_left));
+ debug4t(printf("identify_terminals_plus: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu\n",
+ max_mismatches_allowed,(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left));
debug4t(
gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
@@ -9555,10 +9617,11 @@ find_terminals (struct Segment_T *plus_segments, int plus_nsegments,
/* Previously skipped, but looks like a bad idea */
debug4t(printf("segment used\n"));
} else if (segment->diagonal < (Univcoord_T) -1) {
- debug4t(printf("minus: %lu, %d..%d\n",segment->diagonal,segment->querypos5,segment->querypos3));
+ debug4t(printf("minus: %llu, %d..%d\n",(unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3));
segment_left = segment->diagonal - querylength;
- debug4t(printf("identify_terminals_minus: Getting genome at diagonal %lu (querypos %d..%d) + 12 - querylength %d = %lu\n",
- segment->diagonal,segment->querypos5,segment->querypos3,querylength,segment_left));
+ debug4t(printf("identify_terminals_minus: Getting genome at diagonal %llu (querypos %d..%d) + 12 - querylength %d = %llu\n",
+ (unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3,querylength,
+ (unsigned long long) segment_left));
debug4t(
gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
@@ -9814,8 +9877,8 @@ find_terminals_by_width_only (struct Segment_T *plus_segments, int plus_nsegment
if (0 && segment->usedp == true) {
/* Previously skipped, but looks like a bad idea */
} else if (segment->diagonal < (Univcoord_T) -1) {
- debug4t(printf("identify_terminals_plus: Checking up to %d mismatches at diagonal %lu (querypos %d..%d)\n",
- max_mismatches_allowed,segment->diagonal,segment->querypos5,segment->querypos3));
+ debug4t(printf("identify_terminals_plus: Checking up to %d mismatches at diagonal %llu (querypos %d..%d)\n",
+ max_mismatches_allowed,(unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3));
if (segment->querypos3 - segment->querypos5 > index1part) {
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
@@ -9846,8 +9909,8 @@ find_terminals_by_width_only (struct Segment_T *plus_segments, int plus_nsegment
if (0 && segment->usedp == true) {
/* Previously skipped, but looks like a bad idea */
} else if (segment->diagonal < (Univcoord_T) -1) {
- debug4t(printf("identify_terminals_minus: Getting genome at diagonal %lu (querypos %d..%d)\n",
- segment->diagonal,segment->querypos5,segment->querypos3));
+ debug4t(printf("identify_terminals_minus: Getting genome at diagonal %llu (querypos %d..%d)\n",
+ (unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3));
if (segment->querypos3 - segment->querypos5 > index1part) {
segment_left = segment->diagonal - querylength;
@@ -10008,7 +10071,8 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end1-end2: donor at %lu and acceptor at %lu\n",Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ debug4ld(printf("end1-end2: donor at %llu and acceptor at %llu\n",
+ (unsigned long long) Substring_genomicstart(donor),(unsigned long long) Substring_genomicstart(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -10027,11 +10091,11 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
qsave = q;
while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
- debug4ld(printf("donor at %lu, pos %d\n",Substring_genomicstart(donor),pos));
+ debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
- debug4ld(printf("acceptor at %lu, pos %d\n",Substring_genomicstart(acceptor),pos));
+ debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) {
/* Skip. Really a continuous match. */
} else {
@@ -10052,15 +10116,16 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distance = donor_genomicstart - acceptor_genomicstart;
shortdistancep = false; /* scramble */
}
- debug4ld(printf("1-2. Pushing a candidate at splice_pos %d (%d..%d), donor %lu to acceptor %lu. shortdistancep = %d\n",
+ debug4ld(printf("1-2. Pushing a candidate at splice_pos %d (%d..%d), donor %llu to acceptor %llu. shortdistancep = %d\n",
pos,min_endlength_1,querylength-min_endlength_2,
- Substring_genomicstart(donor),Substring_genomicstart(acceptor),shortdistancep));
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),shortdistancep));
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10070,7 +10135,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10096,7 +10161,9 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end3-end4: donor at %lu and acceptor at %lu\n",Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ debug4ld(printf("end3-end4: donor at %llu and acceptor at %llu\n",
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -10114,11 +10181,11 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
qsave = q;
while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
- debug4ld(printf("donor at %lu, pos %d\n",Substring_genomicstart(donor),pos));
+ debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
- debug4ld(printf("acceptor at %lu, pos %d\n",Substring_genomicstart(acceptor),pos));
+ debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) {
/* Skip. Really a continuous match. */
} else {
@@ -10139,14 +10206,15 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
shortdistancep = false;
}
}
- debug4ld(printf("3-4. Pushing a candidate at splice_pos %d (%d..%d), donor %lu to acceptor %lu. shortdistancep = %d.\n",
+ debug4ld(printf("3-4. Pushing a candidate at splice_pos %d (%d..%d), donor %llu to acceptor %llu. shortdistancep = %d.\n",
pos,min_endlength_1,querylength-min_endlength_2,
- Substring_genomicstart(donor),Substring_genomicstart(acceptor),shortdistancep));
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),shortdistancep));
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10156,7 +10224,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10181,7 +10249,9 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end5-end6: donor at %lu and acceptor at %lu\n",Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ debug4ld(printf("end5-end6: donor at %llu and acceptor at %llu\n",
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -10199,11 +10269,11 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
qsave = q;
while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
- debug4ld(printf("donor at %lu, pos %d\n",Substring_genomicstart(donor),pos));
+ debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
- debug4ld(printf("acceptor at %lu, pos %d\n",Substring_genomicstart(acceptor),pos));
+ debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) {
/* Skip. Really an continuous match. */
} else {
@@ -10225,14 +10295,15 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
}
}
- debug4ld(printf("5-6. Pushing a candidate at splice_pos %d (%d..%d), donor %lu to acceptor %lu. shortdistancep = %d\n",
+ debug4ld(printf("5-6. Pushing a candidate at splice_pos %d (%d..%d), donor %llu to acceptor %llu. shortdistancep = %d\n",
pos,min_endlength_2,querylength-min_endlength_1,
- Substring_genomicstart(donor),Substring_genomicstart(acceptor),shortdistancep));
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),shortdistancep));
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10242,7 +10313,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10267,7 +10338,9 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end7-end8: donor at %lu and acceptor at %lu\n",Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ debug4ld(printf("end7-end8: donor at %llu and acceptor at %llu\n",
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -10286,11 +10359,11 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
- debug4ld(printf("donor at %lu, pos %d\n",Substring_genomicstart(donor),pos));
+ debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
- debug4ld(printf("acceptor at %lu, pos %d\n",Substring_genomicstart(acceptor),pos));
+ debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) {
/* Skip. Really a continuous match. */
} else {
@@ -10311,14 +10384,15 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distance = donor_genomicstart - acceptor_genomicstart;
shortdistancep = false; /* scramble */
}
- debug4ld(printf("7-8. Pushing a candidate at splice_pos %d (%d..%d), donor %lu to acceptor %lu. shortdistancep = %d.\n",
+ debug4ld(printf("7-8. Pushing a candidate at splice_pos %d (%d..%d), donor %llu to acceptor %llu. shortdistancep = %d.\n",
pos,min_endlength_2,querylength-min_endlength_1,
- Substring_genomicstart(donor),Substring_genomicstart(acceptor),shortdistancep));
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),shortdistancep));
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10328,7 +10402,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10358,7 +10432,9 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end1-end4: donor at %lu and acceptor at %lu\n",Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ debug4ld(printf("end1-end4: donor at %llu and acceptor at %llu\n",
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -10376,11 +10452,11 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
qsave = q;
while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
- debug4ld(printf("donor at %lu, pos %d\n",Substring_genomicstart(donor),pos));
+ debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
- debug4ld(printf("acceptor at %lu, pos %d\n",Substring_genomicstart(acceptor),pos));
+ debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) {
distance = 0U;
} else if ((Substring_genomicstart(acceptor) - pos) > (Substring_genomicstart(donor) + pos)) {
@@ -10388,13 +10464,14 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
} else {
distance = (Substring_genomicstart(donor) + pos) - (Substring_genomicstart(acceptor) - pos);
}
- debug4ld(printf("1-4. Pushing a candidate at splice_pos %d (%d..%d), donor %lu to acceptor %lu. Different strands, so not shortdistance.\n",
+ debug4ld(printf("1-4. Pushing a candidate at splice_pos %d (%d..%d), donor %llu to acceptor %llu. Different strands, so not shortdistance.\n",
pos,min_endlength_1,querylength-min_endlength_2,
- Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10416,7 +10493,9 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end3-end2: donor at %lu and acceptor at %lu\n",Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ debug4ld(printf("end3-end2: donor at %llu and acceptor at %llu\n",
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -10434,11 +10513,11 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
qsave = q;
while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
- debug4ld(printf("donor at %lu, pos %d\n",Substring_genomicstart(donor),pos));
+ debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
- debug4ld(printf("acceptor at %lu, pos %d\n",Substring_genomicstart(acceptor),pos));
+ debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) {
distance = 0U;
} else if (Substring_genomicstart(acceptor) > Substring_genomicstart(donor)) {
@@ -10446,13 +10525,14 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
} else {
distance = (Substring_genomicstart(donor) - pos) - (Substring_genomicstart(acceptor) + pos);
}
- debug4ld(printf("3-2. Pushing a candidate at splice_pos %d (%d..%d), donor %lu to acceptor %lu. Different strands so not shortdistance.\n",
+ debug4ld(printf("3-2. Pushing a candidate at splice_pos %d (%d..%d), donor %llu to acceptor %llu. Different strands so not shortdistance.\n",
pos,min_endlength_1,querylength-min_endlength_2,
- Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10475,7 +10555,9 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end5-end8: donor at %lu and acceptor at %lu\n",Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ debug4ld(printf("end5-end8: donor at %llu and acceptor at %llu\n",
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -10493,11 +10575,11 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
qsave = q;
while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
- debug4ld(printf("donor at %lu, pos %d\n",Substring_genomicstart(donor),pos));
+ debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
- debug4ld(printf("acceptor at %lu, pos %d\n",Substring_genomicstart(acceptor),pos));
+ debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) {
distance = 0U;
} else if ((Substring_genomicstart(acceptor) - pos) > (Substring_genomicstart(donor) + pos)) {
@@ -10505,13 +10587,14 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
} else {
distance = (Substring_genomicstart(donor) + pos) - (Substring_genomicstart(acceptor) - pos);
}
- debug4ld(printf("5-8. Pushing a candidate at splice_pos %d (%d..%d), donor %lu to acceptor %lu. Different strands so not shortdistance.\n",
+ debug4ld(printf("5-8. Pushing a candidate at splice_pos %d (%d..%d), donor %llu to acceptor %llu. Different strands so not shortdistance.\n",
pos,min_endlength_2,querylength-min_endlength_1,
- Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10533,7 +10616,9 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end7-end6: donor at %lu and acceptor at %lu\n",Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ debug4ld(printf("end7-end6: donor at %llu and acceptor at %llu\n",
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -10551,11 +10636,11 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
qsave = q;
while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
- debug4ld(printf("donor at %lu, pos %d\n",Substring_genomicstart(donor),pos));
+ debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
- debug4ld(printf("acceptor at %lu, pos %d\n",Substring_genomicstart(acceptor),pos));
+ debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) {
distance = 0U;
} else if ((Substring_genomicstart(acceptor) + pos) > (Substring_genomicstart(donor) - pos)) {
@@ -10563,13 +10648,14 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
} else {
distance = (Substring_genomicstart(donor) - pos) - (Substring_genomicstart(acceptor) + pos);
}
- debug4ld(printf("7-6. Pushing a candidate at splice_pos %d (%d..%d), donor %lu to acceptor %lu. Different strands so not shortdistance.\n",
+ debug4ld(printf("7-6. Pushing a candidate at splice_pos %d (%d..%d), donor %llu to acceptor %llu. Different strands so not shortdistance.\n",
pos,min_endlength_2,querylength-min_endlength_1,
- Substring_genomicstart(donor),Substring_genomicstart(acceptor)));
+ (unsigned long long) Substring_genomicstart(donor),
+ (unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10615,7 +10701,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Intlist_T splicesites_i;
Intlist_T nmismatches_list;
int nmismatches, nmismatches_shortend, nmisses_allowed, support, endlength;
- int amb_nmatches;
+ int amb_length;
#ifdef DEBUG4H
Univcoord_T leftbound, rightbound;
#endif
@@ -10681,12 +10767,12 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
ambcoords = lookup_splicesites(splicesites_i,splicesites);
- amb_nmatches = endlength - nmismatches_shortend;
- debug4h(printf("End 1: short-overlap donor_plus: Successful ambiguous from donor #%d with amb_nmatches %d\n",
- Substring_splicesites_knowni(donor),amb_nmatches));
+ amb_length = endlength /*- nmismatches_shortend*/;
+ debug4h(printf("End 1: short-overlap donor_plus: Successful ambiguous from donor #%d with amb_length %d\n",
+ Substring_splicesites_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_length,
/*ambcoords_donor*/NULL,ambcoords,
/*ambi_donor*/NULL,/*ambi_acceptor*/splicesites_i,
/*amb_nmismatches_donor*/NULL,/*nmismatches_acceptor*/nmismatches_list,
@@ -10710,7 +10796,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,acceptor,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10754,12 +10840,12 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
ambcoords = lookup_splicesites(splicesites_i,splicesites);
- amb_nmatches = endlength - nmismatches_shortend;
- debug4h(printf("End 2: short-overlap acceptor_plus: Successful ambiguous from acceptor #%d with amb_nmatches %d\n",
- Substring_splicesites_knowni(acceptor),amb_nmatches));
+ amb_length = endlength /*- nmismatches_shortend*/;
+ debug4h(printf("End 2: short-overlap acceptor_plus: Successful ambiguous from acceptor #%d with amb_length %d\n",
+ Substring_splicesites_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_length,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
@@ -10783,7 +10869,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
donor,acceptor,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10827,12 +10913,12 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
ambcoords = lookup_splicesites(splicesites_i,splicesites);
- amb_nmatches = endlength - nmismatches_shortend;
- debug4h(printf("End 3: short-overlap donor_minus: Successful ambiguous from donor #%d with amb_nmatches %d\n",
- Substring_splicesites_knowni(donor),amb_nmatches));
+ amb_length = endlength /*- nmismatches_shortend*/;
+ debug4h(printf("End 3: short-overlap donor_minus: Successful ambiguous from donor #%d with amb_length %d\n",
+ Substring_splicesites_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_length,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
@@ -10856,7 +10942,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,acceptor,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10901,12 +10987,12 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
ambcoords = lookup_splicesites(splicesites_i,splicesites);
- amb_nmatches = endlength - nmismatches_shortend;
- debug4h(printf("End 4: short-overlap acceptor_minus: Successful ambiguous from acceptor #%d with amb_nmatches %d\n",
- Substring_splicesites_knowni(acceptor),amb_nmatches));
+ amb_length = endlength /*- nmismatches_shortend*/;
+ debug4h(printf("End 4: short-overlap acceptor_minus: Successful ambiguous from acceptor #%d with amb_length %d\n",
+ Substring_splicesites_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_length,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
@@ -10930,7 +11016,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
donor,acceptor,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10974,12 +11060,12 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
ambcoords = lookup_splicesites(splicesites_i,splicesites);
- amb_nmatches = endlength - nmismatches_shortend;
- debug4h(printf("End 5: short-overlap antidonor_plus: Successful ambiguous from antidonor #%d with amb_nmatches %d\n",
- Substring_splicesites_knowni(donor),amb_nmatches));
+ amb_length = endlength /*- nmismatches_shortend*/;
+ debug4h(printf("End 5: short-overlap antidonor_plus: Successful ambiguous from antidonor #%d with amb_length %d\n",
+ Substring_splicesites_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_length,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
@@ -11003,7 +11089,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,acceptor,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -11048,12 +11134,12 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
ambcoords = lookup_splicesites(splicesites_i,splicesites);
- amb_nmatches = endlength - nmismatches_shortend;
- debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful ambiguous from antiacceptor #%d with amb_nmatches %d\n",
- Substring_splicesites_knowni(acceptor),amb_nmatches));
+ amb_length = endlength /*- nmismatches_shortend*/;
+ debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful ambiguous from antiacceptor #%d with amb_length %d\n",
+ Substring_splicesites_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_length,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
@@ -11077,7 +11163,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
donor,acceptor,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -11122,12 +11208,12 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
ambcoords = lookup_splicesites(splicesites_i,splicesites);
- amb_nmatches = endlength - nmismatches_shortend;
- debug4h(printf("End 7: short-overlap antidonor_minus: Successful ambiguous from antidonor #%d with amb_nmatches %d\n",
- Substring_splicesites_knowni(donor),amb_nmatches));
+ amb_length = endlength /*- nmismatches_shortend*/;
+ debug4h(printf("End 7: short-overlap antidonor_minus: Successful ambiguous from antidonor #%d with amb_length %d\n",
+ Substring_splicesites_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_length,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
@@ -11151,7 +11237,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,acceptor,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -11195,12 +11281,12 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
ambcoords = lookup_splicesites(splicesites_i,splicesites);
- amb_nmatches = endlength - nmismatches_shortend;
- debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful ambiguous from antiacceptor #%d with amb_nmatches %d\n",
- Substring_splicesites_knowni(acceptor),amb_nmatches));
+ amb_length = endlength /*- nmismatches_shortend*/;
+ debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful ambiguous from antiacceptor #%d with amb_length %d\n",
+ Substring_splicesites_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_length,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
@@ -11224,7 +11310,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
donor,acceptor,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -11804,8 +11890,9 @@ run_gmap (bool *good_start_p, bool *good_end_p, History_T gmap_history,
int ncanonical, nsemicanonical, nnoncanonical;
int maxintronlen_bound;
- debug13(printf("Running GMAP at mappingstart %lu + %d = mappingend %lu, watsonp %d, sense_try %d, querylength %d, limits %u..%u\n",
- mappingstart-chroffset,mappingend-mappingstart,mappingend-chroffset,watsonp,sense_try,querylength,
+ debug13(printf("Running GMAP at mappingstart %u + %d = mappingend %u, watsonp %d, sense_try %d, querylength %d, limits %u..%u\n",
+ (Chrpos_T) (mappingstart-chroffset),mappingend-mappingstart,
+ (Chrpos_T) (mappingend-chroffset),watsonp,sense_try,querylength,
(Chrpos_T) (knownsplice_limit_low-chroffset),(Chrpos_T) (knownsplice_limit_high-chroffset)));
assert(mappingend > mappingstart);
@@ -12153,8 +12240,8 @@ align_single_hit_with_gmap (History_T gmap_history, Stage3end_T hit,
debug13(printf("starti = %d, endi = %d\n",starti,endi));
assert(starti >= endi);
for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%lu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),plus_segments[i].diagonal,
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
plus_segments[i].querypos5,plus_segments[i].querypos3));
if (plus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 3. Missing start of query, so there could be a middle splice */
@@ -12229,8 +12316,8 @@ align_single_hit_with_gmap (History_T gmap_history, Stage3end_T hit,
debug13(printf("starti = %d, endi = %d\n",starti,endi));
assert(starti >= endi);
for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%lu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),plus_segments[i].diagonal,
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
plus_segments[i].querypos5,plus_segments[i].querypos3));
if (query_lastpos - plus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 1. Missing end of query, so there could be a middle splice */
@@ -12363,8 +12450,8 @@ align_single_hit_with_gmap (History_T gmap_history, Stage3end_T hit,
debug13(printf("starti = %d, endi = %d\n",starti,endi));
assert(starti >= endi);
for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%lu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),minus_segments[i].diagonal,
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
minus_segments[i].querypos5,minus_segments[i].querypos3));
if (query_lastpos - minus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 2. Missing end of query, so there could be a middle splice */
@@ -12439,8 +12526,8 @@ align_single_hit_with_gmap (History_T gmap_history, Stage3end_T hit,
debug13(printf("starti = %d, endi = %d\n",starti,endi));
assert(starti >= endi);
for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%lu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),minus_segments[i].diagonal,
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
minus_segments[i].querypos5,minus_segments[i].querypos3));
if (minus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 4. Missing start of query, so there could be a middle splice */
@@ -13943,8 +14030,8 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
debug13(printf("starti = %d, endi = %d\n",starti,endi));
assert(starti <= endi);
for (i = starti; i <= endi; i++) {
- debug13(printf("diagonal %u (%lu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),plus_segments[i].diagonal,
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
plus_segments[i].querypos5,plus_segments[i].querypos3));
if (query_lastpos - plus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 1. Missing end of query, so there could be a middle splice */
@@ -14086,8 +14173,8 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
debug13(printf("starti = %d, endi = %d\n",starti,endi));
assert(starti >= endi);
for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%lu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),minus_segments[i].diagonal,
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
minus_segments[i].querypos5,minus_segments[i].querypos3));
if (query_lastpos - minus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 2. Missing end of query, so there could be a middle splice */
@@ -14241,8 +14328,8 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
debug13(printf("starti = %d, endi = %d\n",starti,endi));
assert(starti >= endi);
for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%lu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),plus_segments[i].diagonal,
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
plus_segments[i].querypos5,plus_segments[i].querypos3));
if (plus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 3. Missing start of query, so there could be a middle splice */
@@ -14380,8 +14467,8 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
debug13(printf("starti = %d, endi = %d\n",starti,endi));
assert(starti <= endi);
for (i = starti; i <= endi; i++) {
- debug13(printf("diagonal %u (%lu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),minus_segments[i].diagonal,
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
minus_segments[i].querypos5,minus_segments[i].querypos3));
if (minus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 4. Missing start of query, so there could be a middle splice */
diff --git a/src/stage2.c b/src/stage2.c
index 98060a8..dfd572d 100644
--- a/src/stage2.c
+++ b/src/stage2.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage2.c 151086 2014-10-16 23:52:16Z twu $";
+static char rcsid[] = "$Id: stage2.c 153953 2014-11-24 17:51:10Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -778,7 +778,7 @@ score_querypos_lookback_one (
int best_fwd_intronnfwd = 0, best_fwd_intronnrev = 0, best_fwd_intronnunk = 0;
int canonicalsgn = 0;
#endif
- bool adjacentp, donep;
+ bool donep;
int prev_querypos, prevhit;
Chrpos_T prevposition, gendistance;
Univcoord_T prevpos, currpos;
@@ -800,7 +800,7 @@ score_querypos_lookback_one (
enough_consecutive = 32;
/* Parameters for section D, assuming adjacent is false */
- adjacentp = false;
+ /* adjacentp = false; */
nlookback = nsufflookback;
lookback = sufflookback;
@@ -836,7 +836,7 @@ score_querypos_lookback_one (
best_fwd_intronnrev = prevlink->fwd_intronnrev;
best_fwd_intronnunk = prevlink->fwd_intronnunk;
#endif
- adjacentp = true;
+ /* adjacentp = true; */
/* Parameters for section D when adjacent is true, so we don't look so far back */
nlookback = 1;
@@ -909,7 +909,7 @@ score_querypos_lookback_one (
prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
gendistance = position - prevposition;
- assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
+ assert(gendistance > (Chrpos_T) querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
diffdistance = gendistance - querydistance; /* No need for abs() */
fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/;
@@ -1415,7 +1415,7 @@ score_querypos_lookback_mult (
prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
gendistance = position - prevposition;
- assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
+ assert(gendistance > (Chrpos_T) querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
diffdistance = gendistance - querydistance; /* No need for abs() */
fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/;
@@ -1648,7 +1648,7 @@ score_querypos_lookforward_one (
int best_fwd_intronnfwd = 0, best_fwd_intronnrev = 0, best_fwd_intronnunk = 0;
int canonicalsgn = 0;
#endif
- bool adjacentp, donep;
+ bool donep;
int prev_querypos, prevhit;
Chrpos_T prevposition, gendistance;
Univcoord_T prevpos, currpos;
@@ -1670,7 +1670,7 @@ score_querypos_lookforward_one (
enough_consecutive = 32;
/* Parameters for section D, assuming adjacent is false */
- adjacentp = false;
+ /* adjacentp = false; */
nlookback = nsufflookback;
lookback = sufflookback;
@@ -1706,7 +1706,7 @@ score_querypos_lookforward_one (
best_fwd_intronnrev = prevlink->fwd_intronnrev;
best_fwd_intronnunk = prevlink->fwd_intronnunk;
#endif
- adjacentp = true;
+ /* adjacentp = true; */
/* Parameters for section D when adjacent is true */
nlookback = 1;
lookback = sufflookback/2;
@@ -1777,7 +1777,7 @@ score_querypos_lookforward_one (
prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
gendistance = prevposition - position;
- assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
+ assert(gendistance > (Chrpos_T) querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
diffdistance = gendistance - querydistance; /* No need for abs() */
fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/;
@@ -2277,7 +2277,7 @@ score_querypos_lookforward_mult (
prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
gendistance = prevposition - position;
- assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
+ assert(gendistance > (Chrpos_T) querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
diffdistance = gendistance - querydistance; /* No need for abs() */
fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/;
diff --git a/src/stage3.c b/src/stage3.c
index ee23e78..695cf80 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 149605 2014-10-01 21:25:32Z twu $";
+static char rcsid[] = "$Id: stage3.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -454,7 +454,7 @@ Stage3_straintype (T this) {
int
Stage3_goodness (T this) {
debug2(printf("Overall goodness:\n"));
- debug2(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n",
+ debug2(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => goodness %d\n",
this->matches,this->mismatches,this->qopens,this->qindels,this->topens,this->tindels,this->goodness));
return this->goodness;
@@ -5391,7 +5391,7 @@ Stage3_print_sam (FILE *fp, char *abbrev, T this, int pathnum, int npaths,
/*clipdir*/0,/*hardclip5*/0,/*hardclip3*/querylength-this->circularpos,querylength,
this->watsonp,this->cdna_direction,chimera_part,chimera,
quality_shift,Sequence_firstp(queryseq),
- pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,
+ pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,this->chrlength,
mapq_score,sam_paired_p,sam_read_group_id,/*invertp*/false,
/*circularp*/true,/*merged_overlap_p*/false);
Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,
@@ -5400,7 +5400,7 @@ Stage3_print_sam (FILE *fp, char *abbrev, T this, int pathnum, int npaths,
/*clipdir*/0,/*hardclip5*/this->circularpos,/*hardclip3*/0,querylength,
this->watsonp,this->cdna_direction,chimera_part,chimera,
quality_shift,Sequence_firstp(queryseq),
- pathnum,npaths,absmq_score,first_absmq,second_absmq,/*chrpos*/1,
+ pathnum,npaths,absmq_score,first_absmq,second_absmq,/*chrpos*/1,this->chrlength,
mapq_score,sam_paired_p,sam_read_group_id,/*invertp*/false,
/*circularp*/true,/*merged_overlap_p*/false);
} else {
@@ -5410,7 +5410,7 @@ Stage3_print_sam (FILE *fp, char *abbrev, T this, int pathnum, int npaths,
/*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,querylength,
this->watsonp,this->cdna_direction,chimera_part,chimera,
quality_shift,Sequence_firstp(queryseq),
- pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,
+ pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,this->chrlength,
mapq_score,sam_paired_p,sam_read_group_id,/*invertp*/false,
/*circularp*/false,/*merged_overlap_p*/false);
}
@@ -12735,6 +12735,9 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *cdna_direction, int *sense
pairarray = make_pairarray(&(*npairs),&(*finalpairs),*cdna_direction,*sensedir,watsonp,
pairpool,queryseq_ptr,chroffset,chrhigh,
ngap,query_subseq_offset,skiplength,diagnosticp);
+ debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n",
+ *npairs,*matches,*mismatches,*qopens,*qindels,*topens,*tindels));
+
#if 0
if (checkp == true && stage3debug == NO_STAGE3DEBUG &&
@@ -12747,9 +12750,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *cdna_direction, int *sense
#endif
-#ifdef DEBUG
- Pair_dump_array(pairarray,*npairs,/*zerobasedp*/true);
-#endif
+ debug0(Pair_dump_array(pairarray,*npairs,/*zerobasedp*/true));
return pairarray;
}
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 44e4a80..366b0ae 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 151052 2014-10-16 19:56:35Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 154079 2014-11-25 18:31:31Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -24,7 +24,13 @@ static char rcsid[] = "$Id: stage3hr.c 151052 2014-10-16 19:56:35Z twu $";
#include "fastlog.h"
-#define SOFT_CLIPS_AVOID_CIRCULARIZATION 1 /* Needed to avoid CIGAR strings like 1S99H */
+#if 0
+/* Originally added to avoid CIGAR strings like 1S99H, but results in
+ errors if first chromosome is circular. Now checking in samprint.c
+ whether the CIGAR string is bad */
+#define SOFT_CLIPS_AVOID_CIRCULARIZATION 1
+#endif
+
#define MAX_HITS 100000
@@ -501,10 +507,10 @@ struct T {
int nchimera_known;
int nchimera_novel;
- int start_amb_nmatches; /* For splice, shortexon, and GMAP */
- int end_amb_nmatches; /* For splice, shortexon, and GMAP */
- int amb_nmatches_donor; /* For shortexon only */
- int amb_nmatches_acceptor; /* For shortexon only */
+ int start_amb_length; /* For splice, shortexon, and GMAP */
+ int end_amb_length; /* For splice, shortexon, and GMAP */
+ int amb_length_donor; /* For shortexon only */
+ int amb_length_acceptor; /* For shortexon only */
double start_amb_prob; /* For GMAP currently */
double end_amb_prob; /* For GMAP currently */
Endtype_T gmap_start_endtype; /* For GMAP, which has no substrings */
@@ -706,7 +712,11 @@ Stage3end_chrhigh (T this) {
Chrpos_T
Stage3end_chrlength (T this) {
- return this->chrlength;
+ if (this == NULL) {
+ return 0;
+ } else {
+ return this->chrlength;
+ }
}
Univcoord_T
@@ -910,12 +920,12 @@ Stage3end_trim_right (T this) {
int
Stage3end_trim_left_raw (T this) {
- return this->trim_left + this->start_amb_nmatches;
+ return this->trim_left + this->start_amb_length;
}
int
Stage3end_trim_right_raw (T this) {
- return this->trim_right + this->end_amb_nmatches;
+ return this->trim_right + this->end_amb_length;
}
int
@@ -1223,13 +1233,13 @@ Stage3end_end_ambiguous_p (T this) {
}
int
-Stage3end_amb_nmatches_start (T this) {
- return this->start_amb_nmatches;
+Stage3end_amb_length_start (T this) {
+ return this->start_amb_length;
}
int
-Stage3end_amb_nmatches_end (T this) {
- return this->end_amb_nmatches;
+Stage3end_amb_length_end (T this) {
+ return this->end_amb_length;
}
@@ -1706,94 +1716,80 @@ Stage3pair_filter_nonconcordant (List_T hitpairs) {
}
-static int
-gmap5_substring3_overlap (Stage3end_T hit5, Stage3end_T hit3, Substring_T substring) {
+static Univcoord_T
+gmap5_substring3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substring_T substring) {
Univcoord_T chroffset;
int i;
- i = 0;
chroffset = hit3->chroffset;
- if (1 || hit5->plusp == true) {
- debug13(printf("plus goal: %u or %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
+ if (hit5->plusp == true) {
+ debug13(printf("plus goal: %u up to %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
+ i = 0;
while (i < hit5->npairs) {
- if (hit5->pairarray[i].genomepos == Substring_alignstart_trim(substring) - chroffset) {
- debug13(printf("case 1\n"));
- return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos + 1) +
- hit3->querylength - Substring_querystart(substring) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->pairarray[i].genomepos == Substring_alignend_trim(substring) - chroffset) {
- debug13(printf("case 2\n"));
- return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos + 1) +
- hit3->querylength - Substring_queryend(substring) - hit3->trim_right - hit3->end_amb_nmatches;
- } else {
+ debug13(printf(" pair %d: genomepos %u\n",i,hit5->pairarray[i].genomepos));
+ if (hit5->pairarray[i].genomepos < Substring_alignstart_trim(substring) - chroffset) {
i++;
+ } else if (hit5->pairarray[i].genomepos > Substring_alignend_trim(substring) - chroffset) {
+ i++;
+ } else {
+ debug13(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos + chroffset));
+ return hit5->pairarray[i].genomepos + chroffset;
}
}
return 0;
} else {
- debug13(printf("minus goal: %u or %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
- while (i < hit5->npairs) {
- if (hit5->pairarray[i].genomepos == Substring_alignstart_trim(substring) - chroffset) {
- debug13(printf("case 3: genomepos %u, at GMAP %d. substring trim %d, **%d..%d trim %d\n",
- hit5->pairarray[i].genomepos,hit5->pairarray[i].querypos,
- hit3->trim_left,Substring_querystart(substring),Substring_queryend(substring),hit3->trim_right));
- return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos + 1) +
- (hit3->querylength - Substring_querystart(substring) - hit3->trim_right - hit3->end_amb_nmatches); /* trim2_1&trim2_2 */
- } else if (hit5->pairarray[i].genomepos == Substring_alignend_trim(substring) - chroffset) {
- debug13(printf("case 4: genomepos %u, at GMAP %d. substring trim %d, %d..%d** trim %d\n",
- hit5->pairarray[i].genomepos,hit5->pairarray[i].querypos,
- hit3->trim_left,Substring_querystart(substring),Substring_queryend(substring),hit3->trim_right));
- return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos + 1) +
- hit3->querylength - Substring_queryend(substring) - hit3->trim_right - hit3->end_amb_nmatches;
+ debug13(printf("minus goal: %u down to %u\n",Substring_alignend_trim(substring) - chroffset,Substring_alignstart_trim(substring) - chroffset));
+ i = hit5->npairs - 1;
+ while (i >= 0) {
+ debug13(printf(" pair %d: genomepos %u\n",i,hit5->pairarray[i].genomepos));
+ if (hit5->pairarray[i].genomepos > Substring_alignstart_trim(substring) - chroffset) {
+ i--;
+ } else if (hit5->pairarray[i].genomepos < Substring_alignend_trim(substring) - chroffset) {
+ i--;
} else {
- i++;
+ debug13(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos + chroffset));
+ return hit5->pairarray[i].genomepos + chroffset;
}
}
return 0;
}
}
-static int
-substring5_gmap3_overlap (Stage3end_T hit5, Stage3end_T hit3, Substring_T substring) {
+static Univcoord_T
+substring5_gmap3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substring_T substring) {
Univcoord_T chroffset;
int j;
- j = 0;
chroffset = hit5->chroffset;
- if (1 || hit5->plusp == true) {
- debug13(printf("plus goal: %u or %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
+ if (hit5->plusp == true) {
+ debug13(printf("plus goal: %u up to %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
+ j = 0;
while (j < hit3->npairs) {
- if (hit3->pairarray[j].genomepos == Substring_alignstart_trim(substring) - chroffset) {
- debug13(printf("case 4: genomepos %u, at GMAP %d. substring trim %d, **%d..%d trim %d\n",
- hit3->pairarray[j].genomepos,hit3->pairarray[j].querypos,
- hit5->trim_left,Substring_querystart(substring),Substring_queryend(substring),hit5->trim_right));
- return (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos + 1) +
- Substring_querystart(substring) - hit5->trim_left - hit5->start_amb_nmatches; /* okay */
- } else if (hit3->pairarray[j].genomepos == Substring_alignend_trim(substring) - chroffset) {
- debug13(printf("case 6: genomepos %u, at GMAP %d. substring trim %d, %d..%d** trim %d\n",
- hit3->pairarray[j].genomepos,hit3->pairarray[j].querypos,
- hit5->trim_left,Substring_querystart(substring),Substring_queryend(substring),hit5->trim_right));
- return (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos + 1) +
- Substring_queryend(substring) - hit5->trim_left - hit5->start_amb_nmatches; /* okay: trim2_2&trim2_1 */
- } else {
+ debug13(printf(" pair %d: genomepos %u\n",j,hit3->pairarray[j].genomepos));
+ if (hit3->pairarray[j].genomepos < Substring_alignstart_trim(substring) - chroffset) {
+ j++;
+ } else if (hit3->pairarray[j].genomepos > Substring_alignend_trim(substring) - chroffset) {
j++;
+ } else {
+ debug13(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos + chroffset));
+ return hit3->pairarray[j].genomepos + chroffset;
}
}
return 0;
-
+
} else {
- debug13(printf("minus goal: %u or %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
- while (j < hit3->npairs) {
- if (hit3->pairarray[j].genomepos == Substring_alignstart_trim(substring) - chroffset) {
- debug13(printf("case 7\n"));
- return (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos + 1) +
- Substring_querystart(substring) - hit5->trim_left - hit5->start_amb_nmatches;
- } else if (hit3->pairarray[j].genomepos == Substring_alignend_trim(substring) - chroffset) {
- debug13(printf("case 8\n"));
- return (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos + 1) +
- Substring_queryend(substring) - hit5->trim_left - hit5->start_amb_nmatches; /* okay: trim2_2&trim2_1 */
+ debug13(printf("minus goal: %u down to %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
+ j = hit3->npairs - 1;
+ while (j >= 0) {
+ debug13(printf(" pair %d: genomepos %u\n",j,hit3->pairarray[j].genomepos));
+ if (hit3->pairarray[j].genomepos > Substring_alignstart_trim(substring) - chroffset) {
+ j--;
+ } else if (hit3->pairarray[j].genomepos < Substring_alignend_trim(substring) - chroffset) {
+ j--;
} else {
- j++;
+ debug13(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos + chroffset));
+ return hit3->pairarray[j].genomepos + chroffset;
}
}
return 0;
@@ -1801,16 +1797,115 @@ substring5_gmap3_overlap (Stage3end_T hit5, Stage3end_T hit3, Substring_T substr
}
+static void
+find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T common_genomicpos, Univcoord_T chroffset) {
+ int i;
+
+ debug13(printf("Finding ilengths for common_genomicpos %u\n",(Chrpos_T) (common_genomicpos - chroffset)));
+ if (hit->hittype == GMAP) {
+ debug13(printf("Type is GMAP\n"));
+ i = 0;
+ while (i < hit->npairs && hit->pairarray[i].genomepos != common_genomicpos - chroffset) {
+ i++;
+ }
+ if (i >= hit->npairs) {
+ abort();
+ } else if (hit->plusp == true) {
+ *ilength_low = hit->pairarray[i].querypos - hit->pairarray[0].querypos + 1;
+ *ilength_high = hit->pairarray[hit->npairs - 1].querypos - hit->pairarray[i].querypos + 1;
+ } else {
+ *ilength_low = hit->pairarray[hit->npairs - 1].querypos - hit->pairarray[i].querypos + 1;
+ *ilength_high = hit->pairarray[i].querypos - hit->pairarray[0].querypos + 1;
+ }
+
+ } else if (hit->plusp == true) {
+ debug13(printf("plus. Checking common genomicpos %llu against substring0 %p, substring1 %p, substring2 %p\n",
+ common_genomicpos,hit->substring0,hit->substring1,hit->substring2));
+ /* Add + 1 when subtracting alignstart, but not when starting from alignend */
+ if (Substring_overlap_point_trimmed_p(hit->substring0,common_genomicpos)) {
+ debug13(printf("substring0\n"));
+ *ilength_low = (common_genomicpos - Substring_alignstart_trim(hit->substring0) + 1);
+ *ilength_high = (Substring_alignend_trim(hit->substring0) - common_genomicpos /*+ 1*/)
+ + Substring_genomic_alignment_length(hit->substring1)
+ + Substring_genomic_alignment_length(hit->substring2);
+
+ } else if (Substring_overlap_point_trimmed_p(hit->substring1,common_genomicpos)) {
+ debug13(printf("substring1\n"));
+ *ilength_low = Substring_genomic_alignment_length(hit->substring0) +
+ common_genomicpos - Substring_alignstart_trim(hit->substring1) + 1;
+ *ilength_high = (Substring_alignend_trim(hit->substring1) - common_genomicpos /*+ 1*/)
+ + Substring_genomic_alignment_length(hit->substring2);
+ if (hit->hittype == INSERTION) {
+ *ilength_high += hit->nindels;
+ }
+
+ } else if (Substring_overlap_point_trimmed_p(hit->substring2,common_genomicpos)) {
+ debug13(printf("substring2\n"));
+ *ilength_low = Substring_genomic_alignment_length(hit->substring0) +
+ Substring_genomic_alignment_length(hit->substring1) +
+ (common_genomicpos - Substring_alignstart_trim(hit->substring2) + 1);
+ *ilength_high = (Substring_alignend_trim(hit->substring2) - common_genomicpos /*+ 1*/);
+ if (hit->hittype == INSERTION) {
+ *ilength_low += hit->nindels;
+ }
+
+ } else {
+ abort();
+ }
+
+ } else {
+ debug13(printf("minus. Checking common genomicpos %llu against substring0 %p, substring1 %p, substring2 %p\n",
+ common_genomicpos,hit->substring0,hit->substring1,hit->substring2));
+ /* Add + 1 when starting from alignstart, but not when subtracting alignend */
+ if (Substring_overlap_point_trimmed_p(hit->substring0,common_genomicpos)) {
+ debug13(printf("substring0\n"));
+ *ilength_low = Substring_genomic_alignment_length(hit->substring2) +
+ Substring_genomic_alignment_length(hit->substring1) +
+ (common_genomicpos - Substring_alignend_trim(hit->substring0) /*+ 1*/);
+ *ilength_high = (Substring_alignstart_trim(hit->substring0) - common_genomicpos + 1);
+
+ } else if (Substring_overlap_point_trimmed_p(hit->substring1,common_genomicpos)) {
+ debug13(printf("substring1\n"));
+ *ilength_low = Substring_genomic_alignment_length(hit->substring2) +
+ (common_genomicpos - Substring_alignend_trim(hit->substring1) /*+ 1*/);
+ *ilength_high = (Substring_alignstart_trim(hit->substring1) - common_genomicpos + 1)
+ + Substring_genomic_alignment_length(hit->substring0);
+ if (hit->hittype == INSERTION) {
+ *ilength_low += hit->nindels;
+ }
+
+ } else if (Substring_overlap_point_trimmed_p(hit->substring2,common_genomicpos)) {
+ debug13(printf("substring2\n"));
+ *ilength_low = (common_genomicpos - Substring_alignend_trim(hit->substring2) /*+ 1*/);
+ *ilength_high = (Substring_alignstart_trim(hit->substring2) - common_genomicpos + 1)
+ + Substring_genomic_alignment_length(hit->substring1)
+ + Substring_genomic_alignment_length(hit->substring0);
+ if (hit->hittype == INSERTION) {
+ *ilength_high += hit->nindels;
+ }
+
+ } else {
+ abort();
+ }
+ }
+
+ debug13(printf("Have ilength_low %d and ilength_high %d\n",*ilength_low,*ilength_high));
+ return;
+}
+
+
+
/* Needed to compute overlap properly. Based on pair_insert_length below, plus code for handling GMAP. */
-static Chrpos_T
-pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed_length, int hit3_trimmed_length) {
+static Univcoord_T
+pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
+ Univcoord_T common_genomicpos;
int i, j;
- int overlap;
Univcoord_T start5, end5, start3, end3;
if (hit5->hittype == GMAP && hit3->hittype == GMAP) {
- i = j = 0;
+ debug13(printf("Computing overlap using dual GMAP\n"));
if (hit5->plusp == true) {
+ i = j = 0;
while (i < hit5->npairs && j < hit3->npairs) {
if (hit5->pairarray[i].genomepos < hit3->pairarray[j].genomepos) {
i++;
@@ -1819,13 +1914,14 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else {
debug13(printf("GMAP and GMAP show overlap at position %d, querypos %d and %d\n",
hit5->pairarray[i].genomepos,hit5->pairarray[i].querypos,hit3->pairarray[j].querypos));
- return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos) + (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos);
+ return hit5->pairarray[i].genomepos + hit5->chroffset;
}
}
debug13(printf("GMAP and GMAP show no overlap\n"));
- return 0;
+ return 0U;
} else {
+ i = j = 0;
while (i < hit5->npairs && j < hit3->npairs) {
if (hit5->pairarray[i].genomepos > hit3->pairarray[j].genomepos) {
i++;
@@ -1834,74 +1930,72 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else {
debug13(printf("GMAP and GMAP show overlap at position %d, querypos %d and %d\n",
hit5->pairarray[i].genomepos,hit5->pairarray[i].querypos,hit3->pairarray[j].querypos));
- return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos) + (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos);
+ return hit5->pairarray[i].genomepos + hit5->chroffset;
}
}
debug13(printf("GMAP and GMAP show no overlap\n"));
- return 0;
+ return 0U;
}
} else if (hit5->hittype == GMAP) {
- if ((overlap = gmap5_substring3_overlap(hit5,hit3,hit3->substring1)) > 0) {
- return overlap;
- } else if (hit3->substring2 != NULL && (overlap = gmap5_substring3_overlap(hit5,hit3,hit3->substring2)) > 0) {
- return overlap;
- } else if (hit3->substring0 != NULL && (overlap = gmap5_substring3_overlap(hit5,hit3,hit3->substring0)) > 0) {
- return overlap;
+ debug13(printf("Computing common point using 5' GMAP\n"));
+ if ((common_genomicpos = gmap5_substring3_common_genomicpos(hit5,hit3,hit3->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit3->substring2 != NULL && (common_genomicpos = gmap5_substring3_common_genomicpos(hit5,hit3,hit3->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit3->substring0 != NULL && (common_genomicpos = gmap5_substring3_common_genomicpos(hit5,hit3,hit3->substring0)) != 0) {
+ return common_genomicpos;
} else {
- return 0;
+ return 0U;
}
} else if (hit3->hittype == GMAP) {
- if ((overlap = substring5_gmap3_overlap(hit5,hit3,hit5->substring1)) > 0) {
- return overlap;
- } else if (hit5->substring2 != NULL && (overlap = substring5_gmap3_overlap(hit5,hit3,hit5->substring2)) > 0) {
- return overlap;
- } else if (hit5->substring0 != NULL && (overlap = substring5_gmap3_overlap(hit5,hit3,hit5->substring0)) > 0) {
- return overlap;
+ debug13(printf("Computing common point using 3' GMAP\n"));
+ if ((common_genomicpos = substring5_gmap3_common_genomicpos(hit5,hit3,hit5->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL && (common_genomicpos = substring5_gmap3_common_genomicpos(hit5,hit3,hit5->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL && (common_genomicpos = substring5_gmap3_common_genomicpos(hit5,hit3,hit5->substring0)) != 0) {
+ return common_genomicpos;
} else {
- return 0;
+ return 0U;
}
} else if (hit5->plusp == true && hit3->plusp == true) {
/* plus/plus */
- start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_nmatches;
- end5 = hit5->genomicend - hit5->trim_right - hit5->end_amb_nmatches;
- start3 = hit3->genomicstart + hit3->trim_left + hit3->start_amb_nmatches;
- end3 = hit3->genomicend - hit3->trim_right - hit3->end_amb_nmatches;
+ debug13(printf("Computing overlap using substrings plus/plus\n"));
+
+ start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_length;
+ end5 = hit5->genomicend - hit5->trim_right - hit5->end_amb_length;
+ start3 = hit3->genomicstart + hit3->trim_left + hit3->start_amb_length;
+ end3 = hit3->genomicend - hit3->trim_right - hit3->end_amb_length;
if (end3 < start5) {
/* Case 1 */
- return 0;
+ return false;
} else if (end5 < start3) {
/* Case 6 */
- return 0;
+ return false;
} else if (start3 < start5) {
if (end3 < end5) {
/* Case 2: Tails overlap. Go from start5 to end3 */
debug13(printf("plus case 2a: start5 %u\n",start5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
- return (Substring_alignend_trim(hit3->substring0) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
- return (Substring_alignend_trim(hit3->substring1) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
- return (Substring_alignend_trim(hit3->substring2) - start5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
+ return start5;
}
/* Case 2: Tails overlap. Go from start5 to end3 */
debug13(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches +
- (end3 - Substring_alignstart_trim(hit5->substring2));
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches +
- (end3 - Substring_alignstart_trim(hit5->substring1));
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches + */
- (end3 - Substring_alignstart_trim(hit5->substring0));
+ return end3;
}
/* Fall through to general algorithm */
@@ -1909,14 +2003,11 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 3: hit3 subsumes hit5 */
debug13(printf("plus case 3\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring2) - end5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring1) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring0) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
+ return end5;
}
/* Fall through to general algorithm */
}
@@ -1926,14 +2017,11 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 4: hit5 subsumes hit3 */
debug13(printf("plus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (start3 - Substring_alignstart_trim(hit5->substring0)) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (start3 - Substring_alignstart_trim(hit5->substring1)) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (start3 - Substring_alignstart_trim(hit5->substring2)) + hit3_trimmed_length;
+ return start3;
}
/* Fall through to general algorithm */
@@ -1941,27 +2029,21 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 5: Based on hit3_trimmed_length */
debug13(printf("plus case 5a\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (start3 - Substring_alignstart_trim(hit5->substring0)) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (start3 - Substring_alignstart_trim(hit5->substring1)) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (start3 - Substring_alignstart_trim(hit5->substring2)) + hit3_trimmed_length;
+ return start3;
}
/* Case 5: Based on hit5_trimmed_length */
debug13(printf("plus case 5b\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return /* hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches + */
- (Substring_alignend_trim(hit3->substring2) - end5) + hit5_trimmed_length;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches +
- (Substring_alignend_trim(hit3->substring1) - end5) + hit5_trimmed_length;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches +
- (Substring_alignend_trim(hit3->substring0) - end5) + hit5_trimmed_length;
+ return end5;
}
/* Fall through to general algorithm */
}
@@ -1969,51 +2051,39 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* General algorithm */
debug13(printf("plus general: hit3->substring1\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring1)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring1)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring1)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignend_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring1)) != 0) {
+ return common_genomicpos;
}
if (hit3->substring2 != NULL) {
debug13(printf("plus general: hit3->substring2\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring2)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring1))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring2)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring2)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignend_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring0))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring2)) != 0) {
+ return common_genomicpos;
}
}
if (hit3->substring0 != NULL) {
debug13(printf("plus general: hit3->substring0\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring0)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring0)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring0)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignend_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring0)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring0)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring0)) != 0) {
+ return common_genomicpos;
}
}
@@ -2021,10 +2091,12 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else if (hit5->plusp == true && hit3->plusp == false) {
/* plus/minus */
- start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_nmatches;
- end5 = hit5->genomicend - hit5->trim_right - hit5->end_amb_nmatches;
- start3 = hit3->genomicstart - hit3->trim_left - hit3->start_amb_nmatches;
- end3 = hit3->genomicend + hit3->trim_right + hit3->end_amb_nmatches;
+ debug13(printf("Computing overlap using substrings plus/minus\n"));
+
+ start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_length;
+ end5 = hit5->genomicend - hit5->trim_right - hit5->end_amb_length;
+ start3 = hit3->genomicstart - hit3->trim_left - hit3->start_amb_length;
+ end3 = hit3->genomicend + hit3->trim_right + hit3->end_amb_length;
if (start3 < start5) {
/* Case 1 */
@@ -2037,27 +2109,21 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 2: Tails overlap. Go from start5 to start3 */
debug13(printf("plus case 2a: start5 %u\n",start5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
- return (Substring_alignstart_trim(hit3->substring0) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
- return (Substring_alignstart_trim(hit3->substring1) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
- return (Substring_alignstart_trim(hit3->substring2) - start5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
+ return start5;
}
/* Case 2: Tails overlap. Go from start5 to start3 */
debug13(printf("plus case 2b: start3 %u\n",start3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches +
- (start3 - Substring_alignstart_trim(hit5->substring2));
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches +
- (start3 - Substring_alignstart_trim(hit5->substring1));
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches + */
- (start3 - Substring_alignstart_trim(hit5->substring0));
+ return start3;
}
/* Fall through to general algorithm */
@@ -2065,14 +2131,11 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 3: hit3 subsumes hit5 */
debug13(printf("plus case 3\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return hit5_trimmed_length + (Substring_alignstart_trim(hit3->substring2) - end5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return hit5_trimmed_length + (Substring_alignstart_trim(hit3->substring1) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return hit5_trimmed_length + (Substring_alignstart_trim(hit3->substring0) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
+ return end5;
}
/* Fall through to general algorithm */
}
@@ -2082,14 +2145,11 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 4: hit5 subsumes hit3 */
debug13(printf("plus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (end3 - Substring_alignstart_trim(hit5->substring0)) + hit3_trimmed_length;
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (end3 - Substring_alignstart_trim(hit5->substring1)) + hit3_trimmed_length;
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (end3 - Substring_alignstart_trim(hit5->substring2)) + hit3_trimmed_length;
+ return end3;
}
/* Fall through to general algorithm */
@@ -2097,27 +2157,21 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 5: Based on hit3_trimmed_length */
debug13(printf("plus case 5a\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (end3 - Substring_alignstart_trim(hit5->substring0)) + hit3_trimmed_length;
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (end3 - Substring_alignstart_trim(hit5->substring1)) + hit3_trimmed_length;
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (end3 - Substring_alignstart_trim(hit5->substring2)) + hit3_trimmed_length;
+ return end3;
}
/* Case 5: Based on hit5_trimmed_length */
debug13(printf("plus case 5b\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return /* hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches + */
- (Substring_alignstart_trim(hit3->substring2) - end5) + hit5_trimmed_length;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches +
- (Substring_alignstart_trim(hit3->substring1) - end5) + hit5_trimmed_length;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches +
- (Substring_alignstart_trim(hit3->substring0) - end5) + hit5_trimmed_length;
+ return end5;
}
/* Fall through to general algorithm */
}
@@ -2125,62 +2179,52 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* General algorithm */
debug13(printf("plus general: hit3->substring1\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring1)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring1)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring1)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignstart_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring1)) != 0) {
+ return common_genomicpos;
}
if (hit3->substring2 != NULL) {
debug13(printf("plus general: hit3->substring2\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring2)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring1))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring2)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring2)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignstart_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring0))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring2)) != 0) {
+ return common_genomicpos;
}
}
if (hit3->substring0 != NULL) {
debug13(printf("plus general: hit3->substring0\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring0)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring0)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring0)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignstart_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring0)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring0)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring0)) != 0) {
+ return common_genomicpos;
}
}
- return 0;
+ return 0U;
} else if (hit5->plusp == false && hit3->plusp == true) {
/* minus/plus */
- start5 = hit5->genomicstart - hit5->trim_left - hit5->start_amb_nmatches;
- end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_nmatches;
- start3 = hit3->genomicstart + hit3->trim_left + hit3->start_amb_nmatches;
- end3 = hit3->genomicend - hit3->trim_right - hit3->end_amb_nmatches;
+ debug13(printf("Computing overlap using substrings minus/plus\n"));
+
+ start5 = hit5->genomicstart - hit5->trim_left - hit5->start_amb_length;
+ end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_length;
+ start3 = hit3->genomicstart + hit3->trim_left + hit3->start_amb_length;
+ end3 = hit3->genomicend - hit3->trim_right - hit3->end_amb_length;
if (end3 < end5) {
/* Case 1 */
@@ -2193,27 +2237,21 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 2: Tails overlap. Go from end5 to end3 */
debug13(printf("plus case 2a: end5 %u\n",end5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return (Substring_alignend_trim(hit3->substring0) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return (Substring_alignend_trim(hit3->substring1) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return (Substring_alignend_trim(hit3->substring2) - end5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
+ return end5;
}
/* Case 2: Tails overlap. Go from end5 to end3 */
debug13(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return Substring_queryend(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches +
- (end3 - Substring_alignend_trim(hit5->substring2));
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return Substring_queryend(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches +
- (end3 - Substring_alignend_trim(hit5->substring1));
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches + */
- (end3 - Substring_alignend_trim(hit5->substring0));
+ return end3;
}
/* Fall through to general algorithm */
@@ -2221,14 +2259,11 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 3: hit3 subsumes hit5 */
debug13(printf("plus case 3\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
- return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring2) - start5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
- return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring1) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
- return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring0) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
+ return start5;
}
/* Fall through to general algorithm */
}
@@ -2238,14 +2273,11 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 4: hit5 subsumes hit3 */
debug13(printf("plus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
- (start3 - Substring_alignend_trim(hit5->substring0)) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
- (start3 - Substring_alignend_trim(hit5->substring1)) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
- (start3 - Substring_alignend_trim(hit5->substring2)) + hit3_trimmed_length;
+ return start3;
}
/* Fall through to general algorithm */
@@ -2253,27 +2285,21 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 5: Based on hit3_trimmed_length */
debug13(printf("plus case 5a\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
- (start3 - Substring_alignend_trim(hit5->substring0)) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
- (start3 - Substring_alignend_trim(hit5->substring1)) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
- (start3 - Substring_alignend_trim(hit5->substring2)) + hit3_trimmed_length;
+ return start3;
}
/* Case 5: Based on hit5_trimmed_length */
debug13(printf("plus case 5b\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
- return /* hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches + */
- (Substring_alignend_trim(hit3->substring2) - start5) + hit5_trimmed_length;
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
- return hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches +
- (Substring_alignend_trim(hit3->substring1) - start5) + hit5_trimmed_length;
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
- return hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches +
- (Substring_alignend_trim(hit3->substring0) - start5) + hit5_trimmed_length;
+ return start5;
}
/* Fall through to general algorithm */
}
@@ -2281,51 +2307,39 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* General algorithm */
debug13(printf("plus general: hit3->substring1\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring1)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring1) - Substring_alignend_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring1)) {
- return (Substring_queryend(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring1) - Substring_alignend_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring1)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
- (Substring_alignend_trim(hit3->substring1) - Substring_alignend_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring1)) != 0) {
+ return common_genomicpos;
}
if (hit3->substring2 != NULL) {
debug13(printf("plus general: hit3->substring2\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring2)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring2) - Substring_alignend_trim(hit5->substring1))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring2)) {
- return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring2) - Substring_alignend_trim(hit5->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring2)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
- (Substring_alignend_trim(hit3->substring2) - Substring_alignend_trim(hit5->substring0))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring2)) != 0) {
+ return common_genomicpos;
}
}
if (hit3->substring0 != NULL) {
debug13(printf("plus general: hit3->substring0\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring0)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring0) - Substring_alignend_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring0)) {
- return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
- (Substring_alignend_trim(hit3->substring0) - Substring_alignend_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring0)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
- (Substring_alignend_trim(hit3->substring0) - Substring_alignend_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring0)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring0)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring0)) != 0) {
+ return common_genomicpos;
}
}
@@ -2333,10 +2347,12 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else if (hit5->plusp == false && hit3->plusp == false) {
/* minus/minus */
- start5 = hit5->genomicstart - hit5->trim_left - hit5->start_amb_nmatches;
- end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_nmatches;
- start3 = hit3->genomicstart - hit3->trim_left - hit3->start_amb_nmatches;
- end3 = hit3->genomicend + hit3->trim_right + hit3->end_amb_nmatches;
+ debug13(printf("Computing overlap using substrings minus/minus\n"));
+
+ start5 = hit5->genomicstart - hit5->trim_left - hit5->start_amb_length;
+ end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_length;
+ start3 = hit3->genomicstart - hit3->trim_left - hit3->start_amb_length;
+ end3 = hit3->genomicend + hit3->trim_right + hit3->end_amb_length;
if (end3 > start5) {
/* Case 1 */
@@ -2347,44 +2363,38 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else if (start3 > start5) {
if (end3 > end5) {
/* Case 2: Tails overlap. Go from start5 to end3 */
- debug13(printf("plus case 2a: start5 %u\n",start5 - hit5->chroffset));
+ debug13(printf("minus/minus case 2a: start5 %llu (%u)\n",start5,start5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
- return (start5 - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
+ debug13(printf("Success on hit3->substring0\n"));
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
- return (start5 - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
+ debug13(printf("Success on hit3->substring1\n"));
+ return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
- return (start5 - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
+ debug13(printf("Success on hit3->substring2\n"));
+ return start5;
}
/* Case 2: Tails overlap. Go from start5 to end3 */
debug13(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches +
- (Substring_alignstart_trim(hit5->substring2) - end3);
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches +
- (Substring_alignstart_trim(hit5->substring1) - end3);
+ return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches + */
- (Substring_alignstart_trim(hit5->substring0) - end3);
+ return end3;
}
/* Fall through to general algorithm */
} else {
/* Case 3: hit3 subsumes hit5 */
- debug13(printf("minus case 3: end5 %u\n",end5 - hit5->chroffset));
+ debug13(printf("minus/minus case 3: end5 %u\n",end5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return hit5_trimmed_length + (end5 - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return hit5_trimmed_length + (end5 - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return hit5_trimmed_length + (end5 - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
+ return end5;
}
/* Fall through to general algorithm */
}
@@ -2392,16 +2402,13 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else {
if (end3 > end5) {
/* Case 4: hit5 subsumes hit3 */
- debug13(printf("minus case 4\n"));
+ debug13(printf("minus/minus case 4: start3 %u\n",(Chrpos_T) (start3 - hit3->chroffset)));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignstart_trim(hit5->substring0) - start3) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring1) - start3) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring2) - start3) + hit3_trimmed_length;
+ return start3;
}
/* Fall through to general algorithm */
@@ -2409,79 +2416,61 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 5: Based on hit3_trimmed_length */
debug13(printf("minus case 5a: start3 %u\n",start3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignstart_trim(hit5->substring0) - start3) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring1) - start3) + hit3_trimmed_length;
+ return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring2) - start3) + hit3_trimmed_length;
+ return start3;
}
/* Case 5: Based on hit5_trimmed_length */
debug13(printf("minus case 5b: end5 %u\n",end5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return /* hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches + */
- (end5 - Substring_alignend_trim(hit3->substring2)) + hit5_trimmed_length;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches +
- (end5 - Substring_alignend_trim(hit3->substring1)) + hit5_trimmed_length;
+ return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches +
- (end5 - Substring_alignend_trim(hit3->substring0)) + hit5_trimmed_length;
+ return end5;
}
/* Fall through to general algorithm */
}
}
/* General algorithm */
- debug13(printf("minus general: hit3->substring1\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring1)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring1) - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring1)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring2) - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring1)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignstart_trim(hit5->substring0) - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
+ debug13(printf("minus/minus general: hit3->substring1\n"));
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring1)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring1)) != 0) {
+ return common_genomicpos;
}
if (hit3->substring2 != NULL) {
debug13(printf("minus general: hit3->substring2\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring2)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring1) - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring2)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring2) - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring2)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignstart_trim(hit5->substring0) - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring2)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring2)) != 0) {
+ return common_genomicpos;
}
}
if (hit3->substring0 != NULL) {
debug13(printf("minus general: hit3->substring0\n"));
- if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring0)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring1) - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring0)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
- (Substring_alignstart_trim(hit5->substring2) - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
- } else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring0)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
- (Substring_alignstart_trim(hit5->substring0) - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring0)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring2 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring0)) != 0) {
+ return common_genomicpos;
+ } else if (hit5->substring0 != NULL &&
+ (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring0)) != 0) {
+ return common_genomicpos;
}
}
@@ -2495,13 +2484,23 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
+
+/* Note: Do not alter this->insertlength, which is used for SAM
+ output. The insertlength computed here is used only for performing
+ --clip-overlap or --merge-overlap */
int
-Stage3pair_overlap (int *hardclip5, int *hardclip3, Stage3pair_T this) {
+Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low, int *hardclip3_high, Stage3pair_T this) {
Stage3end_T hit5, hit3;
- int totallength, insertlength, minlength;
+ int totallength, insertlength;
int overlap;
int clipdir;
int hit5_trimmed_length, hit3_trimmed_length;
+ int ilength53, ilength35, ilength5_low, ilength5_high, ilength3_low, ilength3_high;
+ int common_shift, common_left, common_right;
+ Univcoord_T common_genomicpos;
+
+
+ *hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
hit5 = this->hit5;
hit3 = this->hit3;
@@ -2514,196 +2513,205 @@ Stage3pair_overlap (int *hardclip5, int *hardclip3, Stage3pair_T this) {
return 0;
} else {
debug13(printf("hit5 trim_left %d + amb_start %d, trim_right %d + amb_end %d, hit3 trim_left %d + amb_start %d, trim_right %d + amb_end %d\n",
- hit5->trim_left,hit5->start_amb_nmatches,hit5->trim_right,hit5->end_amb_nmatches,
- hit3->trim_left,hit3->start_amb_nmatches,hit3->trim_right,hit3->end_amb_nmatches));
+ hit5->trim_left,hit5->start_amb_length,hit5->trim_right,hit5->end_amb_length,
+ hit3->trim_left,hit3->start_amb_length,hit3->trim_right,hit3->end_amb_length));
if (hit5->plusp == true) {
- hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->start_amb_nmatches - hit5->end_amb_nmatches;
- hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->start_amb_nmatches - hit3->end_amb_nmatches;
+ /* plus */
+ hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->start_amb_length - hit5->end_amb_length;
+ hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->start_amb_length - hit3->end_amb_length;
totallength = hit5_trimmed_length + hit3_trimmed_length;
debug13(printf("totallength = %d, hit5 trimmed length = %d, hit3 trimmed length = %d\n",
totallength,hit5_trimmed_length,hit3_trimmed_length));
-#if 0
- hit5_inside = (hit5->genomicend - 1) - hit5->trim_right - hit5->end_amb_nmatches;
- hit3_inside = hit3->genomicstart + hit3->trim_left + hit3->start_amb_nmatches; /* because hit3 is inverted */
- insertlength = (hit3_inside + hit3_trimmed_length - 1) - (hit5_inside - hit5_trimmed_length + 1) + 1;
-#endif
debug13(printf("original insertlength: %d, trim+amb5: %d..%d, trim+amb3: %d..%d\n",
- this->insertlength,hit5->trim_left + hit5->start_amb_nmatches,
- hit5->trim_right + hit5->end_amb_nmatches,hit3->trim_left + hit3->start_amb_nmatches,
- hit3->trim_right + hit3->end_amb_nmatches));
- if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
- insertlength = this->insertlength = pair_insert_length_trimmed(hit5,hit3,hit5_trimmed_length,hit3_trimmed_length);
- } else {
- insertlength = pair_insert_length_trimmed(hit5,hit3,hit5_trimmed_length,hit3_trimmed_length);
- }
- debug13(printf(" => trimmed insertlength: %d\n",insertlength));
+ this->insertlength,hit5->trim_left + hit5->start_amb_length,
+ hit5->trim_right + hit5->end_amb_length,hit3->trim_left + hit3->start_amb_length,
+ hit3->trim_right + hit3->end_amb_length));
- if (insertlength == 0) {
- debug13(printf("Cannot determine an insert length, so returning 0\n"));
- return 0;
- } else if ((overlap = totallength - insertlength) < 0) {
- debug13(printf("Overlap %d is negative, so returning 0\n",overlap));
+ if ((common_genomicpos = pair_common_genomicpos(hit5,hit3)) == 0) {
+ debug13(printf("Cannot determine a common point, so returning 0\n"));
return 0;
- } else {
- debug13(printf("overlap = %d\n",overlap));
- }
-
- if (hit5_trimmed_length <= hit3_trimmed_length) {
- debug13(printf("Overlap is %d and hit5 is shortest\n",overlap));
- minlength = hit5_trimmed_length;
- if (insertlength < minlength) {
- /* Clip tails equally */
- *hardclip5 = insertlength/2;
- *hardclip3 = insertlength - (*hardclip5);
- *hardclip3 += hit3->trim_right + hit3->end_amb_nmatches;
- *hardclip5 += hit5->trim_left + hit5->start_amb_nmatches;
- debug13(printf("Clip tails with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = -1;
- } else if (overlap > minlength) {
- /* Clip heads, but limit clipping on hit5 */
- *hardclip5 = hit5_trimmed_length/2;
- *hardclip3 = overlap - (*hardclip5);
- *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
- *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
- debug13(printf("Clip heads limited with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = +1;
- } else {
- /* Clip heads equally */
- *hardclip5 = overlap/2;
- *hardclip3 = overlap - (*hardclip5);
- *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
- *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
- debug13(printf("Clip heads with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = +1;
- }
} else {
- debug13(printf("Overlap is %d and hit3 is shortest\n",overlap));
- minlength = hit3_trimmed_length;
- if (insertlength < minlength) {
- /* Clip tails equally */
- *hardclip3 = insertlength/2;
- *hardclip5 = insertlength - (*hardclip3);
- *hardclip3 += hit3->trim_right + hit3->end_amb_nmatches;
- *hardclip5 += hit5->trim_left + hit5->start_amb_nmatches;
- debug13(printf("Clip tails with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = -1;
- } else if (overlap > minlength) {
- /* Clip heads, but limit clipping on hit3 */
- *hardclip3 = hit3_trimmed_length/2;
- *hardclip5 = overlap - (*hardclip3);
- *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
- *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
- debug13(printf("Clip heads limited with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = +1;
+ find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset);
+ find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset);
+ debug13(printf("ilength53 is %d, ilength 35 is %d\n",ilength5_low + ilength3_high - 1,ilength3_low + ilength5_high - 1));
+
+ common_left = (ilength5_low < ilength3_low) ? ilength5_low : ilength3_low;
+ common_right = (ilength5_high < ilength3_high) ? ilength5_high : ilength3_high;
+ common_shift = (common_right - common_left)/2;
+ debug13(printf("Common_shift is %d\n",common_shift));
+
+ if ((ilength53 = ilength5_low + ilength3_high - 1) > (ilength35 = ilength3_low + ilength5_high - 1)) {
+ debug13(printf("plus, ilength53 is longer\n"));
+ if ((overlap = totallength - ilength53) < 0) {
+ debug13(printf("Overlap %d is negative, so returning 0\n",overlap));
+ return 0;
+ } else {
+ debug13(printf("Overlap is %d\n",overlap));
+ clipdir = +1;
+ }
+
+ if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
+ this->insertlength = ilength53;
+ }
+
+ /* Want to clip 5 high and 3 low */
+ *hardclip5_high = ilength5_high - common_shift;
+ *hardclip3_low = overlap - (*hardclip5_high);
+ debug13(printf("Clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ *hardclip5_high += hit5->trim_right + hit5->end_amb_length;
+ *hardclip3_low += hit3->trim_left + hit3->start_amb_length;
+ debug13(printf("Clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ if (*hardclip5_high < 0) {
+ *hardclip5_high = 0;
+ }
+ if (*hardclip3_low < 0) {
+ *hardclip3_low = 0;
+ }
+ debug13(printf("Clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+
} else {
- /* Clip heads equally */
- *hardclip3 = overlap/2;
- *hardclip5 = overlap - (*hardclip3);
- *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
- *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
- debug13(printf("Clip heads with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = +1;
+ debug13(printf("plus, ilength35 is longer\n"));
+ if ((overlap = totallength - ilength35) < 0) {
+ debug13(printf("Overlap %d is negative, so returning 0\n",overlap));
+ return 0;
+ } else {
+ debug13(printf("Overlap is %d\n",overlap));
+ clipdir = -1;
+ }
+
+ if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
+ this->insertlength = ilength35;
+ }
+
+ /* Want to clip 5 low and 3 high */
+ *hardclip5_low = ilength5_low + common_shift;
+ *hardclip3_high = overlap - (*hardclip5_low);
+ debug13(printf("Clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ *hardclip5_low += hit5->trim_left + hit5->start_amb_length;
+ *hardclip3_high += hit3->trim_right + hit3->end_amb_length;
+ debug13(printf("Clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ if (*hardclip5_low < 0) {
+ *hardclip5_low = 0;
+ }
+ if (*hardclip3_high < 0) {
+ *hardclip3_high = 0;
+ }
+ debug13(printf("Clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
- }
- debug13(printf("plus, so returning clipdir %d\n",clipdir));
- return clipdir;
+ debug13(printf("returning clipdir %d\n",clipdir));
+ return clipdir;
+ }
} else {
- hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->start_amb_nmatches - hit5->end_amb_nmatches;
- hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->start_amb_nmatches - hit3->end_amb_nmatches;
+ /* minus */
+ hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->start_amb_length - hit5->end_amb_length;
+ hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->start_amb_length - hit3->end_amb_length;
totallength = hit5_trimmed_length + hit3_trimmed_length;
debug13(printf("totallength = %d, hit5 trimmed length = %d, hit3 trimmed length = %d\n",
totallength,hit5_trimmed_length,hit3_trimmed_length));
-#if 0
- hit5_inside = hit5->genomicend + hit5->trim_right + hit5->end_amb_nmatches;
- hit3_inside = (hit3->genomicstart - 1) - hit3->trim_left - hit3->start_amb_nmatches; /* because hit3 is inverted */
- insertlength = (hit5_inside + hit5_trimmed_length - 1) - (hit3_inside - hit3_trimmed_length + 1) + 1;
-#endif
debug13(printf("original insertlength: %d, trim+amb5: %d..%d, trim+amb3: %d..%d\n",
- this->insertlength,hit5->trim_left + hit5->start_amb_nmatches,
- hit5->trim_right + hit5->end_amb_nmatches,hit3->trim_left + hit3->start_amb_nmatches,
- hit3->trim_right + hit3->end_amb_nmatches));
- if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
- insertlength = this->insertlength = pair_insert_length_trimmed(hit5,hit3,hit5_trimmed_length,hit3_trimmed_length);
- } else {
- insertlength = pair_insert_length_trimmed(hit5,hit3,hit5_trimmed_length,hit3_trimmed_length);
- }
- debug13(printf(" => trimmed insertlength: %d\n",insertlength));
+ this->insertlength,hit5->trim_left + hit5->start_amb_length,
+ hit5->trim_right + hit5->end_amb_length,hit3->trim_left + hit3->start_amb_length,
+ hit3->trim_right + hit3->end_amb_length));
- if (insertlength == 0) {
- debug13(printf("Cannot determine an insert length, so returning 0\n"));
+ if ((common_genomicpos = pair_common_genomicpos(hit5,hit3)) == 0) {
+ debug13(printf("Cannot determine a common point, so returning 0\n"));
return 0;
- } else if ((overlap = totallength - insertlength) < 0) {
- debug13(printf("Overlap %d is negative, so returning 0\n",overlap));
- return 0;
- } else {
- debug13(printf("overlap = %d\n",overlap));
- }
-
- if (hit5_trimmed_length <= hit3_trimmed_length) {
- debug13(printf("Overlap is %d and hit5 is shortest\n",overlap));
- minlength = hit5_trimmed_length;
- if (insertlength < minlength) {
- /* Clip tails equally */
- *hardclip5 = insertlength/2;
- *hardclip3 = insertlength - (*hardclip5);
- *hardclip3 += hit3->trim_right + hit3->end_amb_nmatches;
- *hardclip5 += hit5->trim_left + hit5->start_amb_nmatches;
- debug13(printf("Clip tails with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = -1;
- } else if (overlap > minlength) {
- /* Clip heads, but limit clipping on hit5 */
- *hardclip5 = hit5_trimmed_length/2;
- *hardclip3 = overlap - (*hardclip5);
- *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
- *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
- debug13(printf("Clip heads limited with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = +1;
- } else {
- /* Clip heads equally */
- *hardclip5 = overlap/2;
- *hardclip3 = overlap - (*hardclip5);
- *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
- *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
- debug13(printf("Clip heads with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = +1;
- }
} else {
- debug13(printf("Overlap is %d and hit3 is shortest\n",overlap));
- minlength = hit3_trimmed_length;
- if (insertlength < minlength) {
- /* Clip tails equally */
- *hardclip3 = insertlength/2;
- *hardclip5 = insertlength - (*hardclip3);
- *hardclip3 += hit3->trim_right + hit3->end_amb_nmatches;
- *hardclip5 += hit5->trim_left + hit5->start_amb_nmatches;
- debug13(printf("Clip tails with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = -1;
- } else if (overlap > minlength) {
- /* Clip heads, but limit clipping on hit3 */
- *hardclip3 = hit3_trimmed_length/2;
- *hardclip5 = overlap - (*hardclip3);
- *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
- *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
- debug13(printf("Clip heads limited with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = +1;
+ find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset);
+ find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset);
+ debug13(printf("ilength53lh is %d, ilength35lh is %d\n",ilength5_low + ilength3_high - 1,ilength3_low + ilength5_high - 1));
+
+ common_left = (ilength5_low < ilength3_low) ? ilength5_low : ilength3_low;
+ common_right = (ilength5_high < ilength3_high) ? ilength5_high : ilength3_high;
+ common_shift = (common_right - common_left)/2;
+ debug13(printf("Common shift is %d\n",common_shift));
+
+ if ((ilength53 = ilength5_low + ilength3_high - 1) > (ilength35 = ilength3_low + ilength5_high - 1)) {
+ debug13(printf("minus, ilength53 is longer\n"));
+ if ((overlap = totallength - ilength53) < 0) {
+ debug13(printf("Overlap %d is negative, so returning 0\n",overlap));
+ return 0;
+ } else {
+ debug13(printf("Overlap is %d\n",overlap));
+ clipdir = +1;
+ }
+
+ if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
+ this->insertlength = ilength53;
+ }
+
+ /* Want to clip 5 high and 3 low */
+ *hardclip5_high = ilength5_high - common_shift;
+ *hardclip3_low = overlap - (*hardclip5_high);
+ debug13(printf("Clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ *hardclip5_high += hit5->trim_left + hit5->start_amb_length;
+ *hardclip3_low += hit3->trim_right + hit3->end_amb_length;
+ debug13(printf("Clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ if (*hardclip5_high < 0) {
+ *hardclip5_high = 0;
+ }
+ if (*hardclip3_low < 0) {
+ *hardclip3_low = 0;
+ }
+ debug13(printf("Clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+
} else {
- /* Clip heads equally */
- *hardclip3 = overlap/2;
- *hardclip5 = overlap - (*hardclip3);
- *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
- *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
- debug13(printf("Clip heads with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
- clipdir = +1;
+ debug13(printf("minus, ilength35lh %d is longer than ilength53lh %d\n",ilength35,ilength53));
+ if ((overlap = totallength - ilength35) < 0) {
+ debug13(printf("Overlap %d is negative, so returning 0\n",overlap));
+ return 0;
+ } else {
+ debug13(printf("Overlap is %d\n",overlap));
+ clipdir = -1;
+ }
+
+ if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
+ this->insertlength = ilength35;
+ }
+
+ /* Want to clip 5 low and 3 high. Verified. */
+ debug13(printf("Clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ *hardclip5_low = ilength5_low + common_shift;
+ *hardclip3_high = overlap - (*hardclip5_low);
+ debug13(printf("Clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ *hardclip5_low += hit5->trim_right + hit5->end_amb_length;
+ *hardclip3_high += hit3->trim_left + hit3->start_amb_length;
+ debug13(printf("Clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ if (*hardclip5_low < 0) {
+ *hardclip5_low = 0;
+ }
+ if (*hardclip3_high < 0) {
+ *hardclip3_high = 0;
+ }
+ debug13(printf("Clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
}
- debug13(printf("minus, so returning -clipdir %d\n",-clipdir));
- return -clipdir;
+
+ debug13(printf("returning clipdir %d\n",clipdir));
+ return clipdir;
}
}
}
@@ -2907,12 +2915,12 @@ Stage3end_copy (T old) {
new->start_ambiguous_p = old->start_ambiguous_p;
new->end_ambiguous_p = old->end_ambiguous_p;
- new->start_amb_nmatches = old->start_amb_nmatches;
- new->end_amb_nmatches = old->end_amb_nmatches;
+ new->start_amb_length = old->start_amb_length;
+ new->end_amb_length = old->end_amb_length;
new->start_amb_prob = old->start_amb_prob;
new->end_amb_prob = old->end_amb_prob;
- new->amb_nmatches_donor = old->amb_nmatches_donor;
- new->amb_nmatches_acceptor = old->amb_nmatches_acceptor;
+ new->amb_length_donor = old->amb_length_donor;
+ new->amb_length_acceptor = old->amb_length_acceptor;
if ((new->nambcoords_donor = old->nambcoords_donor) == 0) {
new->ambcoords_donor = (Univcoord_T *) NULL;
@@ -3096,6 +3104,7 @@ compute_circularpos (int *alias, T hit) {
debug12(printf("Computing circularpos on hit at %u..%u with trim left %d and trim right %d\n",
hit->genomicstart - hit->chroffset,hit->genomicend - hit->chroffset,hit->trim_left,hit->trim_right));
if (circularp[hit->chrnum] == false) {
+ debug12(printf("Chromosome #%d is not circular\n",hit->chrnum));
/* This also handles hit->chrnum == 0, where translocation cannot be circular */
*alias = 0;
return -1;
@@ -3113,6 +3122,7 @@ compute_circularpos (int *alias, T hit) {
#endif
) {
/* All of read after trimming is in circular alias */
+ debug12(printf("Soft clip of %d on left avoids circularization\n",hit->trim_left));
*alias = +1;
return -1;
@@ -3124,6 +3134,7 @@ compute_circularpos (int *alias, T hit) {
#endif
) {
/* All of read after trimming is in circular proper */
+ debug12(printf("Soft clip of %d on right avoids circularization\n",hit->trim_right));
*alias = -1;
return -1;
@@ -3152,6 +3163,7 @@ compute_circularpos (int *alias, T hit) {
#endif
) {
/* All of read after trimming is in circular alias */
+ debug12(printf("Soft clip of %d on right avoids circularization\n",hit->trim_right));
debug12(printf("All of read after trimming is in circular alias\n"));
*alias = +1;
return -1;
@@ -3164,6 +3176,7 @@ compute_circularpos (int *alias, T hit) {
#endif
) {
/* All of read after trimming is in circular proper */
+ debug12(printf("Soft clip of %d on left avoids circularization\n",hit->trim_left));
debug12(printf("All of read after trimming is in circular proper\n"));
*alias = -1;
return -1;
@@ -3222,7 +3235,7 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
} else {
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_exact %p: left %lu, chrnum %d\n",new,left,chrnum));
+ debug0(printf("Stage3end_new_exact %p: left %llu, chrnum %d\n",new,(unsigned long long) left,chrnum));
new->substring1 = substring;
new->substring2 = (Substring_T) NULL;
@@ -3247,7 +3260,7 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
}
new->genomiclength = new->high - new->low;
new->guided_insertlength = 0U;
- debug0(printf("Assigned %lu to low and %lu to high\n",new->low,new->high));
+ debug0(printf("Assigned %llu to low and %llu to high\n",(unsigned long long) new->low,(unsigned long long) new->high));
new->hittype = EXACT;
@@ -3291,9 +3304,9 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
new->tally = -1L;
*found_score = 0;
- new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->start_amb_length = new->end_amb_length = 0;
new->start_amb_prob = new->end_amb_prob = 0.0;
- new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
+ new->amb_length_donor = new->amb_length_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
@@ -3357,8 +3370,8 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
} else {
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_substitution %p: left %lu, chrnum %d, nmismatches %d\n",
- new,left,chrnum,nmismatches_whole));
+ debug0(printf("Stage3end_new_substitution %p: left %llu, chrnum %d, nmismatches %d\n",
+ new,(unsigned long long) left,chrnum,nmismatches_whole));
new->substring1 = substring;
new->substring2 = (Substring_T) NULL;
@@ -3441,9 +3454,9 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
*found_score = new->score;
}
- new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->start_amb_length = new->end_amb_length = 0;
new->start_amb_prob = new->end_amb_prob = 0.0;
- new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
+ new->amb_length_donor = new->amb_length_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
@@ -3484,8 +3497,8 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
Univcoord_T genomicstart, genomicend;
Univcoord_T alignstart1, alignend1, alignstart2, alignend2;
- debug2(printf("Entered with left %lu, querylength %d, genomiclength %d, indel_pos %d\n",
- left,querylength,genomiclength,indel_pos));
+ debug2(printf("Entered with left %llu, querylength %d, genomiclength %d, indel_pos %d\n",
+ (unsigned long long) left,querylength,genomiclength,indel_pos));
debug2(printf("q: %s\n",query));
#if 0
debug2(printf("g: %s\n",genomicseg));
@@ -3543,8 +3556,8 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
} else {
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_insertion %p: left %lu, chrnum %d, nmismatches %d+%d, indel_pos %d, nindels %d\n",
- new,left,chrnum,nmismatches1_whole,nmismatches2_whole,indel_pos,nindels));
+ debug0(printf("Stage3end_new_insertion %p: left %llu, chrnum %d, nmismatches %d+%d, indel_pos %d, nindels %d\n",
+ new,(unsigned long long) left,chrnum,nmismatches1_whole,nmismatches2_whole,indel_pos,nindels));
new->substring1 = substring1;
new->substring2 = substring2;
@@ -3638,9 +3651,9 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
*found_score = new->score;
}
- new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->start_amb_length = new->end_amb_length = 0;
new->start_amb_prob = new->end_amb_prob = 0.0;
- new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
+ new->amb_length_donor = new->amb_length_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
@@ -3681,8 +3694,8 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
Univcoord_T alignstart1, alignend1, alignstart2, alignend2;
Univcoord_T left2;
- debug3(printf("Entered with left %lu, querylength %d, genomiclength %d, indel_pos %d\n",
- left,querylength,genomiclength,indel_pos));
+ debug3(printf("Entered with left %llu, querylength %d, genomiclength %d, indel_pos %d\n",
+ (unsigned long long) left,querylength,genomiclength,indel_pos));
#if 0
debug3(printf("q: %s\n",query));
debug3(printf("g: %s\n",genomicseg));
@@ -3709,8 +3722,10 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
/* left1 = left; */
left2 = left + nindels;
- debug3(printf("plusp is true. genomicstart %lu, genomicend %lu, alignstart1 %lu, alignend1 %lu, alignstart2 %lu, alignend2 %lu, left1 %lu, left2 %lu\n",
- genomicstart,genomicend,alignstart1,alignend1,alignstart2,alignend2,left,left2));
+ debug3(printf("plusp is true. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu, left2 %llu\n",
+ (unsigned long long) genomicstart,(unsigned long long) genomicend,
+ (unsigned long long) alignstart1,(unsigned long long) alignend1,(unsigned long long) alignstart2,
+ (unsigned long long) alignend2,(unsigned long long) left,(unsigned long long) left2));
}
} else {
@@ -3727,8 +3742,10 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
/* left1 = left; */
left2 = left + nindels;
- debug3(printf("plusp is false. genomicstart %lu, genomicend %lu, alignstart1 %lu, alignend1 %lu, alignstart2 %lu, alignend2 %lu, left1 %lu, left2 %lu\n",
- genomicstart,genomicend,alignstart1,alignend1,alignstart2,alignend2,left,left2));
+ debug3(printf("plusp is false. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu, left2 %llu\n",
+ (unsigned long long) genomicstart,(unsigned long long) genomicend,
+ (unsigned long long) alignstart1,(unsigned long long) alignend1,(unsigned long long) alignstart2,
+ (unsigned long long) alignend2,(unsigned long long) left,(unsigned long long) left2));
}
}
@@ -3755,8 +3772,8 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
} else {
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_deletion %p: left %lu, chrnum %d, nmismatches %d+%d, indel_pos %d, nindels %d\n",
- new,left,chrnum,nmismatches1_whole,nmismatches2_whole,indel_pos,nindels));
+ debug0(printf("Stage3end_new_deletion %p: left %llu, chrnum %d, nmismatches %d+%d, indel_pos %d, nindels %d\n",
+ new,(unsigned long long) left,chrnum,nmismatches1_whole,nmismatches2_whole,indel_pos,nindels));
new->substring1 = substring1;
new->substring2 = substring2;
@@ -3863,9 +3880,9 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
*found_score = new->score;
}
- new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->start_amb_length = new->end_amb_length = 0;
new->start_amb_prob = new->end_amb_prob = 0.0;
- new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
+ new->amb_length_donor = new->amb_length_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
@@ -3897,7 +3914,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
T
Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_acceptor,
Substring_T donor, Substring_T acceptor, Chrpos_T distance,
- bool shortdistancep, int splicing_penalty, int querylength, int amb_nmatches,
+ bool shortdistancep, int splicing_penalty, int querylength, int amb_length,
#ifdef LARGE_GENOMES
Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
#else
@@ -3915,8 +3932,9 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
#endif
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_splice %p with sensedir %d, donor substring %p and acceptor substring %p, and amb_nmatches %d\n",
- new,sensedir,donor,acceptor,amb_nmatches));
+ debug0(printf("Stage3end_new_splice %p with sensedir %d, donor substring %p and acceptor substring %p, and amb_length %d\n",
+ new,sensedir,donor,acceptor,amb_length));
+ assert(Substring_match_length_orig(donor) + Substring_match_length_orig(acceptor) + amb_length == querylength);
new->deletion = (char *) NULL;
new->querylength_adj = new->querylength = querylength;
@@ -4099,7 +4117,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicend = Substring_genomicend(acceptor);
new->start_ambiguous_p = true;
- new->start_amb_nmatches = amb_nmatches;
+ new->start_amb_length = amb_length;
new->start_amb_prob = 5.0;
new->start_ambcoords = new->ambcoords_donor;
new->start_nambcoords = new->nambcoords_donor;
@@ -4107,7 +4125,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->start_amb_nmismatches = new->amb_nmismatches_donor;
new->end_ambiguous_p = false;
- new->end_amb_nmatches = 0;
+ new->end_amb_length = 0;
new->end_amb_prob = 5.0;
new->end_ambcoords = NULL;
new->end_nambcoords = 0;
@@ -4119,7 +4137,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicend = Substring_genomicend(donor);
new->end_ambiguous_p = true;
- new->end_amb_nmatches = amb_nmatches;
+ new->end_amb_length = amb_length;
new->end_amb_prob = 5.0;
new->end_ambcoords = new->ambcoords_acceptor;
new->end_nambcoords = new->nambcoords_acceptor;
@@ -4127,7 +4145,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->end_amb_nmismatches = new->amb_nmismatches_acceptor;
new->start_ambiguous_p = false;
- new->start_amb_nmatches = 0;
+ new->start_amb_length = 0;
new->start_amb_prob = 5.0;
new->start_ambcoords = NULL;
new->start_nambcoords = 0;
@@ -4139,7 +4157,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicend = Substring_genomicend(acceptor);
new->start_ambiguous_p = new->end_ambiguous_p = false;
- new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->start_amb_length = new->end_amb_length = 0;
new->start_amb_prob = new->end_amb_prob = 5.0;
new->start_ambcoords = new->end_ambcoords = NULL;
new->start_nambcoords = new->end_nambcoords = 0;
@@ -4153,7 +4171,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicend = Substring_genomicend(acceptor);
new->end_ambiguous_p = true;
- new->end_amb_nmatches = amb_nmatches;
+ new->end_amb_length = amb_length;
new->end_amb_prob = 5.0;
new->end_ambcoords = new->ambcoords_donor;
new->end_nambcoords = new->nambcoords_donor;
@@ -4161,7 +4179,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->end_amb_nmismatches = new->amb_nmismatches_donor;
new->start_ambiguous_p = false;
- new->start_amb_nmatches = 0;
+ new->start_amb_length = 0;
new->start_amb_prob = 5.0;
new->start_ambcoords = NULL;
new->start_nambcoords = 0;
@@ -4173,7 +4191,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicend = Substring_genomicend(donor);
new->start_ambiguous_p = true;
- new->start_amb_nmatches = amb_nmatches;
+ new->start_amb_length = amb_length;
new->start_amb_prob = 5.0;
new->start_ambcoords = new->ambcoords_acceptor;
new->start_nambcoords = new->nambcoords_acceptor;
@@ -4181,7 +4199,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->start_amb_nmismatches = new->amb_nmismatches_acceptor;
new->end_ambiguous_p = false;
- new->end_amb_nmatches = 0;
+ new->end_amb_length = 0;
new->end_amb_prob = 5.0;
new->end_ambcoords = NULL;
new->end_nambcoords = 0;
@@ -4192,7 +4210,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicstart = Substring_genomicstart(acceptor);
new->genomicend = Substring_genomicend(donor);
- new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->start_amb_length = new->end_amb_length = 0;
new->start_amb_prob = new->end_amb_prob = 5.0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = NULL;
@@ -4224,7 +4242,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
printf("amb end %u\n",new->end_ambcoords[i]);
}
#endif
- debug0(printf("start_amb_nmatches %d, end_amb_nmatches %d\n",new->start_amb_nmatches,new->end_amb_nmatches));
+ debug0(printf("start_amb_length %d, end_amb_length %d\n",new->start_amb_length,new->end_amb_length));
#ifdef CHECK_ASSERTIONS
if (new->start_ambiguous_p == true && new->start_nambcoords == 0) {
@@ -4377,11 +4395,11 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->nmatches = Substring_nmatches(acceptor);
new->nmatches_posttrim = Substring_nmatches_posttrim(acceptor);
if (favor_ambiguous_p == true) {
- new->nmatches += amb_nmatches;
+ new->nmatches += amb_length;
}
new->sensedir_nonamb = SENSE_NULL; /* Ignore sense based on ambiguous end */
debug0(printf("New splice has acceptor %d + amb %d matches, sensedir nonamb %d\n",
- Substring_nmatches(acceptor),amb_nmatches,new->sensedir_nonamb));
+ Substring_nmatches(acceptor),amb_length,new->sensedir_nonamb));
} else if (acceptor == NULL) {
/* new->mapq_loglik = Substring_mapq_loglik(donor); */
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(donor) + nmismatches_acceptor;
@@ -4389,11 +4407,11 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->nmatches = Substring_nmatches(donor);
new->nmatches_posttrim = Substring_nmatches_posttrim(donor);
if (favor_ambiguous_p == true) {
- new->nmatches += amb_nmatches;
+ new->nmatches += amb_length;
}
new->sensedir_nonamb = SENSE_NULL; /* Ignore sense based on ambiguous end */
debug0(printf("New splice has donor %d + amb %d matches, sensedir nonamb %d\n",
- Substring_nmatches(donor),amb_nmatches,new->sensedir_nonamb));
+ Substring_nmatches(donor),amb_length,new->sensedir_nonamb));
} else {
/* new->mapq_loglik = Substring_mapq_loglik(donor) + Substring_mapq_loglik(acceptor); */
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(donor) + Substring_nmismatches_bothdiff(acceptor);
@@ -4463,7 +4481,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
/* Donor ----(A distance)---- [A Shortexon D] ----(D distance)---- Acceptor */
T
Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T acceptor, Substring_T shortexon,
- int amb_nmatches_donor, int amb_nmatches_acceptor,
+ int amb_length_donor, int amb_length_acceptor,
#ifdef LARGE_GENOMES
Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
#else
@@ -4478,7 +4496,9 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new = (T) MALLOC_OUT(sizeof(*new));
debug0(printf("Stage3end_new_shortexon %p, amb_donor %d, amb_acceptor %d\n",
- new,amb_nmatches_donor,amb_nmatches_acceptor));
+ new,amb_length_donor,amb_length_acceptor));
+ assert(Substring_match_length_orig(donor) + Substring_match_length_orig(shortexon) + Substring_match_length_orig(acceptor) +
+ amb_length_donor + amb_length_acceptor == querylength);
new->deletion = (char *) NULL;
new->querylength_adj = new->querylength = querylength;
@@ -4566,8 +4586,8 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
shortdistancep,Substring_chrnum(donor),Substring_chrnum(acceptor),new->chrnum); */
- new->amb_nmatches_donor = amb_nmatches_donor;
- new->amb_nmatches_acceptor = amb_nmatches_acceptor;
+ new->amb_length_donor = amb_length_donor;
+ new->amb_length_acceptor = amb_length_acceptor;
#ifdef LARGE_GENOMES
new->ambcoords_donor = Uint8list_to_array_out(&new->nambcoords_donor,ambcoords_donor);
@@ -4587,14 +4607,14 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->genomicstart = (donor != NULL ? Substring_genomicstart(donor) : Substring_genomicstart(shortexon));
new->genomicend = (acceptor != NULL ? Substring_genomicend(acceptor) : Substring_genomicend(shortexon));
- new->start_amb_nmatches = new->amb_nmatches_donor;
+ new->start_amb_length = new->amb_length_donor;
new->start_amb_prob = 5.0;
new->start_ambcoords = new->ambcoords_donor;
new->start_nambcoords = new->nambcoords_donor;
new->start_amb_knowni = new->amb_knowni_donor;
new->start_amb_nmismatches = new->amb_nmismatches_donor;
- new->end_amb_nmatches = new->amb_nmatches_acceptor;
+ new->end_amb_length = new->amb_length_acceptor;
new->end_amb_prob = 5.0;
new->end_ambcoords = new->ambcoords_acceptor;
new->end_nambcoords = new->nambcoords_acceptor;
@@ -4608,14 +4628,14 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->genomicstart = (acceptor != NULL ? Substring_genomicstart(acceptor) : Substring_genomicstart(shortexon));
new->genomicend = (donor != NULL ? Substring_genomicend(donor) : Substring_genomicend(shortexon));
- new->start_amb_nmatches = new->amb_nmatches_acceptor;
+ new->start_amb_length = new->amb_length_acceptor;
new->start_amb_prob = 5.0;
new->start_ambcoords = new->ambcoords_acceptor;
new->start_nambcoords = new->nambcoords_acceptor;
new->start_amb_knowni = new->amb_knowni_acceptor;
new->start_amb_nmismatches = new->amb_nmismatches_acceptor;
- new->end_amb_nmatches = new->amb_nmatches_donor;
+ new->end_amb_length = new->amb_length_donor;
new->end_amb_prob = 5.0;
new->end_ambcoords = new->ambcoords_donor;
new->end_nambcoords = new->nambcoords_donor;
@@ -4640,7 +4660,7 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
debug0(printf(" hittype is %s, genomicpos %u..%u\n",
hittype_string(new->hittype),new->genomicstart - new->chroffset,new->genomicend - new->chroffset));
debug0(printf("start_ambiguous_p %d, end_ambiguous_p %d\n",new->start_ambiguous_p,new->end_ambiguous_p));
- debug0(printf("start_amb_nmatches %d, end_amb_nmatches %d\n",new->start_amb_nmatches,new->end_amb_nmatches));
+ debug0(printf("start_amb_length %d, end_amb_length %d\n",new->start_amb_length,new->end_amb_length));
new->genomiclength = new->high - new->low;
new->guided_insertlength = 0U;
@@ -4729,18 +4749,18 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->nmatches_posttrim = Substring_nmatches_posttrim(shortexon);
if (donor == NULL) {
if (favor_ambiguous_p == true) {
- new->nmatches += amb_nmatches_donor;
+ new->nmatches += amb_length_donor;
}
} else {
- /* assert(amb_nmatches_donor == 0); */
+ /* assert(amb_length_donor == 0); */
new->nmatches += Substring_nmatches(donor);
}
if (acceptor == NULL) {
if (favor_ambiguous_p == true) {
- new->nmatches += amb_nmatches_acceptor;
+ new->nmatches += amb_length_acceptor;
}
} else {
- /* assert(amb_nmatches_acceptor == 0); */
+ /* assert(amb_length_acceptor == 0); */
new->nmatches += Substring_nmatches(acceptor);
}
@@ -4791,8 +4811,8 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
int nmismatches_whole, minlength;
bool trim_left_p, trim_right_p;
- debug0(printf("\nStage3end_new_terminal possible: endtypes %s and %s, left %lu, querystart %d, queryend %d\n",
- Endtype_string(start_endtype),Endtype_string(end_endtype),left,querystart,queryend));
+ debug0(printf("\nStage3end_new_terminal possible: endtypes %s and %s, left %llu, querystart %d, queryend %d\n",
+ Endtype_string(start_endtype),Endtype_string(end_endtype),(unsigned long long) left,querystart,queryend));
if (plusp == true) {
if ((genomicend = left + querylength) > chrhigh) {
@@ -4863,7 +4883,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
alignstart_trim = Substring_alignstart_trim(substring);
alignend_trim = Substring_alignend_trim(substring);
- debug0(printf("alignstart_trim = %lu, alignend_trim = %lu\n",alignstart_trim,alignend_trim));
+ debug0(printf("alignstart_trim = %llu, alignend_trim = %llu\n",(unsigned long long) alignstart_trim,(unsigned long long) alignend_trim));
if (plusp == true) {
debug0(printf("plus: pos5 = %d, pos3 = %d\n",(int) (alignstart_trim-left),(int) (alignend_trim-left)));
nmismatches_whole =
@@ -4895,9 +4915,10 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
}
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_terminal %p: endtypes %s and %s, left %lu, genomicstart/end %lu..%lu, chrhigh %lu, chrnum %d, querystart %d, queryend %d\n",
+ debug0(printf("Stage3end_new_terminal %p: endtypes %s and %s, left %llu, genomicstart/end %llu..%llu, chrhigh %llu, chrnum %d, querystart %d, queryend %d\n",
new,Endtype_string(start_endtype),Endtype_string(end_endtype),
- left,genomicstart,genomicend,chrhigh,chrnum,querystart,queryend));
+ (unsigned long long) left,(unsigned long long) genomicstart,(unsigned long long) genomicend,
+ (unsigned long long) chrhigh,chrnum,querystart,queryend));
new->substring1 = substring;
new->substring2 = (Substring_T) NULL;
@@ -4974,9 +4995,9 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
- new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->start_amb_length = new->end_amb_length = 0;
new->start_amb_prob = new->end_amb_prob = 5.0;
- new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
+ new->amb_length_donor = new->amb_length_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
@@ -5036,8 +5057,8 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
}
if (genomicstart > genomicend) {
/* Must have started before coordinate 0 */
- debug0(printf("plusp and genomicstart %lu > genomicend %lu => started before coordinate 0\n",
- genomicstart,genomicend));
+ debug0(printf("plusp and genomicstart %llu > genomicend %llu => started before coordinate 0\n",
+ (unsigned long long) genomicstart,(unsigned long long) genomicend));
return (T) NULL;
}
} else {
@@ -5047,15 +5068,16 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
genomicend = left;
if (genomicend > genomicstart) {
/* Must have started before coordinate 0 */
- debug0(printf("minusp and genomicend %lu > genomicstart %lu => started before coordinate 0\n",
- genomicend,genomicstart));
+ debug0(printf("minusp and genomicend %llu > genomicstart %llu => started before coordinate 0\n",
+ (unsigned long long) genomicend,(unsigned long long) genomicstart));
return (T) NULL;
}
}
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_gmap %p: left %lu, genomicstart/end %u..%u, chrhigh %lu, chrnum %d, nmismatches %d, cdna_direction %d, sensedir %d, max_match_length %d\n",
- new,left,genomicstart - chroffset,genomicend - chroffset,chrhigh,chrnum,nmismatches_whole,cdna_direction,sensedir,max_match_length));
+ debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, cdna_direction %d, sensedir %d, max_match_length %d\n",
+ new,(unsigned long long) left,(unsigned int) (genomicstart - chroffset),(unsigned int) (genomicend - chroffset),
+ (unsigned long long) chrhigh,chrnum,nmismatches_whole,cdna_direction,sensedir,max_match_length));
debug0(printf(" ambig_end_length_5 %d (prob %f), ambig_end_length_3 %d (prob %f)\n",ambig_end_length_5,ambig_prob_5,ambig_end_length_3,ambig_prob_3));
new->substring1 = (Substring_T) NULL;
@@ -5175,11 +5197,11 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
if (plusp == true) {
prob1 = Maxent_hr_acceptor_prob(genomepos,chroffset);
prob2 = Maxent_hr_antidonor_prob(genomepos,chroffset);
- /* fprintf(stderr,"At %lu, acceptor prob %f, antidonor prob %f\n",genomepos,prob1,prob2); */
+ /* fprintf(stderr,"At %llu, acceptor prob %f, antidonor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
} else {
prob1 = Maxent_hr_donor_prob(genomepos,chroffset);
prob2 = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
- /* fprintf(stderr,"At %lu, donor prob %f, antiacceptor prob %f\n",genomepos,prob1,prob2); */
+ /* fprintf(stderr,"At %llu, donor prob %f, antiacceptor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
}
if (prob1 > 0.90 || prob2 > 0.90) {
new->trim_left_splicep = true;
@@ -5198,11 +5220,11 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
if (plusp == true) {
prob1 = Maxent_hr_donor_prob(genomepos,chroffset);
prob2 = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
- /* fprintf(stderr,"At %lu, donor prob %f, antiacceptor prob %f\n",genomepos,prob1,prob2); */
+ /* fprintf(stderr,"At %llu, donor prob %f, antiacceptor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
} else {
prob1 = Maxent_hr_acceptor_prob(genomepos,chroffset);
prob2 = Maxent_hr_antidonor_prob(genomepos,chroffset);
- /* fprintf(stderr,"At %lu, acceptor prob %f, antidonor prob %f\n",genomepos,prob1,prob2); */
+ /* fprintf(stderr,"At %llu, acceptor prob %f, antidonor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
}
if (prob1 > 0.90 || prob2 > 0.90) {
new->trim_right_splicep = true;
@@ -5224,7 +5246,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
- if ((new->start_amb_nmatches = ambig_end_length_5) == 0) {
+ if ((new->start_amb_length = ambig_end_length_5) == 0) {
new->gmap_start_endtype = END;
} else if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) {
new->gmap_start_endtype = AMB_DON;
@@ -5237,7 +5259,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
}
new->start_amb_prob = ambig_prob_5;
- if ((new->end_amb_nmatches = ambig_end_length_3) == 0) {
+ if ((new->end_amb_length = ambig_end_length_3) == 0) {
new->gmap_end_endtype = END;
} else if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) {
new->gmap_end_endtype = AMB_DON;
@@ -5250,7 +5272,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
}
new->end_amb_prob = ambig_prob_3;
- new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
+ new->amb_length_donor = new->amb_length_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
@@ -5767,22 +5789,26 @@ pair_insert_length (Stage3end_T hit5, Stage3end_T hit3) {
/* No overlap found between any combination of substrings */
if (hit5->plusp == true) {
if (hit5->genomicend > hit3->genomicstart + hit5->querylength_adj + hit3->querylength_adj) {
- debug10(printf("pair_insert_length: no overlap found, and %lu - %lu + %d + %d < 0, so returning 0\n",
- hit3->genomicstart,hit5->genomicend,hit5->querylength_adj,hit3->querylength_adj));
+ debug10(printf("pair_insert_length: no overlap found, and %llu - %llu + %d + %d < 0, so returning 0\n",
+ (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicend,
+ hit5->querylength_adj,hit3->querylength_adj));
return 0;
} else {
- debug10(printf("pair_insert_length: no overlap found, so returning %lu - %lu + %d + %d\n",
- hit3->genomicstart,hit5->genomicend,hit5->querylength_adj,hit3->querylength_adj));
+ debug10(printf("pair_insert_length: no overlap found, so returning %llu - %llu + %d + %d\n",
+ (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicend,
+ hit5->querylength_adj,hit3->querylength_adj));
}
return hit3->genomicstart - hit5->genomicend + hit5->querylength_adj + hit3->querylength_adj;
} else {
if (hit3->genomicstart > hit5->genomicend + hit5->querylength_adj + hit3->querylength_adj) {
- debug10(printf("pair_insert_length: no overlap found, and %lu - %lu + %d + %d < 0, so returning 0\n",
- hit5->genomicend,hit3->genomicstart,hit5->querylength_adj,hit3->querylength_adj));
+ debug10(printf("pair_insert_length: no overlap found, and %llu - %llu + %d + %d < 0, so returning 0\n",
+ (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicstart,
+ hit5->querylength_adj,hit3->querylength_adj));
return 0;
} else {
- debug10(printf("pair_insert_length: no overlap found, so returning %lu - %lu + %d + %d\n",
- hit5->genomicend,hit3->genomicstart,hit5->querylength_adj,hit3->querylength_adj));
+ debug10(printf("pair_insert_length: no overlap found, so returning %llu - %llu + %d + %d\n",
+ (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicstart,
+ hit5->querylength_adj,hit3->querylength_adj));
return hit5->genomicend - hit3->genomicstart + hit5->querylength_adj + hit3->querylength_adj;
}
}
@@ -6105,21 +6131,21 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
#endif
hit->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
- trim_left,trim_right,hit->start_amb_nmatches,hit->end_amb_nmatches,
+ trim_left,trim_right,hit->start_amb_length,hit->end_amb_length,
hit->querylength_adj);
debug4(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
- trim_left,trim_right,hit->start_amb_nmatches,hit->end_amb_nmatches,
+ trim_left,trim_right,hit->start_amb_length,hit->end_amb_length,
hit->querylength_adj)));
#ifdef SCORE_INDELS
hit->score_eventrim += indel_penalty_middle * nindelbreaks;
#endif
if (hit->start_amb_prob < 0.9) {
- hit->score_eventrim += hit->start_amb_nmatches / ambig_end_interval;
- debug4(printf(" add amb start %d/%d.",hit->start_amb_nmatches,ambig_end_interval));
+ hit->score_eventrim += hit->start_amb_length / ambig_end_interval;
+ debug4(printf(" add amb start %d/%d.",hit->start_amb_length,ambig_end_interval));
}
if (hit->end_amb_prob < 0.9) {
- hit->score_eventrim += hit->end_amb_nmatches / ambig_end_interval;
- debug4(printf(" add amb end %d/%d.",hit->end_amb_nmatches,ambig_end_interval));
+ hit->score_eventrim += hit->end_amb_length / ambig_end_interval;
+ debug4(printf(" add amb end %d/%d.",hit->end_amb_length,ambig_end_interval));
}
debug4(printf(" RESULT: %d\n",hit->score_eventrim));
@@ -8945,35 +8971,36 @@ Stage3pair_print (Result_T result, Resulttype_T resulttype,
static List_T
Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset) {
+ int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, int queryseq_offset) {
if (hit->hittype == EXACT || hit->hittype == SUB || hit->hittype == TERMINAL) {
- return Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ return Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
} else if (hit->hittype == INSERTION) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
pairs = Substring_add_insertion(pairs,hit->substring1,hit->substring2,/*insertionlength*/hit->nindels,queryseq,
- clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
return pairs;
} else if (hit->hittype == DELETION) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
pairs = Substring_add_deletion(pairs,hit->substring1,hit->substring2,/*deletion*/hit->deletion,/*deletionlength*/hit->nindels,
- clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
return pairs;
} else if (hit->hittype == HALFSPLICE_DONOR) {
- return Substring_convert_to_pairs(pairs,hit->substring_donor,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ return Substring_convert_to_pairs(pairs,hit->substring_donor,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
} else if (hit->hittype == HALFSPLICE_ACCEPTOR) {
- return Substring_convert_to_pairs(pairs,hit->substring_acceptor,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ return Substring_convert_to_pairs(pairs,hit->substring_acceptor,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
} else if (hit->hittype == SPLICE || hit->hittype == SAMECHR_SPLICE) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
return pairs;
} else if (hit->hittype == TRANSLOC_SPLICE) {
@@ -8982,19 +9009,19 @@ Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
return NULL;
} else if (hit->hittype == ONE_THIRD_SHORTEXON) {
- return Substring_convert_to_pairs(pairs,/*shortexon*/hit->substring1,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ return Substring_convert_to_pairs(pairs,/*shortexon*/hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
} else if (hit->hittype == TWO_THIRDS_SHORTEXON) {
if (hit->substring0 == NULL) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
return pairs;
} else if (hit->substring2 == NULL) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
return pairs;
} else {
@@ -9002,16 +9029,16 @@ Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
}
} else if (hit->hittype == SHORTEXON) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,clipdir,hardclip,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
return pairs;
} else if (hit->hittype == GMAP) {
return Pair_convert_array_to_pairs(pairs,hit->pairarray,hit->npairs,hit->plusp,hit->querylength,
- clipdir,hardclip,first_read_p,queryseq_offset);
+ clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
} else {
abort();
@@ -9023,7 +9050,8 @@ Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
struct Pair_T *
Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged, char **quality_merged,
Stage3pair_T this, Shortread_T queryseq5, Shortread_T queryseq3,
- int querylength5, int querylength3, int clipdir, int hardclip5, int hardclip3) {
+ int querylength5, int querylength3, int clipdir,
+ int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high) {
struct Pair_T *pairarray, *newpair;
Pair_T oldpair;
List_T pairs = NULL, p;
@@ -9040,12 +9068,13 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
if (hit5->plusp == true) {
if (clipdir >= 0) {
- pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5,/*first_read_p*/true,
- /*queryseq_offset*/0);
- pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3,/*first_read_p*/false,
- /*queryseq_offset*/querylength5-hardclip5-hardclip3);
- querylengthA = querylength5 - hardclip5;
- querylengthB = querylength3 - hardclip3;
+ pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,
+ /*first_read_p*/true,/*queryseq_offset*/0);
+ pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,
+ /*first_read_p*/false,
+ /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
+ querylengthA = querylength5 - hardclip5_low - hardclip5_high;
+ querylengthB = querylength3 - hardclip3_low - hardclip3_high;
*querylength_merged = querylengthA + querylengthB;
*queryseq_merged = (char *) MALLOC_OUT((querylengthA+querylengthB+1) * sizeof(char));
@@ -9061,12 +9090,13 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
}
} else {
- pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3,/*first_read_p*/false,
- /*queryseq_offset*/0);
- pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5,/*first_read_p*/true,
- /*queryseq_offset*/querylength3-hardclip3-hardclip5);
- querylengthA = querylength3 - hardclip3;
- querylengthB = querylength5 - hardclip5;
+ pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,
+ /*first_read_p*/false,/*queryseq_offset*/0);
+ pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,
+ /*first_read_p*/true,
+ /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
+ querylengthA = querylength3 - hardclip3_low - hardclip3_high;
+ querylengthB = querylength5 - hardclip5_low - hardclip5_high;
*querylength_merged = querylengthA + querylengthB;
*queryseq_merged = (char *) MALLOC_OUT((querylengthA+querylengthB+1) * sizeof(char));
@@ -9084,12 +9114,13 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
} else {
if (clipdir >= 0) {
- pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3,/*first_read_p*/false,
- /*queryseq_offset*/0);
- pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5,/*first_read_p*/true,
- /*queryseq_offset*/querylength3-hardclip3-hardclip5);
- querylengthA = querylength3 - hardclip3;
- querylengthB = querylength5 - hardclip5;
+ pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,
+ /*first_read_p*/false,/*queryseq_offset*/0);
+ pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,
+ /*first_read_p*/true,
+ /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
+ querylengthA = querylength3 - hardclip3_low - hardclip3_high;
+ querylengthB = querylength5 - hardclip5_low - hardclip5_high;
*querylength_merged = querylengthA + querylengthB;
*queryseq_merged = (char *) MALLOC_OUT((querylengthA+querylengthB+1) * sizeof(char));
@@ -9105,12 +9136,13 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
}
} else {
- pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5,/*first_read_p*/true,
- /*queryseq_offset*/0);
- pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3,/*first_read_p*/false,
- /*queryseq_offset*/querylength5-hardclip5-hardclip3);
- querylengthA = querylength5 - hardclip5;
- querylengthB = querylength3 - hardclip3;
+ pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,
+ /*first_read_p*/true,/*queryseq_offset*/0);
+ pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,
+ /*first_read_p*/false,
+ /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
+ querylengthA = querylength5 - hardclip5_low - hardclip5_high;
+ querylengthB = querylength3 - hardclip3_low - hardclip3_high;
*querylength_merged = querylengthA + querylengthB;
*queryseq_merged = (char *) MALLOC_OUT((querylengthA+querylengthB+1) * sizeof(char));
@@ -9218,7 +9250,7 @@ overlap3_gmap_minus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend
static void
-resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *hit3, bool *private5p, bool *private3p,
+resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, T *hit5, T *hit3, bool *private5p, bool *private3p,
Univcoord_T *splicesites,
Compress_T query5_compress_fwd, Compress_T query3_compress_fwd,
int localsplicing_penalty, int querylength5, int querylength3,
@@ -9248,7 +9280,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
Intlist_T amb_knowni, amb_nmismatches;
- *unresolved_amb_nmatches = 0;
+ *unresolved_amb_length = 0;
debug9(printf("resolve plus: hit5 %s ambiguous %d,%d and hit3 %s ambiguous %d,%d\n",
hittype_string((*hit5)->hittype),(*hit5)->start_ambiguous_p,(*hit5)->end_ambiguous_p,
@@ -9262,7 +9294,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
genomicend = (*hit5)->end_ambcoords[i]; /* splicesites[] */
for (j = 0; j < (*hit3)->start_nambcoords; j++) {
genomicstart = (*hit3)->start_ambcoords[j]; /* splicesites[] */
- debug9(printf(" %lu,%lu",genomicend - (*hit5)->chroffset,genomicstart - (*hit3)->chroffset));
+ debug9(printf(" %u,%u",(Chrpos_T) (genomicend - (*hit5)->chroffset),(Chrpos_T) (genomicstart - (*hit3)->chroffset)));
if (genomicend < genomicstart) {
nbounded++;
boundedi5 = i;
@@ -9302,8 +9334,8 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->end_amb_nmatches > 0);
- assert((*hit3)->start_amb_nmatches > 0);
+ assert((*hit5)->end_amb_length > 0);
+ assert((*hit3)->start_amb_length > 0);
#endif
#ifdef USE_BINGO
@@ -9315,9 +9347,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
#endif
if (nbest == 0) {
- debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
- *unresolved_amb_nmatches = (*hit5)->end_amb_nmatches + (*hit3)->start_amb_nmatches;
+ debug9(printf("\nnbest is zero: amb_length = %d...%d",
+ (*hit5)->end_amb_length,(*hit3)->start_amb_length));
+ *unresolved_amb_length = (*hit5)->end_amb_length + (*hit3)->start_amb_length;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new5p = true; new3p = true; bingoi5 = besti5; bingoi3 = besti3;
@@ -9330,7 +9362,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
best_nmismatches = querylength5;
for (i = 0; i < (*hit5)->end_nambcoords; i++) {
genomicend = (*hit5)->end_ambcoords[i]; /* splicesites[] */
- debug9(printf(" %lu",genomicend - (*hit5)->chroffset));
+ debug9(printf(" %u",(Chrpos_T) (genomicend - (*hit5)->chroffset)));
if (genomicend < (*hit3)->genomicstart /*allow overlap*/+ querylength3) {
nbounded++;
boundedi5 = i;
@@ -9365,8 +9397,8 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->end_amb_nmatches > 0);
- assert((*hit3)->start_amb_nmatches == 0);
+ assert((*hit5)->end_amb_length > 0);
+ assert((*hit3)->start_amb_length == 0);
#endif
#ifdef USE_BINGO
@@ -9378,9 +9410,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
#endif
if (nbest == 0) {
- debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
- *unresolved_amb_nmatches = (*hit5)->end_amb_nmatches;
+ debug9(printf("\nnbest is zero: amb_length = %d...%d",
+ (*hit5)->end_amb_length,(*hit3)->start_amb_length));
+ *unresolved_amb_length = (*hit5)->end_amb_length;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new5p = true; bingoi5 = besti5;
@@ -9393,7 +9425,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
best_nmismatches = querylength3;
for (j = 0; j < (*hit3)->start_nambcoords; j++) {
genomicstart = (*hit3)->start_ambcoords[j]; /* splicesites[] */
- debug9(printf(" %lu",genomicstart - (*hit3)->chroffset));
+ debug9(printf(" %u",(Chrpos_T) (genomicstart - (*hit3)->chroffset)));
if ((*hit5)->genomicend < genomicstart /*allow overlap*/+ querylength5) {
nbounded++;
boundedi3 = j;
@@ -9428,8 +9460,8 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->end_amb_nmatches == 0);
- assert((*hit3)->start_amb_nmatches > 0);
+ assert((*hit5)->end_amb_length == 0);
+ assert((*hit3)->start_amb_length > 0);
#endif
#ifdef USE_BINGO
@@ -9441,9 +9473,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
#endif
if (nbest == 0) {
- debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
- *unresolved_amb_nmatches = (*hit3)->start_amb_nmatches;
+ debug9(printf("\nnbest is zero: amb_length = %d...%d",
+ (*hit5)->end_amb_length,(*hit3)->start_amb_length));
+ *unresolved_amb_length = (*hit3)->start_amb_length;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new3p = true; bingoi3 = besti3;
@@ -9485,7 +9517,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
*hit5 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
- old->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ old->amb_length_donor,/*amb_length_acceptor*/0,
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -9536,7 +9568,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
*hit5 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
- /*amb_nmatches_donor*/0,old->amb_nmatches_acceptor,
+ /*amb_length_donor*/0,old->amb_length_acceptor,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -9587,7 +9619,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
old = *hit5;
*hit5 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -9623,7 +9655,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
old = *hit5;
*hit5 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -9675,7 +9707,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
*hit3 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
- old->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ old->amb_length_donor,/*amb_length_acceptor*/0,
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -9726,7 +9758,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
*hit3 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
- /*amb_nmatches_donor*/0,old->amb_nmatches_acceptor,
+ /*amb_length_donor*/0,old->amb_length_acceptor,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -9777,7 +9809,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
old = *hit3;
*hit3 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -9807,15 +9839,15 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
querylength3,/*plusp*/true,genestrand,/*first_read_p*/false,/*sensep*/true,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
- debug9(printf("Resolved halfsplice acceptor, End 2: Splice from acceptor %lu (%lu) to donor %lu (%lu), with nmismatches %d\n",
- Substring_splicecoord(acceptor) - Substring_chroffset(acceptor),
- acceptor_splicecoord - Substring_chroffset(acceptor),
- Substring_splicecoord(donor) - Substring_chroffset(donor),
- donor_splicecoord - Substring_chroffset(donor),nmismatches_shortend));
+ debug9(printf("Resolved halfsplice acceptor, End 2: Splice from acceptor %u (%u) to donor %u (%u), with nmismatches %d\n",
+ (Chrpos_T) (Substring_splicecoord(acceptor) - Substring_chroffset(acceptor)),
+ (Chrpos_T) (acceptor_splicecoord - Substring_chroffset(acceptor)),
+ (Chrpos_T) (Substring_splicecoord(donor) - Substring_chroffset(donor)),
+ (Chrpos_T) (donor_splicecoord - Substring_chroffset(donor)),nmismatches_shortend));
old = *hit3;
*hit3 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -9837,7 +9869,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
static void
-resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T *hit3, bool *private5p, bool *private3p,
+resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, T *hit5, T *hit3, bool *private5p, bool *private3p,
Univcoord_T *splicesites,
Compress_T query5_compress_rev, Compress_T query3_compress_rev,
int localsplicing_penalty, int querylength5, int querylength3,
@@ -9867,7 +9899,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
Intlist_T amb_knowni, amb_nmismatches;
- *unresolved_amb_nmatches = 0;
+ *unresolved_amb_length = 0;
debug9(printf("resolve minus: hit5 %s ambiguous %d,%d and hit3 %s ambiguous %d,%d\n",
hittype_string((*hit5)->hittype),(*hit5)->start_ambiguous_p,(*hit5)->end_ambiguous_p,
@@ -9881,7 +9913,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
genomicend = (*hit5)->end_ambcoords[i]; /* splicesites[] */
for (j = 0; j < (*hit3)->start_nambcoords; j++) {
genomicstart = (*hit3)->start_ambcoords[j]; /* splicesites[] */
- debug9(printf(" %lu,%lu",genomicend - (*hit5)->chroffset,genomicstart - (*hit3)->chroffset));
+ debug9(printf(" %u,%u",(Chrpos_T) (genomicend - (*hit5)->chroffset),(Chrpos_T) (genomicstart - (*hit3)->chroffset)));
if (genomicstart < genomicend) {
nbounded++;
boundedi5 = i;
@@ -9921,8 +9953,8 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->end_amb_nmatches > 0);
- assert((*hit3)->start_amb_nmatches > 0);
+ assert((*hit5)->end_amb_length > 0);
+ assert((*hit3)->start_amb_length > 0);
#endif
#ifdef USE_BINGO
@@ -9934,9 +9966,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
#endif
if (nbest == 0) {
- debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
- *unresolved_amb_nmatches = (*hit5)->end_amb_nmatches + (*hit3)->start_amb_nmatches;
+ debug9(printf("\nnbest is zero: amb_length = %d...%d",
+ (*hit5)->end_amb_length,(*hit3)->start_amb_length));
+ *unresolved_amb_length = (*hit5)->end_amb_length + (*hit3)->start_amb_length;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new5p = true; new3p = true; bingoi5 = besti5; bingoi3 = besti3;
@@ -9949,7 +9981,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
best_nmismatches = querylength5;
for (i = 0; i < (*hit5)->end_nambcoords; i++) {
genomicend = (*hit5)->end_ambcoords[i]; /* splicesites[] */
- debug9(printf(" %lu",genomicend - (*hit5)->chroffset));
+ debug9(printf(" %u",(Chrpos_T) (genomicend - (*hit5)->chroffset)));
if ((*hit3)->genomicstart < genomicend /*allow overlap*/+ querylength3) {
nbounded++;
boundedi5 = i;
@@ -9985,8 +10017,8 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->end_amb_nmatches > 0);
- assert((*hit3)->start_amb_nmatches == 0);
+ assert((*hit5)->end_amb_length > 0);
+ assert((*hit3)->start_amb_length == 0);
#endif
#ifdef USE_BINGO
@@ -9998,9 +10030,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
#endif
if (nbest == 0) {
- debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
- *unresolved_amb_nmatches = (*hit5)->end_amb_nmatches;
+ debug9(printf("\nnbest is zero: amb_length = %d...%d",
+ (*hit5)->end_amb_length,(*hit3)->start_amb_length));
+ *unresolved_amb_length = (*hit5)->end_amb_length;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new5p = true; bingoi5 = besti5;
@@ -10013,7 +10045,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
best_nmismatches = querylength3;
for (j = 0; j < (*hit3)->start_nambcoords; j++) {
genomicstart = (*hit3)->start_ambcoords[j]; /* splicesites[] */
- debug9(printf(" %lu",genomicstart - (*hit3)->chroffset));
+ debug9(printf(" %u",(Chrpos_T) (genomicstart - (*hit3)->chroffset)));
if (genomicstart < (*hit5)->genomicend /*allow overlap*/+ querylength5) {
nbounded++;
boundedi3 = j;
@@ -10048,8 +10080,8 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->end_amb_nmatches == 0);
- assert((*hit3)->start_amb_nmatches > 0);
+ assert((*hit5)->end_amb_length == 0);
+ assert((*hit3)->start_amb_length > 0);
#endif
#ifdef USE_BINGO
@@ -10061,9 +10093,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
#endif
if (nbest == 0) {
- debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
- *unresolved_amb_nmatches = (*hit3)->start_amb_nmatches;
+ debug9(printf("\nnbest is zero: amb_length = %d...%d",
+ (*hit5)->end_amb_length,(*hit3)->start_amb_length));
+ *unresolved_amb_length = (*hit3)->start_amb_length;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new3p = true; bingoi3 = besti3;
@@ -10106,7 +10138,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
*hit5 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
- old->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ old->amb_length_donor,/*amb_length_acceptor*/0,
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -10157,7 +10189,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
*hit5 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
- /*amb_nmatches_donor*/0,old->amb_nmatches_acceptor,
+ /*amb_length_donor*/0,old->amb_length_acceptor,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -10207,7 +10239,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
old = *hit5;
*hit5 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10244,7 +10276,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
old = *hit5;
*hit5 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10296,7 +10328,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
*hit3 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
- old->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ old->amb_length_donor,/*amb_length_acceptor*/0,
ambcoords,/*ambcoords_acceptor*/NULL,
amb_knowni,/*amb_knowni_acceptor*/NULL,
amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
@@ -10347,7 +10379,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
*hit3 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
- /*amb_nmatches_donor*/0,old->amb_nmatches_acceptor,
+ /*amb_length_donor*/0,old->amb_length_acceptor,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,amb_knowni,
/*amb_nmismatches_donor*/NULL,amb_nmismatches,
@@ -10397,7 +10429,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
old = *hit3;
*hit3 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10433,7 +10465,7 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
old = *hit3;
*hit3 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
@@ -10492,7 +10524,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
Stage3end_T copy;
Chrpos_T chrstart, chrend, chrpos;
int querypos;
- int unresolved_amb_nmatches = 0;
+ int unresolved_amb_length = 0;
int found_score = 0;
bool overreach5p, overreach3p;
@@ -10551,14 +10583,16 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
new->dir = +1;
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, no overlap: insert length %d = start3 %lu - end5 %lu + %d + %d\n",
- new->insertlength,hit3->genomicstart,hit5->genomicend,querylength5,querylength3));
+ debug10(printf("plus, no overlap: insert length %d = start3 %llu - end5 %llu + %d + %d\n",
+ new->insertlength,(unsigned long long) hit3->genomicstart,
+ (unsigned long long) hit5->genomicend,querylength5,querylength3));
} else if (hit5->plusp == false && hit3->plusp == false) {
new->dir = -1;
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, no overlap: insert length %d = end5 %lu - start3 %lu + %d + %d\n",
- new->insertlength,hit5->genomicend,hit3->genomicstart,querylength5,querylength3));
+ debug10(printf("minus, no overlap: insert length %d = end5 %llu - start3 %llu + %d + %d\n",
+ new->insertlength,(unsigned long long) hit5->genomicend,
+ (unsigned long long) hit3->genomicstart,querylength5,querylength3));
} else {
new->dir = 0;
new->insertlength = pair_insert_length_unpaired(hit5,hit3); /* was 0 */
@@ -10572,26 +10606,27 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_plus(&unresolved_amb_nmatches,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_plus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
splicesites,query5_compress_fwd,query3_compress_fwd,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
/* Have 5-start..end and 3-start..end */
- debug10(printf("plus: comparing hit5->genomicend %lu <= hit3->genomicstart %lu\n",
- hit5->genomicend,hit3->genomicstart));
+ debug10(printf("plus: comparing hit5->genomicend %llu <= hit3->genomicstart %llu\n",
+ (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicstart));
if (hit5->genomicend <= hit3->genomicstart) {
/* No overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, no overlap: insert length %d = start3 %lu - end5 %lu + %d + %d\n",
- new->insertlength,hit3->genomicstart,hit5->genomicend,querylength5,querylength3));
+ debug10(printf("plus, no overlap: insert length %d = start3 %llu - end5 %llu + %d + %d\n",
+ new->insertlength,(unsigned long long) hit3->genomicstart,
+ (unsigned long long) hit5->genomicend,querylength5,querylength3));
} else if ((chrpos = overlap3_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) {
new->insertlength = /* end3 */ chrend - /* start5 */ (chrpos - querypos);
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, overlap: insert length %d = end3 %lu - start5 (%lu - %d)\n",
- new->insertlength,chrend,chrpos,querypos));
+ debug10(printf("plus, overlap: insert length %d = end3 %llu - start5 (%llu - %d)\n",
+ new->insertlength,(unsigned long long) chrend,(unsigned long long) chrpos,querypos));
} else {
/* Still no overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
@@ -10603,26 +10638,27 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_minus(&unresolved_amb_nmatches,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_minus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
splicesites,query5_compress_rev,query3_compress_rev,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
/* Have 3-end..start and 5-end..start */
- debug10(printf("minus: comparing hit3->genomicstart %lu <= hit5->genomicend %lu\n",
- hit3->genomicstart,hit5->genomicend));
+ debug10(printf("minus: comparing hit3->genomicstart %llu <= hit5->genomicend %llu\n",
+ (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicend));
if (hit3->genomicstart <= hit5->genomicend) {
/* No overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, no overlap: insert length %d = end5 %lu - start3 %lu + %d + %d\n",
- new->insertlength,hit5->genomicend,hit3->genomicstart,querylength5,querylength3));
+ debug10(printf("minus, no overlap: insert length %d = end5 %llu - start3 %llu + %d + %d\n",
+ new->insertlength,(unsigned long long) hit5->genomicend,
+ (unsigned long long) hit3->genomicstart,querylength5,querylength3));
} else if ((chrpos = overlap3_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) {
new->insertlength = /* start5 */ (chrpos + querypos) - /* end3 */ chrend + 1;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, overlap: insert length %d = start5 (%lu + %d) - end3 %lu + 1\n",
- new->insertlength,chrpos,querypos,chrend));
+ debug10(printf("minus, overlap: insert length %d = start5 (%llu + %d) - end3 %llu + 1\n",
+ new->insertlength,(unsigned long long) chrpos,querypos,(unsigned long long) chrend));
} else {
/* Still no overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
@@ -10641,26 +10677,28 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_plus(&unresolved_amb_nmatches,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_plus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
splicesites,query5_compress_fwd,query3_compress_fwd,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
/* Have 5-start..end and 3-start..end */
- debug10(printf("plus: comparing hit5->genomicend %lu <= hit3->genomicstart %lu\n",
- hit5->genomicend,hit3->genomicstart));
+ debug10(printf("plus: comparing hit5->genomicend %llu <= hit3->genomicstart %llu\n",
+ (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicstart));
if (hit5->genomicend <= hit3->genomicstart) {
/* No overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, no overlap: insert length %d = start3 %lu - end5 %lu + %d + %d\n",
- new->insertlength,hit3->genomicstart,hit5->genomicend,querylength5,querylength3));
+ debug10(printf("plus, no overlap: insert length %d = start3 %llu - end5 %llu + %d + %d\n",
+ new->insertlength,(unsigned long long) hit3->genomicstart,
+ (unsigned long long) hit5->genomicend,querylength5,querylength3));
} else if ((chrpos = overlap5_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
new->insertlength = /* end3 */ (chrpos - querypos + querylength3) - /* start5 */ chrstart;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, overlap: insert length %d = end3 (%lu - %d + %d) - start5 %lu\n",
- new->insertlength,chrpos,querypos,querylength3,chrstart));
+ debug10(printf("plus, overlap: insert length %d = end3 (%llu - %d + %d) - start5 %llu\n",
+ new->insertlength,(unsigned long long) chrpos,querypos,querylength3,
+ (unsigned long long) chrstart));
} else {
/* Still no overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
@@ -10672,25 +10710,27 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_minus(&unresolved_amb_nmatches,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_minus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
splicesites,query5_compress_rev,query3_compress_rev,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
/* Have 3-end..start and 5-end..start */
- debug10(printf("minus: comparing hit3->genomicstart %lu <= hit5->genomicend %lu\n",
- hit3->genomicstart,hit5->genomicend));
+ debug10(printf("minus: comparing hit3->genomicstart %llu <= hit5->genomicend %llu\n",
+ (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicend));
if (hit3->genomicstart <= hit5->genomicend) {
/* No overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, no overlap: insert length %d = end5 %lu - start3 %lu + %d + %d\n",
- new->insertlength,hit5->genomicend,hit3->genomicstart,querylength5,querylength3));
+ debug10(printf("minus, no overlap: insert length %d = end5 %llu - start3 %llu + %d + %d\n",
+ new->insertlength,(unsigned long long) hit5->genomicend,
+ (unsigned long long) hit3->genomicstart,querylength5,querylength3));
} else if ((chrpos = overlap5_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
new->insertlength = /* start5 */ chrstart - /* end3 */ (chrpos + querypos - querylength3) - 1;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, overlap: insert length %d = start5 %lu - end3 (%lu + %d - %d) - 1\n",
- new->insertlength,chrstart,chrpos,querypos,querylength3));
+ debug10(printf("minus, overlap: insert length %d = start5 %llu - end3 (%llu + %d - %d) - 1\n",
+ new->insertlength,(unsigned long long) chrstart,(unsigned long long) chrpos,
+ querypos,querylength3));
} else {
/* Still no overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
@@ -10777,7 +10817,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (hit5->sensedir == SENSE_FORWARD) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/Substring_nmismatches_whole(hit5->substring1),
/*nmismatches_acceptor*/0,/*donor*/hit5->substring1,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/true,
@@ -10786,7 +10826,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/0,
/*nmismatches_acceptor*/Substring_nmismatches_whole(hit5->substring1),/*donor*/NULL,
/*acceptor*/hit5->substring1,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/true,
@@ -10808,7 +10848,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/0,
/*nmismatches_acceptor*/Substring_nmismatches_whole(hit3->substring2),/*donor*/NULL,
/*acceptor*/hit3->substring2,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/true,
@@ -10816,7 +10856,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
} else if (hit3->sensedir == SENSE_ANTI) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/Substring_nmismatches_whole(hit3->substring2),
/*nmismatches_acceptor*/0,/*donor*/hit3->substring2,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/true,
@@ -10834,7 +10874,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
}
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_plus(&unresolved_amb_nmatches,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_plus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
splicesites,query5_compress_fwd,query3_compress_fwd,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
@@ -10844,12 +10884,14 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
/* No overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, no overlap: insert length %d = start3 %lu - end5 %lu + %d + %d\n",
- new->insertlength,hit3->genomicstart,hit5->genomicend,querylength5,querylength3));
+ debug10(printf("plus, no overlap: insert length %d = start3 %llu - end5 %llu + %d + %d\n",
+ new->insertlength,(unsigned long long) hit3->genomicstart,
+ (unsigned long long) hit5->genomicend,querylength5,querylength3));
#if 0
} else if (hit5->genomicend > hit3->genomicend + SUBSUMPTION_SLOP) {
/* hit5 subsumes hit3 */
- debug10(printf("plus, subsumption %lu > %lu\n",hit5->genomicend,hit3->genomicend));
+ debug10(printf("plus, subsumption %llu > %llu\n",
+ (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicend));
new->insertlength = 0;
new->insertlength_expected_sign = false;
#endif
@@ -10900,7 +10942,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (hit5->sensedir == SENSE_FORWARD) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/Substring_nmismatches_whole(hit5->substring1),
/*nmismatches_acceptor*/0,/*donor*/hit5->substring1,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/false,
@@ -10909,7 +10951,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/0,
/*nmismatches_acceptor*/Substring_nmismatches_whole(hit5->substring1),/*donor*/NULL,
/*acceptor*/hit5->substring1,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/false,
@@ -10931,7 +10973,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/0,
/*nmismatches_acceptor*/Substring_nmismatches_whole(hit3->substring2),/*donor*/NULL,
/*acceptor*/hit3->substring2,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/false,
@@ -10939,7 +10981,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
} else if (hit3->sensedir == SENSE_ANTI) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/Substring_nmismatches_whole(hit3->substring2),
/*nmismatches_acceptor*/0,/*donor*/hit3->substring2,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_nmatches*/0,
+ /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_length*/0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/false,
@@ -10957,7 +10999,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
}
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_minus(&unresolved_amb_nmatches,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_minus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
splicesites,query5_compress_rev,query3_compress_rev,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
@@ -10967,12 +11009,14 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
/* No overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, no overlap: insert length %d = end5 %lu - start3 %lu + %d + %d\n",
- new->insertlength,hit5->genomicend,hit3->genomicstart,querylength5,querylength3));
+ debug10(printf("minus, no overlap: insert length %d = end5 %llu - start3 %llu + %d + %d\n",
+ new->insertlength,(unsigned long long) hit5->genomicend,
+ (unsigned long long) hit3->genomicstart,querylength5,querylength3));
#if 0
} else if (hit3->genomicstart > hit5->genomicstart + SUBSUMPTION_SLOP) {
/* hit3 subsumes hit5 */
- debug10(printf("minus, subsumption %lu > %lu\n",hit3->genomicstart,hit5->genomicstart));
+ debug10(printf("minus, subsumption %llu > %llu\n",
+ (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicstart));
new->insertlength = 0;
new->insertlength_expected_sign = false;
#endif
@@ -11043,8 +11087,8 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
}
/* Do not alter score, so the alignmnent terminates at the known splice site */
- new->score = hit5->score + hit3->score /* + unresolved_amb_nmatches */;
- new->nmatches = hit5->nmatches + hit3->nmatches - unresolved_amb_nmatches;
+ new->score = hit5->score + hit3->score /* + unresolved_amb_length */;
+ new->nmatches = hit5->nmatches + hit3->nmatches - unresolved_amb_length;
new->nmatches_posttrim = hit5->nmatches_posttrim + hit3->nmatches_posttrim;
new->indel_low = hit5->indel_low + hit3->indel_low;
/* new->overlap_known_gene_p = false; -- initialized later when resolving multimappers */
@@ -11055,7 +11099,8 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
#if 0
if (new->low > new->high) {
- fprintf(stderr,"new->low %lu > new->high %lu, hit5->chrnum %d\n",new->low,new->high,hit5->chrnum);
+ fprintf(stderr,"new->low %llu > new->high %llu, hit5->chrnum %d\n",
+ (unsigned long long) new->low,(unsigned long long) new->high,hit5->chrnum);
abort();
}
#endif
@@ -11674,8 +11719,9 @@ Stage3pair_sort_distance (List_T hitpairlist) {
debug(
for (p = sorted, i = 0; p != NULL; p = p->rest, i++) {
hitpair = (Stage3pair_T) p->first;
- printf(" Final %d: %lu-%lu (dir = %d), insert length %u\n",
- i,hitpair->low,hitpair->high,hitpair->dir,hitpair->insertlength);
+ printf(" Final %d: %llu-%llu (dir = %d), insert length %u\n",
+ i,(unsigned long long) hitpair->low,(unsigned long long) hitpair->high,
+ hitpair->dir,hitpair->insertlength);
}
);
@@ -12857,11 +12903,11 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,hittype_string(hit5->hittype),
hit5->trim_left,hit5->trim_left_splicep ? " (splice)" : "",
hit5->trim_right,hit5->trim_right_splicep ? " (splice)" : "",
- hit5->start_amb_nmatches,hit5->end_amb_nmatches,
+ hit5->start_amb_length,hit5->end_amb_length,
hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,hittype_string(hit3->hittype),
hit3->trim_left,hit3->trim_left_splicep ? " (splice)" : "",
hit3->trim_right,hit3->trim_right_splicep ? " (splice)" : "",
- hit3->start_amb_nmatches,hit3->end_amb_nmatches));
+ hit3->start_amb_length,hit3->end_amb_length));
if (hit5->hittype == TERMINAL) {
/* Don't allow terminals to set trims, because they don't attempt to extend to ends */
#if 0
@@ -12996,22 +13042,22 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
#endif
hit5->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
- trim_left_5,trim_right_5,hit5->start_amb_nmatches,hit5->end_amb_nmatches,
+ trim_left_5,trim_right_5,hit5->start_amb_length,hit5->end_amb_length,
hit5->querylength_adj);
debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
- trim_left_5,trim_right_5,hit5->start_amb_nmatches,hit5->end_amb_nmatches,
+ trim_left_5,trim_right_5,hit5->start_amb_length,hit5->end_amb_length,
hit5->querylength_adj)));
#ifdef SCORE_INDELS
hit5->score_eventrim += indel_penalty_middle * nindelbreaks;
debug6(printf(" add indelbreaks %d.",indel_penalty_middle * nindelbreaks));
#endif
if (hit5->start_amb_prob < 0.9) {
- hit5->score_eventrim += hit5->start_amb_nmatches / ambig_end_interval;
- debug6(printf(" add amb start %d/%d (prob %f).",hit5->start_amb_nmatches,ambig_end_interval,hit5->start_amb_prob));
+ hit5->score_eventrim += hit5->start_amb_length / ambig_end_interval;
+ debug6(printf(" add amb start %d/%d (prob %f).",hit5->start_amb_length,ambig_end_interval,hit5->start_amb_prob));
}
if (hit5->end_amb_prob < 0.9) {
- hit5->score_eventrim += hit5->end_amb_nmatches / ambig_end_interval;
- debug6(printf(" add amb end %d/%d (prob %f).",hit5->end_amb_nmatches,ambig_end_interval,hit5->end_amb_prob));
+ hit5->score_eventrim += hit5->end_amb_length / ambig_end_interval;
+ debug6(printf(" add amb end %d/%d (prob %f).",hit5->end_amb_length,ambig_end_interval,hit5->end_amb_prob));
}
debug6(printf(" RESULT: %d\n",hit5->score_eventrim));
@@ -13077,22 +13123,22 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
#endif
hit3->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
- trim_left_3,trim_right_3,hit3->start_amb_nmatches,hit3->end_amb_nmatches,
+ trim_left_3,trim_right_3,hit3->start_amb_length,hit3->end_amb_length,
hit3->querylength_adj);
debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
- trim_left_3,trim_right_3,hit3->start_amb_nmatches,hit3->end_amb_nmatches,
+ trim_left_3,trim_right_3,hit3->start_amb_length,hit3->end_amb_length,
hit3->querylength_adj)));
#ifdef SCORE_INDELS
hit3->score_eventrim += indel_penalty_middle * nindelbreaks;
debug6(printf(" add indelbreaks %d.",indel_penalty_middle * nindelbreaks));
#endif
if (hit3->start_amb_prob < 0.9) {
- hit3->score_eventrim += hit3->start_amb_nmatches / ambig_end_interval;
- debug6(printf(" add amb start %d/%d (prob %f).",hit3->start_amb_nmatches,ambig_end_interval,hit3->start_amb_prob));
+ hit3->score_eventrim += hit3->start_amb_length / ambig_end_interval;
+ debug6(printf(" add amb start %d/%d (prob %f).",hit3->start_amb_length,ambig_end_interval,hit3->start_amb_prob));
}
if (hit3->end_amb_prob < 0.9) {
- hit3->score_eventrim += hit3->end_amb_nmatches / ambig_end_interval;
- debug6(printf(" add amb end %d/%d (prob %f).",hit3->end_amb_nmatches,ambig_end_interval,hit3->end_amb_prob));
+ hit3->score_eventrim += hit3->end_amb_length / ambig_end_interval;
+ debug6(printf(" add amb end %d/%d (prob %f).",hit3->end_amb_length,ambig_end_interval,hit3->end_amb_prob));
}
debug6(printf(" RESULT: %d\n",hit3->score_eventrim));
} else {
@@ -13593,7 +13639,8 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
} else if (SENSE_INCONSISTENT_P(hit5->sensedir,hit3->sensedir)) {
debug5(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir,hit3->sensedir,hit5->sensedir|hit3->sensedir));
} else if (hit3->genomicend < hit5->genomicstart) {
- debug5(printf(" => scramble because end3 %lu < start5 %lu\n",hit3->genomicend,hit5->genomicstart));
+ debug5(printf(" => scramble because end3 %llu < start5 %llu\n",
+ (unsigned long long) hit3->genomicend,(unsigned long long) hit5->genomicstart));
if (*nsamechr <= maxpairedpaths &&
(stage3pair = Stage3pair_new(Stage3end_copy(hit5),Stage3end_copy(hit3),splicesites,
query5_compress_fwd,query5_compress_rev,
@@ -13612,7 +13659,13 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*pairtype*/CONCORDANT,splicing_penalty,
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
- if (pairscore < new_found_score) {
+ if (hit5->start_amb_length > 0 || hit5->end_amb_length > 0 ||
+ hit3->start_amb_length > 0 || hit3->end_amb_length > 0) {
+ /* Don't use ambiguous splices to update found_score */
+ hitpairs = List_push(hitpairs,(void *) stage3pair);
+ (*nconcordant)++;
+
+ } else if (pairscore < new_found_score) {
new_found_score = pairscore;
debug5(printf(" => tentatively updating found_score to be %d",new_found_score));
hitpairs = List_push(hitpairs,(void *) stage3pair);
@@ -13702,7 +13755,8 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
} else if (SENSE_INCONSISTENT_P(hit3->sensedir,hit5->sensedir)) {
debug5(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir,hit3->sensedir,hit5->sensedir|hit3->sensedir));
} else if (hit5->genomicstart < hit3->genomicend) {
- debug5(printf(" => scramble because start5 %lu < end3 %lu\n",hit5->genomicstart,hit3->genomicend));
+ debug5(printf(" => scramble because start5 %llu < end3 %llu\n",
+ (unsigned long long) hit5->genomicstart,(unsigned long long) hit3->genomicend));
if (*nsamechr <= maxpairedpaths &&
(stage3pair = Stage3pair_new(Stage3end_copy(hit5),Stage3end_copy(hit3),splicesites,
query5_compress_fwd,query5_compress_rev,
@@ -13721,7 +13775,11 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*pairtype*/CONCORDANT,splicing_penalty,
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
- if (pairscore < new_found_score) {
+ if (hit5->start_amb_length > 0 || hit5->end_amb_length > 0 ||
+ hit3->start_amb_length > 0 || hit3->end_amb_length > 0) {
+ /* Don't use ambiguous splices to update found_score */
+
+ } else if (pairscore < new_found_score) {
new_found_score = pairscore;
debug5(printf(" => updating new_found_score to be %d",new_found_score));
hitpairs = List_push(hitpairs,(void *) stage3pair);
diff --git a/src/stage3hr.h b/src/stage3hr.h
index a7cd35f..253276d 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 148359 2014-09-19 22:09:34Z twu $ */
+/* $Id: stage3hr.h 154023 2014-11-25 03:45:18Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -216,9 +216,9 @@ Stage3end_start_ambiguous_p (T this);
extern bool
Stage3end_end_ambiguous_p (T this);
extern int
-Stage3end_amb_nmatches_start (T this);
+Stage3end_amb_length_start (T this);
extern int
-Stage3end_amb_nmatches_end (T this);
+Stage3end_amb_length_end (T this);
extern bool
Stage3end_gmap_triedp (T this);
@@ -273,7 +273,7 @@ Stage3pair_concordantp (List_T hitpairs);
extern List_T
Stage3pair_filter_nonconcordant (List_T hitpairs);
extern int
-Stage3pair_overlap (int *hardclip5, int *hardclip3, Stage3pair_T this);
+Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low, int *hardclip3_high, Stage3pair_T this);
extern void
Stage3pair_set_private5p (Stage3pair_T this);
extern void
@@ -322,7 +322,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
extern T
Stage3end_new_splice (int *found_score, int donor_nmismatches, int acceptor_nmismatches,
Substring_T donor, Substring_T acceptor, Chrpos_T distance,
- bool shortdistancep, int splicing_penalty, int querylength, int amb_nmatches,
+ bool shortdistancep, int splicing_penalty, int querylength, int amb_length,
#ifdef LARGE_GENOMES
Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
#else
@@ -334,7 +334,7 @@ Stage3end_new_splice (int *found_score, int donor_nmismatches, int acceptor_nmis
bool first_read_p, int sensedir, bool sarrayp);
extern T
Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T acceptor, Substring_T shortexon,
- int amb_nmatches_donor, int amb_nmatches_acceptor,
+ int amb_length_donor, int amb_length_acceptor,
#ifdef LARGE_GENOMES
Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
#else
@@ -425,7 +425,8 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
struct Pair_T *
Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged, char **quality_merged,
Stage3pair_T this, Shortread_T queryseq5, Shortread_T queryseq3,
- int querylength5, int querylength3, int clipdir, int hardclip5, int hardclip3);
+ int querylength5, int querylength3, int clipdir,
+ int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high);
extern void
Stage3pair_privatize (Stage3pair_T *array, int npairs);
diff --git a/src/substring.c b/src/substring.c
index 1057ebb..acd84ae 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 148865 2014-09-24 22:19:10Z twu $";
+static char rcsid[] = "$Id: substring.c 154023 2014-11-25 03:45:18Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1154,12 +1154,13 @@ Substring_overlap_point_trimmed_p (T substring, Univcoord_T endpos) {
if (substring->plusp == true) {
low = substring->alignstart_trim;
high = substring->alignend_trim;
+ debug3(printf("Checking overlap between plus %u..%u and %u",low,high,endpos));
} else {
low = substring->alignend_trim;
high = substring->alignstart_trim;
+ debug3(printf("Checking overlap between minus %u..%u and %u",low,high,endpos));
}
- debug3(printf("Checking overlap between %u..%u and %u",low,high,endpos));
if (endpos < low) {
debug3(printf(" => no because %u < %u\n",endpos,low));
@@ -1174,8 +1175,9 @@ Substring_overlap_point_trimmed_p (T substring, Univcoord_T endpos) {
}
-bool
-Substring_overlap_segment_trimmed_p (T substring1, T substring2) {
+Univcoord_T
+Substring_overlap_segment_trimmed (T substring1, T substring2) {
+ Univcoord_T maxlow, minhigh;
Univcoord_T low1, high1, low2, high2;
if (substring1->plusp == true) {
@@ -1198,13 +1200,16 @@ Substring_overlap_segment_trimmed_p (T substring1, T substring2) {
if (high2 < low1) {
debug3(printf(" => no because %u < %u\n",high2,low1));
- return false;
+ return 0;
} else if (low2 > high1) {
debug3(printf(" => no because %u > %u\n",low2,high1));
- return false;
+ return 0;
} else {
- debug3(printf(" => yes\n"));
- return true;
+ maxlow = (low1 > low2) ? low1 : low2;
+ minhigh = (high1 < high2) ? high1 : high2;
+ debug3(printf(" => yes. maxlow %llu, minhigh %llu. returning %llu\n",
+ (unsigned long long) maxlow,(unsigned long long) minhigh,(unsigned long long) (maxlow + (minhigh - maxlow)/2)));
+ return maxlow + (minhigh - maxlow)/2;
}
}
@@ -2219,6 +2224,18 @@ Substring_alignend_trim (T this) {
}
+#if 0
+Univcoord_T
+Substring_alignmid_trim (T this) {
+ if (this->alignend_trim > this->alignstart_trim) {
+ return this->alignstart_trim + (this->alignend_trim - this->alignstart_trim)/2;
+ } else {
+ return this->alignend_trim + (this->alignstart_trim - this->alignend_trim)/2;
+ }
+}
+#endif
+
+
Univcoord_T
Substring_left_genomicseg (T this) {
return this->left_genomicseg;
@@ -4211,35 +4228,15 @@ Substring_count_mismatches_region (T this, int trim_left, int trim_right,
List_T
Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset) {
+ int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, int queryseq_offset) {
int querystart, queryend, querypos, i;
- int hardclip_low, hardclip_high;
Chrpos_T chrpos;
char *seq1;
char genome;
if (substring == NULL) {
return pairs;
-
- } else if (first_read_p == true) {
- debug6(printf("first read\n"));
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip5 */
- } else {
- hardclip_low = hardclip; /* hardclip5 */
- hardclip_high = 0;
- }
-
- } else {
- debug6(printf("second read\n"));
- if (clipdir >= 0) {
- hardclip_low = hardclip; /* hardclip3 */
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip3 */
- }
}
@@ -4341,33 +4338,12 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
List_T
Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertionlength, Shortread_T queryseq,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset) {
+ int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset) {
int querystartA, queryendA, querystartB, queryendB, querypos, i;
- int hardclip_low, hardclip_high;
Chrpos_T chrendA;
char *seq1;
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip5 */
- } else {
- hardclip_low = hardclip; /* hardclip5 */
- hardclip_high = 0;
- }
-
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip; /* hardclip3 */
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip3 */
- }
- }
-
-
if (substringA->plusp == true) {
if (hardclip_low > substringA->querystart) {
querystartA = hardclip_low;
@@ -4439,32 +4415,10 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertion
List_T
Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion, int deletionlength,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset) {
+ int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset) {
int querystartA, queryendA, querystartB, queryendB, k;
- int hardclip_low, hardclip_high;
Chrpos_T chrendA;
-
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip5 */
- } else {
- hardclip_low = hardclip; /* hardclip5 */
- hardclip_high = 0;
- }
-
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip; /* hardclip3 */
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip3 */
- }
- }
-
-
if (substringA->plusp == true) {
if (hardclip_low > substringA->querystart) {
querystartA = hardclip_low;
@@ -4543,32 +4497,11 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion
List_T
Substring_add_intron (List_T pairs, T substringA, T substringB,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset) {
+ int clipdir, int hardclip_low, int hardclip_high,
+ bool first_read_p, int queryseq_offset) {
int querystartA, queryendA, querystartB, queryendB;
- int hardclip_low, hardclip_high;
Chrpos_T chrendA;
-
- if (first_read_p == true) {
- if (clipdir >= 0) {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip5 */
- } else {
- hardclip_low = hardclip; /* hardclip5 */
- hardclip_high = 0;
- }
-
- } else {
- if (clipdir >= 0) {
- hardclip_low = hardclip; /* hardclip3 */
- hardclip_high = 0;
- } else {
- hardclip_low = 0;
- hardclip_high = hardclip; /* hardclip3 */
- }
- }
-
-
if (substringA->plusp == true) {
if (hardclip_low > substringA->querystart) {
querystartA = hardclip_low;
diff --git a/src/substring.h b/src/substring.h
index 47f10bc..6b10158 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 148359 2014-09-19 22:09:34Z twu $ */
+/* $Id: substring.h 154023 2014-11-25 03:45:18Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -73,8 +73,8 @@ extern Chrpos_T
Substring_insert_length (T substring5, T substring3);
extern bool
Substring_overlap_point_trimmed_p (T substring, Univcoord_T endpos);
-extern bool
-Substring_overlap_segment_trimmed_p (T substring1, T substring2);
+extern Univcoord_T
+Substring_overlap_segment_trimmed (T substring1, T substring2);
extern Univcoord_T
Substring_splicecoord (T this);
@@ -293,16 +293,16 @@ Substring_count_mismatches_region (T this, int trim_left, int trim_right,
extern List_T
Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset);
+ int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
extern List_T
Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertionlength, Shortread_T queryseq,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset);
+ int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
extern List_T
Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion, int deletionlength,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset);
+ int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
extern List_T
Substring_add_intron (List_T pairs, T substringA, T substringB,
- int clipdir, int hardclip, bool first_read_p, int queryseq_offset);
+ int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
#undef T
#endif
diff --git a/src/table.c b/src/table.c
index 0735e7e..0ac6e44 100644
--- a/src/table.c
+++ b/src/table.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: table.c 115432 2013-11-18 18:21:03Z twu $";
+static char rcsid[] = "$Id: table.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -94,13 +94,23 @@ Table_get (T table, const void *key) {
assert(table);
/* assert(key); -- Doesn't hold for atomic 0 */
i = (*table->hash)(key)%table->size;
- /* printf("Doing Table_get on %s at bucket %d\n",(char *) key, i); */
+ /* fprintf(stderr,"Doing Table_get on %p (%s) at bucket %d\n",key,(char *) key, i); */
for (p = table->buckets[i]; p; p = p->link) {
- /* printf(" Comparing %s with %s at %p, key = %p\n",(char *) key, (char *) p->key, p, p->key); */
+ /* fprintf(stderr," Comparing keys %p and %p\n",key,p->key); */
if ((*table->cmp)(key, p->key) == 0) {
+ /* fprintf(stderr,"Success: keys are identical\n"); */
break;
}
}
+
+#if 0
+ if (p == NULL) {
+ fprintf(stderr,"p is NULL\n");
+ } else {
+ fprintf(stderr,"Found p with value %p\n",p->value);
+ }
+#endif
+
return p ? p->value : NULL;
}
@@ -110,6 +120,8 @@ Table_put (T table, const void *key, void *value) {
struct binding *p;
void *prev;
+ /* fprintf(stderr,"Doing Table_put of key = %p (%s), value %p\n",key,(char *) key,value); */
+
assert(table);
/* assert(key); -- Doesn't hold for atomic 0 */
i = (*table->hash)(key)%table->size;
@@ -119,6 +131,7 @@ Table_put (T table, const void *key, void *value) {
}
}
if (p == NULL) {
+ /* fprintf(stderr,"New entry\n"); */
NEW(p);
p->key = key;
/* printf("Doing Table_put at %p, key = %p\n",p,p->key); */
@@ -127,6 +140,7 @@ Table_put (T table, const void *key, void *value) {
table->length++;
prev = NULL;
} else {
+ /* fprintf(stderr,"Existing entry\n"); */
prev = p->value;
}
p->value = value;
diff --git a/src/tableuint8.c b/src/tableuint8.c
index 6e11dab..3dbbfbc 100644
--- a/src/tableuint8.c
+++ b/src/tableuint8.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: tableuint8.c 99737 2013-06-27 19:33:03Z twu $";
+static char rcsid[] = "$Id: tableuint8.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -73,13 +73,23 @@ Tableuint8_get (T table, const void *key) {
assert(table);
/* assert(key); -- Doesn't hold for atomic 0 */
i = (*table->hash)(key)%table->size;
- /* printf("Doing Tableuint8_get on %s at bucket %d\n",(char *) key, i); */
+ /* fprintf(stderr,"Doing Tableuint8_get on %p at bucket %d\n",(char *) key, i); */
for (p = table->buckets[i]; p; p = p->link) {
- /* printf(" Comparing %s with %s at %p, key = %p\n",(char *) key, (char *) p->key, p, p->key); */
+ /* fprintf(stderr," Comparing keys %p and %p\n",key,p->key); */
if ((*table->cmp)(key, p->key) == 0) {
+ /* fprintf(stderr,"Success: keys are identical\n"); */
break;
}
}
+
+#if 0
+ if (p == NULL) {
+ fprintf(stderr,"p is NULL\n");
+ } else {
+ fprintf(stderr,"Found p with value %llu\n",p->value);
+ }
+#endif
+
return p ? p->value : 0;
}
@@ -89,6 +99,8 @@ Tableuint8_put (T table, const void *key, UINT8 value) {
struct binding *p;
UINT8 prev;
+ /* fprintf(stderr,"Doing Tableuint8_put of key = %p, value %llu\n",key,value); */
+
assert(table);
/* assert(key); -- Doesn't hold for atomic 0 */
i = (*table->hash)(key)%table->size;
@@ -98,14 +110,15 @@ Tableuint8_put (T table, const void *key, UINT8 value) {
}
}
if (p == NULL) {
+ /* fprintf(stderr,"New entry\n"); */
NEW(p);
p->key = key;
- /* printf("Doing Tableuint8_put at %p, key = %p\n",p,p->key); */
p->link = table->buckets[i];
table->buckets[i] = p;
table->length++;
prev = 0;
} else {
+ /* fprintf(stderr,"Existing entry\n"); */
prev = p->value;
}
p->value = value;
diff --git a/src/uniqscan.c b/src/uniqscan.c
index 505a992..5fb7c85 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 149319 2014-09-30 02:15:42Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -365,8 +365,8 @@ print_program_version () {
#endif
fprintf(stdout,"\n");
- fprintf(stdout,"Sizes: off_t (%lu), size_t (%lu), unsigned int (%lu), long int (%lu)\n",
- sizeof(off_t),sizeof(size_t),sizeof(unsigned int),sizeof(long int));
+ fprintf(stdout,"Sizes: off_t (%d), size_t (%d), unsigned int (%d), long int (%d), long long int (%d)\n",
+ (int) sizeof(off_t),(int) sizeof(size_t),(int) sizeof(unsigned int),(int) sizeof(long int),(int) sizeof(long long int));
fprintf(stdout,"Default gmap directory: %s\n",GMAPDB);
fprintf(stdout,"Maximum read length: %d\n",MAX_READLENGTH);
fprintf(stdout,"Thomas D. Wu, Genentech, Inc.\n");
@@ -1226,7 +1226,8 @@ main (int argc, char *argv[]) {
Pair_setup(trim_mismatch_score,trim_indel_score,/*sam_insert_0M_p*/false,
/*force_xs_direction_p*/false,/*md_lowercase_variant_p*/false,
/*snps_p*/snps_iit ? true : false,
- Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false));
+ Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
+ /*cigar_action*/CIGAR_ACTION_IGNORE);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
/*require_splicedir_p*/false,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,
diff --git a/src/univinterval.c b/src/univinterval.c
index 1f4072c..87a8dcd 100644
--- a/src/univinterval.c
+++ b/src/univinterval.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: univinterval.c 102893 2013-07-25 22:11:12Z twu $";
+static char rcsid[] = "$Id: univinterval.c 153955 2014-11-24 17:54:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -68,7 +68,7 @@ Univinterval_table_free (void **x) {
void
Univinterval_print (T this) {
- printf("%lu %lu %d",this->low,this->high,this->type);
+ printf("%llu %llu %d",(unsigned long long) this->low,(unsigned long long) this->high,this->type);
return;
}
@@ -211,7 +211,8 @@ Univinterval_cmp (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
- debug(printf("Comparing %lu..%lu with %lu..%lu => ",x->low,x->high,y->low,y->high));
+ debug(printf("Comparing %llu..%llu with %llu..%llu => ",
+ (unsigned long long) x->low,(unsigned long long) x->high,(unsigned long long) y->low,(unsigned long long) y->high));
if (x->low < y->low) {
debug(printf("-1\n"));
return -1;
@@ -242,7 +243,8 @@ Univinterval_cmp_low (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
- debug(printf("Comparing %lu..%lu with %lu..%lu => ",x->low,x->high,y->low,y->high));
+ debug(printf("Comparing %llu..%llu with %llu..%llu => ",
+ (unsigned long long) x->low,(unsigned long long) x->high,(unsigned long long) y->low,(unsigned long long) y->high));
if (x->low < y->low) {
debug(printf("-1\n"));
return -1;
@@ -267,7 +269,8 @@ Univinterval_cmp_high (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
- debug(printf("Comparing %lu..%lu with %lu..%lu => ",x->low,x->high,y->low,y->high));
+ debug(printf("Comparing %llu..%llu with %llu..%llu => ",
+ (unsigned long long) x->low,(unsigned long long) x->high,(unsigned long long) y->low,(unsigned long long) y->high));
if (x->high < y->high) {
debug(printf("-1\n"));
return -1;
diff --git a/tests/Makefile.in b/tests/Makefile.in
index 5306c8e..6277d56 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -49,6 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/mmap-flags.m4 \
$(top_srcdir)/config/acx_mmap_fixed.m4 \
$(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/ax_mpi.m4 \
$(top_srcdir)/config/acx_pthread.m4 \
$(top_srcdir)/config/builtin-popcount.m4 \
$(top_srcdir)/config/struct-stat64.m4 \
@@ -117,6 +118,8 @@ LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MAX_READLENGTH = @MAX_READLENGTH@
MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+MPILIBS = @MPILIBS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
diff --git a/util/Makefile.in b/util/Makefile.in
index 190fe80..bf4f035 100644
--- a/util/Makefile.in
+++ b/util/Makefile.in
@@ -58,6 +58,7 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/mmap-flags.m4 \
$(top_srcdir)/config/acx_mmap_fixed.m4 \
$(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/ax_mpi.m4 \
$(top_srcdir)/config/acx_pthread.m4 \
$(top_srcdir)/config/builtin-popcount.m4 \
$(top_srcdir)/config/struct-stat64.m4 \
@@ -152,6 +153,8 @@ LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MAX_READLENGTH = @MAX_READLENGTH@
MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+MPILIBS = @MPILIBS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
diff --git a/util/gmap_build.pl.in b/util/gmap_build.pl.in
index 522ab8b..a03d8bd 100644
--- a/util/gmap_build.pl.in
+++ b/util/gmap_build.pl.in
@@ -1,5 +1,5 @@
#! @PERL@
-# $Id: gmap_build.pl.in 150410 2014-10-09 21:56:30Z twu $
+# $Id: gmap_build.pl.in 153958 2014-11-24 17:56:42Z twu $
use warnings;
@@ -69,6 +69,8 @@ if (!defined($dbname)) {
$dbdir = $1;
$dbname = $2;
if (defined($destdir) && $destdir =~ /\S/) {
+ # Note: The -D and -F arguments to gmapindex are different from the -D argument to gmap/gsnap.
+ # For gmapindex, we use -D /path/to/dir/dbname -d dbname. For gmap/gsnap, we use -D /path/to/dir -d dbname.
$destdir = $destdir . "/" . $dbname;
} else {
$destdir = $dbdir;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit mailing list