[med-svn] [gmap] 01/04: Imported Upstream version 2016-07-11.v2
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Fri Jul 22 08:30:54 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit f9bd6b92ebf41209793525fffa97ac53053fe6a7
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Fri Jul 22 08:43:16 2016 +0200
Imported Upstream version 2016-07-11.v2
---
ChangeLog | 109 +++
Makefile.in | 2 +-
TODO | 3 -
VERSION | 2 +-
configure | 56 +-
configure.ac | 21 +-
src/ChangeLog | 0
src/Makefile.am | 4 +-
src/Makefile.in | 129 ++-
src/compile | 165 ----
src/doublelist.c | 18 +-
src/doublelist.h | 4 +-
src/filestring.c | 209 ++++-
src/gmap.c | 24 +-
src/gsnap.c | 12 +-
src/indel.c | 8 +-
src/intlist.c | 19 +-
src/intlist.h | 4 +-
src/pair.c | 99 +-
src/pair.h | 4 +-
src/resulthr.c | 10 +-
src/resulthr.h | 5 +-
src/samprint.c | 19 +-
src/sarray-read.c | 387 +++++++-
src/sedgesort.c | 203 +++++
src/sedgesort.h | 9 +
src/shortread.c | 170 +++-
src/splice.c | 21 +-
src/stage1hr.c | 2636 ++++++++++++++++++++++++++++++++++++++++++-----------
src/stage3.c | 11 +-
src/stage3.h | 4 +-
src/stage3hr.c | 437 ++++++---
src/stage3hr.h | 4 +-
src/substring.c | 71 +-
src/substring.h | 4 +-
src/uintlist.c | 17 +-
src/uintlist.h | 4 +-
src/uniqscan.c | 6 +-
src/univdiag.c | 22 +
src/univdiag.h | 4 +-
40 files changed, 3823 insertions(+), 1113 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index e8ec9be..6abfd3f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,112 @@
+2016-07-12 twu
+
+ * 2016-07-01-better-triage, archive.html, doublelist.c, doublelist.h,
+ gmap.c, gsnap.c, intlist.c, intlist.h, sarray-read.c, src, stage1hr.c,
+ stage3.c, stage3.h, stage3hr.c, substring.c, uintlist.c, uintlist.h,
+ uniqscan.c: Merged revisions from trunk
+
+ * config.site.rescomp.prd, configure.ac: Added -fomit-frame-pointer
+
+ * univdiag.c, univdiag.h: Implemented Univdiag_diagonal_cmp
+
+ * substring.c: Using new format types for filestrings
+
+ * stage1hr.c: Separated code for heaps and loser trees. Reverted to using
+ heaps.
+
+ * shortread.c: Using new format types for filestrings
+
+ * sarray-read.c: Using Sedgewick sort. For left and right diagonals, trying
+ to consolidate segments on the same diagonal
+
+ * samprint.c: Consolidated some calls to Filestring_put
+
+ * gsnap.c: Making --end-detail=low the default
+
+ * stage3.c: Restored backwards motion of ptr
+
+ * stage3hr.h: Added Stage3pair_determine_pairtype
+
+ * stage3hr.c: Changed trim threshold for paired terminals from 15 to 8.
+ Solving ends if the trims are 30 or more, regardless of --end-detail
+ value.
+
+ * pair.c: Added debugging macros for hiding method information
+
+ * filestring.c: Made transfer_string routine more efficient. Added ability
+ to transfer strings in reverse and reverse complement format.
+
+ * Makefile.gsnaptoo.am, sedgesort.c, sedgesort.h: Added Sedgewick sort
+ routines
+
+2016-07-06 twu
+
+ * stage1hr.c: Replaced heap in spanningset algorithm with a loser tree.
+ Allowing different anchor length criteria for different mods. Changing
+ paired result priorities so concordant results take priority over
+ concordant terminals, which then take priority over samechr and
+ translocations.
+
+2016-07-01 twu
+
+ * substring.c, substring.h: Defining Substring_nmismatches_region
+
+ * stage3hr.c: Using max_trim instead of total_trim to determine whether a
+ pair is a concordant terminal. Revised criteria for performing GMAP
+ algorithm on middle part of substrings, now based on mismatches in any
+ substring region being greater than 2.
+
+ * stage1hr.c: Revised criteria for performing spanningset and completeset
+ algorithms. Spanningset run if a terminal end has total trim > 15.
+ Completeset run if a terminal score is greater than done_level.
+
+ * sarray-read.c: Ignoring value of nmisses_allowed, thereby allowing more
+ solutions to come from the suffix array algorithm
+
+ * pair.c, stage3.c: Restored backwards movement in processing pairs, but
+ making an exception when we reach the last pair
+
+2016-06-30 twu
+
+ * 2016-07-01-better-triage: Created branch to triage better for GSNAP or
+ GMAP
+
+ * pair.c, stage3.c: No longer going backward after an indel, which could
+ cause an infinite loop
+
+ * splice.c: Using looser criteria for accepting a splice
+
+ * sarray-read.c: Revising previous number of mismatches instead of replacing
+ it
+
+ * substring.c: Using correct memory category for substrings
+
+ * stage3hr.c: Penalizing bad introns
+
+ * pair.c, pair.h: Pair_nmismatches_region returning number of bad introns
+
+ * indel.c: Improved debugging statements
+
+ * gmap.c: Including -K for backward compatibility
+
+ * stage1hr.c: Merged revision 193193 from branches/2016-06-29-add-listpool
+ to change from lists to a vector for anchor_segments
+
+2016-06-29 twu
+
+ * resulthr.c, resulthr.h: Added UNPAIRED_TERMINALS result type
+
+ * stage1hr.c: Handling unpaired_terminals. Consolidated memory allocation
+ for plus and minus cases in Stage1hr_T object.
+
+2016-06-21 twu
+
+ * shortread.c: Made fixes for --force-single-end option to work properly
+
+2016-06-15 twu
+
+ * configure.ac: Added provision for user-selected SIMD level
+
2016-06-09 twu
* cpuid.c: Providing more detailed information from standalone program
diff --git a/Makefile.in b/Makefile.in
index a43454f..6d532b2 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -184,7 +184,7 @@ am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/compile \
$(top_srcdir)/config/config.sub \
$(top_srcdir)/config/install-sh $(top_srcdir)/config/ltmain.sh \
$(top_srcdir)/config/missing AUTHORS COPYING ChangeLog INSTALL \
- NEWS README TODO config/compile config/config.guess \
+ NEWS README config/compile config/config.guess \
config/config.sub config/install-sh config/ltmain.sh \
config/missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
diff --git a/TODO b/TODO
deleted file mode 100644
index c5a7bd7..0000000
--- a/TODO
+++ /dev/null
@@ -1,3 +0,0 @@
-
-Add flag that allows for splitting afterwards.
-
diff --git a/VERSION b/VERSION
index 765dd15..d04a41d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-06-09
\ No newline at end of file
+2016-07-11
\ No newline at end of file
diff --git a/configure b/configure
index 3cd3025..54ac889 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-06-09.
+# Generated by GNU Autoconf 2.69 for gmap 2016-07-11.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-06-09'
-PACKAGE_STRING='gmap 2016-06-09'
+PACKAGE_VERSION='2016-07-11'
+PACKAGE_STRING='gmap 2016-07-11'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
PACKAGE_URL=''
@@ -810,6 +810,7 @@ enable_pthreads
enable_alloca
enable_mmap
enable_simd
+with_simd_level
with_gmapdb
enable_zlib
enable_bzlib
@@ -1367,7 +1368,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2016-06-09 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-07-11 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1438,7 +1439,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2016-06-09:";;
+ short | recursive ) echo "Configuration of gmap 2016-07-11:";;
esac
cat <<\_ACEOF
@@ -1486,10 +1487,13 @@ Optional Packages:
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
--with-sysroot[=DIR] Search for dependent libraries within DIR (or the
compiler's sysroot if not specified).
+ --with-simd-level=STRING
+ User-selected SIMD level (sse2, ssse3, sse41, sse42,
+ avx2)
--with-gmapdb=DIR Default GMAP database directory
Some influential environment variables:
- CFLAGS Compiler flags (default: -O3)
+ CFLAGS Compiler flags (default: -O3 -fomit-frame-pointer)
MPI_CFLAGS Compiler flags (default: -O3)
CC C compiler command
LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
@@ -1570,7 +1574,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2016-06-09
+gmap configure 2016-07-11
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2176,7 +2180,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2016-06-09, which was
+It was created by gmap $as_me 2016-07-11, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2526,8 +2530,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-06-09" >&5
-$as_echo "2016-06-09" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-07-11" >&5
+$as_echo "2016-07-11" >&6; }
### Read defaults
@@ -2577,11 +2581,11 @@ fi
$as_echo_n "checking CFLAGS... " >&6; }
if test x"$CFLAGS" = x; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not set by user so using default -O3" >&5
-$as_echo "not set by user so using default -O3" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: not set by user so using default -O3 -fomit-frame-pointer" >&5
+$as_echo "not set by user so using default -O3 -fomit-frame-pointer" >&6; }
EXP_VAR=CFLAGS
- FROM_VAR='-O3'
+ FROM_VAR='-O3 -fomit-frame-pointer'
prefix_save=$prefix
exec_prefix_save=$exec_prefix
@@ -4392,7 +4396,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2016-06-09'
+ VERSION='2016-07-11'
cat >>confdefs.h <<_ACEOF
@@ -18803,6 +18807,26 @@ else
compile_level=none
fi
+
+# User-selected compile level
+
+# Check whether --with-simd-level was given.
+if test "${with_simd_level+set}" = set; then :
+ withval=$with_simd_level; answer="$withval"
+else
+ answer=""
+fi
+
+if test x"$answer" != x; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for user-selected SIMD level" >&5
+$as_echo_n "checking for user-selected SIMD level... " >&6; }
+ compile_level=$answer
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $compile_level" >&5
+$as_echo "$compile_level" >&6; }
+fi
+
+
+
if test "$compile_level" = avx2; then
MAKE_AVX2_TRUE=
MAKE_AVX2_FALSE='#'
@@ -20048,7 +20072,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2016-06-09, which was
+This file was extended by gmap $as_me 2016-07-11, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20114,7 +20138,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-gmap config.status 2016-06-09
+gmap config.status 2016-07-11
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index c92fe71..e991099 100644
--- a/configure.ac
+++ b/configure.ac
@@ -65,10 +65,10 @@ fi
# Set default CFLAGS if not already set by user
AC_MSG_CHECKING(CFLAGS)
-AC_ARG_VAR([CFLAGS], [Compiler flags (default: -O3)])
+AC_ARG_VAR([CFLAGS], [Compiler flags (default: -O3 -fomit-frame-pointer)])
if test x"$CFLAGS" = x; then
- AC_MSG_RESULT(not set by user so using default -O3)
- ACX_EXPAND(CFLAGS,'-O3')
+ AC_MSG_RESULT(not set by user so using default -O3 -fomit-frame-pointer)
+ ACX_EXPAND(CFLAGS,'-O3 -fomit-frame-pointer')
else
AC_MSG_RESULT($CFLAGS)
fi
@@ -554,6 +554,21 @@ else
compile_level=none
fi
+
+# User-selected compile level
+AC_ARG_WITH([simd-level],
+ AC_HELP_STRING([--with-simd-level=STRING],
+ [User-selected SIMD level (sse2, ssse3, sse41, sse42, avx2)]),
+ [answer="$withval"],
+ [answer=""])
+if test x"$answer" != x; then
+ AC_MSG_CHECKING(for user-selected SIMD level)
+ compile_level=$answer
+ AC_MSG_RESULT($compile_level)
+fi
+
+
+
AM_CONDITIONAL(MAKE_AVX2,[test "$compile_level" = avx2])
AM_CONDITIONAL(MAKE_SSE4_2,[test "$compile_level" = sse42])
AM_CONDITIONAL(MAKE_SSE4_1,[test "$compile_level" = sse41])
diff --git a/src/ChangeLog b/src/ChangeLog
deleted file mode 100644
index e69de29..0000000
diff --git a/src/Makefile.am b/src/Makefile.am
index c38ec89..0b9f209 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -271,7 +271,7 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
bytecoding.c bytecoding.h \
- univdiagdef.h univdiag.c univdiag.h sarray-read.c sarray-read.h \
+ univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \
stage1hr.c stage1hr.h \
request.c request.h resulthr.c resulthr.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
@@ -430,7 +430,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
bytecoding.c bytecoding.h \
- univdiagdef.h univdiag.c univdiag.h sarray-read.c sarray-read.h \
+ univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \
stage1hr.c stage1hr.h resulthr.c resulthr.h \
datadir.c datadir.h mode.h parserange.c parserange.h \
getopt.c getopt1.c getopt.h uniqscan.c
diff --git a/src/Makefile.in b/src/Makefile.in
index 2d51a86..29d2b96 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -1100,6 +1100,7 @@ am__objects_21 = gsnap_avx2-except.$(OBJEXT) \
gsnap_avx2-indel.$(OBJEXT) \
gsnap_avx2-bitpack64-access.$(OBJEXT) \
gsnap_avx2-bytecoding.$(OBJEXT) gsnap_avx2-univdiag.$(OBJEXT) \
+ gsnap_avx2-sedgesort.$(OBJEXT) \
gsnap_avx2-sarray-read.$(OBJEXT) gsnap_avx2-stage1hr.$(OBJEXT) \
gsnap_avx2-request.$(OBJEXT) gsnap_avx2-resulthr.$(OBJEXT) \
gsnap_avx2-output.$(OBJEXT) gsnap_avx2-inbuffer.$(OBJEXT) \
@@ -1172,6 +1173,7 @@ am__objects_22 = gsnap_nosimd-except.$(OBJEXT) \
gsnap_nosimd-bitpack64-access.$(OBJEXT) \
gsnap_nosimd-bytecoding.$(OBJEXT) \
gsnap_nosimd-univdiag.$(OBJEXT) \
+ gsnap_nosimd-sedgesort.$(OBJEXT) \
gsnap_nosimd-sarray-read.$(OBJEXT) \
gsnap_nosimd-stage1hr.$(OBJEXT) gsnap_nosimd-request.$(OBJEXT) \
gsnap_nosimd-resulthr.$(OBJEXT) gsnap_nosimd-output.$(OBJEXT) \
@@ -1236,6 +1238,7 @@ am__objects_23 = gsnap_sse2-except.$(OBJEXT) \
gsnap_sse2-indel.$(OBJEXT) \
gsnap_sse2-bitpack64-access.$(OBJEXT) \
gsnap_sse2-bytecoding.$(OBJEXT) gsnap_sse2-univdiag.$(OBJEXT) \
+ gsnap_sse2-sedgesort.$(OBJEXT) \
gsnap_sse2-sarray-read.$(OBJEXT) gsnap_sse2-stage1hr.$(OBJEXT) \
gsnap_sse2-request.$(OBJEXT) gsnap_sse2-resulthr.$(OBJEXT) \
gsnap_sse2-output.$(OBJEXT) gsnap_sse2-inbuffer.$(OBJEXT) \
@@ -1301,7 +1304,7 @@ am__objects_24 = gsnap_sse41-except.$(OBJEXT) \
gsnap_sse41-indel.$(OBJEXT) \
gsnap_sse41-bitpack64-access.$(OBJEXT) \
gsnap_sse41-bytecoding.$(OBJEXT) \
- gsnap_sse41-univdiag.$(OBJEXT) \
+ gsnap_sse41-univdiag.$(OBJEXT) gsnap_sse41-sedgesort.$(OBJEXT) \
gsnap_sse41-sarray-read.$(OBJEXT) \
gsnap_sse41-stage1hr.$(OBJEXT) gsnap_sse41-request.$(OBJEXT) \
gsnap_sse41-resulthr.$(OBJEXT) gsnap_sse41-output.$(OBJEXT) \
@@ -1367,7 +1370,7 @@ am__objects_25 = gsnap_sse42-except.$(OBJEXT) \
gsnap_sse42-indel.$(OBJEXT) \
gsnap_sse42-bitpack64-access.$(OBJEXT) \
gsnap_sse42-bytecoding.$(OBJEXT) \
- gsnap_sse42-univdiag.$(OBJEXT) \
+ gsnap_sse42-univdiag.$(OBJEXT) gsnap_sse42-sedgesort.$(OBJEXT) \
gsnap_sse42-sarray-read.$(OBJEXT) \
gsnap_sse42-stage1hr.$(OBJEXT) gsnap_sse42-request.$(OBJEXT) \
gsnap_sse42-resulthr.$(OBJEXT) gsnap_sse42-output.$(OBJEXT) \
@@ -1433,7 +1436,7 @@ am__objects_26 = gsnap_ssse3-except.$(OBJEXT) \
gsnap_ssse3-indel.$(OBJEXT) \
gsnap_ssse3-bitpack64-access.$(OBJEXT) \
gsnap_ssse3-bytecoding.$(OBJEXT) \
- gsnap_ssse3-univdiag.$(OBJEXT) \
+ gsnap_ssse3-univdiag.$(OBJEXT) gsnap_ssse3-sedgesort.$(OBJEXT) \
gsnap_ssse3-sarray-read.$(OBJEXT) \
gsnap_ssse3-stage1hr.$(OBJEXT) gsnap_ssse3-request.$(OBJEXT) \
gsnap_ssse3-resulthr.$(OBJEXT) gsnap_ssse3-output.$(OBJEXT) \
@@ -2020,10 +2023,11 @@ am__objects_39 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \
uniqscan-splicetrie.$(OBJEXT) uniqscan-splice.$(OBJEXT) \
uniqscan-indel.$(OBJEXT) uniqscan-bitpack64-access.$(OBJEXT) \
uniqscan-bytecoding.$(OBJEXT) uniqscan-univdiag.$(OBJEXT) \
- uniqscan-sarray-read.$(OBJEXT) uniqscan-stage1hr.$(OBJEXT) \
- uniqscan-resulthr.$(OBJEXT) uniqscan-datadir.$(OBJEXT) \
- uniqscan-parserange.$(OBJEXT) uniqscan-getopt.$(OBJEXT) \
- uniqscan-getopt1.$(OBJEXT) uniqscan-uniqscan.$(OBJEXT)
+ uniqscan-sedgesort.$(OBJEXT) uniqscan-sarray-read.$(OBJEXT) \
+ uniqscan-stage1hr.$(OBJEXT) uniqscan-resulthr.$(OBJEXT) \
+ uniqscan-datadir.$(OBJEXT) uniqscan-parserange.$(OBJEXT) \
+ uniqscan-getopt.$(OBJEXT) uniqscan-getopt1.$(OBJEXT) \
+ uniqscan-uniqscan.$(OBJEXT)
dist_uniqscan_OBJECTS = $(am__objects_39)
uniqscan_OBJECTS = $(dist_uniqscan_OBJECTS)
uniqscan_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
@@ -2186,7 +2190,7 @@ am__define_uniq_tagged_files = \
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config.h.in \
- $(top_srcdir)/config/depcomp ChangeLog compile
+ $(top_srcdir)/config/depcomp
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
@@ -2533,7 +2537,7 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
bytecoding.c bytecoding.h \
- univdiagdef.h univdiag.c univdiag.h sarray-read.c sarray-read.h \
+ univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \
stage1hr.c stage1hr.h \
request.c request.h resulthr.c resulthr.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
@@ -2675,7 +2679,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
bytecoding.c bytecoding.h \
- univdiagdef.h univdiag.c univdiag.h sarray-read.c sarray-read.h \
+ univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \
stage1hr.c stage1hr.h resulthr.c resulthr.h \
datadir.c datadir.h mode.h parserange.c parserange.h \
getopt.c getopt1.c getopt.h uniqscan.c
@@ -4439,6 +4443,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-samheader.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-samprint.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-sarray-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-sedgesort.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-segmentpos.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-semaphore.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-sequence.Po@am__quote@
@@ -4528,6 +4533,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-samheader.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-samprint.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-sarray-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-sedgesort.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-segmentpos.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-semaphore.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-sequence.Po@am__quote@
@@ -4617,6 +4623,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-samheader.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-samprint.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-sarray-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-sedgesort.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-segmentpos.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-semaphore.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-sequence.Po@am__quote@
@@ -4706,6 +4713,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-samheader.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-samprint.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-sarray-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-sedgesort.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-segmentpos.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-semaphore.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-sequence.Po@am__quote@
@@ -4795,6 +4803,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-samheader.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-samprint.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-sarray-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-sedgesort.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-segmentpos.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-semaphore.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-sequence.Po@am__quote@
@@ -4884,6 +4893,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-samheader.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-samprint.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-sarray-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-sedgesort.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-segmentpos.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-semaphore.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-sequence.Po@am__quote@
@@ -5612,6 +5622,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-reader.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-resulthr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-sarray-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-sedgesort.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-segmentpos.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-semaphore.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-sequence.Po@am__quote@
@@ -23390,6 +23401,20 @@ gsnap_avx2-univdiag.obj: univdiag.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+gsnap_avx2-sedgesort.o: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-sedgesort.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-sedgesort.Tpo -c -o gsnap_avx2-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-sedgesort.Tpo $(DEPDIR)/gsnap_avx2-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_avx2-sedgesort.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+
+gsnap_avx2-sedgesort.obj: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-sedgesort.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-sedgesort.Tpo -c -o gsnap_avx2-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-sedgesort.Tpo $(DEPDIR)/gsnap_avx2-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_avx2-sedgesort.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+
gsnap_avx2-sarray-read.o: sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-sarray-read.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-sarray-read.Tpo -c -o gsnap_avx2-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-sarray-read.Tpo $(DEPDIR)/gsnap_avx2-sarray-read.Po
@@ -24636,6 +24661,20 @@ gsnap_nosimd-univdiag.obj: univdiag.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+gsnap_nosimd-sedgesort.o: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-sedgesort.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-sedgesort.Tpo -c -o gsnap_nosimd-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-sedgesort.Tpo $(DEPDIR)/gsnap_nosimd-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_nosimd-sedgesort.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+
+gsnap_nosimd-sedgesort.obj: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-sedgesort.obj -MD -MP -MF $(DEPDIR)/gsnap_nosimd-sedgesort.Tpo -c -o gsnap_nosimd-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-sedgesort.Tpo $(DEPDIR)/gsnap_nosimd-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_nosimd-sedgesort.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+
gsnap_nosimd-sarray-read.o: sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-sarray-read.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-sarray-read.Tpo -c -o gsnap_nosimd-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-sarray-read.Tpo $(DEPDIR)/gsnap_nosimd-sarray-read.Po
@@ -25882,6 +25921,20 @@ gsnap_sse2-univdiag.obj: univdiag.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+gsnap_sse2-sedgesort.o: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-sedgesort.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-sedgesort.Tpo -c -o gsnap_sse2-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-sedgesort.Tpo $(DEPDIR)/gsnap_sse2-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_sse2-sedgesort.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+
+gsnap_sse2-sedgesort.obj: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-sedgesort.obj -MD -MP -MF $(DEPDIR)/gsnap_sse2-sedgesort.Tpo -c -o gsnap_sse2-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-sedgesort.Tpo $(DEPDIR)/gsnap_sse2-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_sse2-sedgesort.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+
gsnap_sse2-sarray-read.o: sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-sarray-read.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-sarray-read.Tpo -c -o gsnap_sse2-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-sarray-read.Tpo $(DEPDIR)/gsnap_sse2-sarray-read.Po
@@ -27128,6 +27181,20 @@ gsnap_sse41-univdiag.obj: univdiag.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+gsnap_sse41-sedgesort.o: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-sedgesort.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-sedgesort.Tpo -c -o gsnap_sse41-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-sedgesort.Tpo $(DEPDIR)/gsnap_sse41-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_sse41-sedgesort.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+
+gsnap_sse41-sedgesort.obj: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-sedgesort.obj -MD -MP -MF $(DEPDIR)/gsnap_sse41-sedgesort.Tpo -c -o gsnap_sse41-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-sedgesort.Tpo $(DEPDIR)/gsnap_sse41-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_sse41-sedgesort.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+
gsnap_sse41-sarray-read.o: sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-sarray-read.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-sarray-read.Tpo -c -o gsnap_sse41-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-sarray-read.Tpo $(DEPDIR)/gsnap_sse41-sarray-read.Po
@@ -28374,6 +28441,20 @@ gsnap_sse42-univdiag.obj: univdiag.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+gsnap_sse42-sedgesort.o: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-sedgesort.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-sedgesort.Tpo -c -o gsnap_sse42-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-sedgesort.Tpo $(DEPDIR)/gsnap_sse42-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_sse42-sedgesort.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+
+gsnap_sse42-sedgesort.obj: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-sedgesort.obj -MD -MP -MF $(DEPDIR)/gsnap_sse42-sedgesort.Tpo -c -o gsnap_sse42-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-sedgesort.Tpo $(DEPDIR)/gsnap_sse42-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_sse42-sedgesort.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+
gsnap_sse42-sarray-read.o: sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-sarray-read.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-sarray-read.Tpo -c -o gsnap_sse42-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-sarray-read.Tpo $(DEPDIR)/gsnap_sse42-sarray-read.Po
@@ -29620,6 +29701,20 @@ gsnap_ssse3-univdiag.obj: univdiag.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+gsnap_ssse3-sedgesort.o: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-sedgesort.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-sedgesort.Tpo -c -o gsnap_ssse3-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-sedgesort.Tpo $(DEPDIR)/gsnap_ssse3-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_ssse3-sedgesort.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+
+gsnap_ssse3-sedgesort.obj: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-sedgesort.obj -MD -MP -MF $(DEPDIR)/gsnap_ssse3-sedgesort.Tpo -c -o gsnap_ssse3-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-sedgesort.Tpo $(DEPDIR)/gsnap_ssse3-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_ssse3-sedgesort.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+
gsnap_ssse3-sarray-read.o: sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-sarray-read.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-sarray-read.Tpo -c -o gsnap_ssse3-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-sarray-read.Tpo $(DEPDIR)/gsnap_ssse3-sarray-read.Po
@@ -39910,6 +40005,20 @@ uniqscan-univdiag.obj: univdiag.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+uniqscan-sedgesort.o: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-sedgesort.o -MD -MP -MF $(DEPDIR)/uniqscan-sedgesort.Tpo -c -o uniqscan-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-sedgesort.Tpo $(DEPDIR)/uniqscan-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='uniqscan-sedgesort.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+
+uniqscan-sedgesort.obj: sedgesort.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-sedgesort.obj -MD -MP -MF $(DEPDIR)/uniqscan-sedgesort.Tpo -c -o uniqscan-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-sedgesort.Tpo $(DEPDIR)/uniqscan-sedgesort.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='uniqscan-sedgesort.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+
uniqscan-sarray-read.o: sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-sarray-read.o -MD -MP -MF $(DEPDIR)/uniqscan-sarray-read.Tpo -c -o uniqscan-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-sarray-read.Tpo $(DEPDIR)/uniqscan-sarray-read.Po
diff --git a/src/compile b/src/compile
deleted file mode 100644
index 51522be..0000000
--- a/src/compile
+++ /dev/null
@@ -1,165 +0,0 @@
--*- mode: compilation; default-directory: "~/bioinfo/gmap/trunk/src/" -*-
-Compilation started at Mon Dec 14 14:13:20
-
-make -k gsnap.sse42
-/gne/home/twu/bin/gcc -DHAVE_CONFIG_H -I. -pthread -DTARGET=\"x86_64-unknown-linux-gnu\" -DGMAPDB=\"/gne/research/data/bioinfo/gmap/data/genomes\" -DMAX_READLENGTH=300 -DGSNAP=1 -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -msse2 -mssse3 -msse4.1 -msse4.2 -mpopcnt -g -Wall -Wextra -DCHECK_ASSERTIONS=1 -MT gsnap_sse42-dynprog_simd.o -MD -MP -MF .deps/gsnap_sse42-dynprog_simd.Tpo -c -o gsnap_sse42-dynprog_simd.o `test -f 'dynprog_simd.c' || echo './'`dynprog_simd.c
-dynprog_simd.c: In function ‘Dynprog_simd_8’:
-dynprog_simd.c:2143:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
- ^
-dynprog_simd.c:2143:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:2144:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
- ^
-dynprog_simd.c:2144:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:2347:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
- ^
-dynprog_simd.c:2347:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:2348:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
- ^
-dynprog_simd.c:2348:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:1942:33: warning: variable ‘extend_ladder’ set but not used [-Wunused-but-set-variable]
- __m128i gap_open, gap_extend, extend_ladder, complement_dummy;
- ^
-dynprog_simd.c: In function ‘Dynprog_simd_8_upper’:
-dynprog_simd.c:2770:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
- ^
-dynprog_simd.c:2770:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:2771:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
- ^
-dynprog_simd.c:2771:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:2896:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
- ^
-dynprog_simd.c:2896:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:2897:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
- ^
-dynprog_simd.c:2897:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:2632:8: warning: unused variable ‘na2_single’ [-Wunused-variable]
- char na2_single;
- ^
-dynprog_simd.c:2626:70: warning: unused variable ‘pairscore’ [-Wunused-variable]
- Score8_T *pairscores[5], *pairscores_std_ptr, *pairscores_alt_ptr, pairscore;
- ^
-dynprog_simd.c: In function ‘Dynprog_simd_8_lower’:
-dynprog_simd.c:3238:3: error: ‘extend_ladder’ undeclared (first use in this function)
- extend_ladder = _mm_setr_epi8(0,extend,2*extend,3*extend,4*extend,5*extend,6*extend,7*ext
- ^
-dynprog_simd.c:3238:3: note: each undeclared identifier is reported only once for each function it appears in
-dynprog_simd.c:3267:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]];
- ^
-dynprog_simd.c:3267:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:3389:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]];
- ^
-dynprog_simd.c:3389:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:3089:8: warning: unused variable ‘na2_single’ [-Wunused-variable]
- char na2_single;
- ^
-dynprog_simd.c:3083:45: warning: unused variable ‘pairscore’ [-Wunused-variable]
- Score8_T *pairscores[5], *pairscores_ptr, pairscore;
- ^
-dynprog_simd.c: In function ‘Dynprog_simd_16’:
-dynprog_simd.c:3739:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
- ^
-dynprog_simd.c:3739:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:3740:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
- ^
-dynprog_simd.c:3740:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:3923:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
- ^
-dynprog_simd.c:3923:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:3924:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
- ^
-dynprog_simd.c:3924:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:3563:33: warning: variable ‘extend_ladder’ set but not used [-Wunused-but-set-variable]
- __m128i gap_open, gap_extend, extend_ladder, complement_dummy;
- ^
-dynprog_simd.c: In function ‘Dynprog_simd_16_upper’:
-dynprog_simd.c:4259:3: error: ‘extend_ladder’ undeclared (first use in this function)
- extend_ladder = _mm_setr_epi16(0,extend,2*extend,3*extend,4*extend,5*extend,6*extend,7*ex
- ^
-dynprog_simd.c:4284:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
- ^
-dynprog_simd.c:4284:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:4285:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
- ^
-dynprog_simd.c:4285:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:4381:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
- ^
-dynprog_simd.c:4381:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:4382:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
- ^
-dynprog_simd.c:4382:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:4158:8: warning: unused variable ‘na2_single’ [-Wunused-variable]
- char na2_single;
- ^
-dynprog_simd.c:4152:71: warning: unused variable ‘pairscore’ [-Wunused-variable]
- Score16_T *pairscores[5], *pairscores_std_ptr, *pairscores_alt_ptr, pairscore;
- ^
-dynprog_simd.c: In function ‘Dynprog_simd_16_lower’:
-dynprog_simd.c:4675:3: error: ‘extend_ladder’ undeclared (first use in this function)
- extend_ladder = _mm_setr_epi16(0,extend,2*extend,3*extend,4*extend,5*extend,6*extend,7*ex
- ^
-dynprog_simd.c:4699:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]];
- ^
-dynprog_simd.c:4699:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:4792:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
- na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]];
- ^
-dynprog_simd.c:4792:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
-dynprog_simd.c:4542:8: warning: unused variable ‘na2_single’ [-Wunused-variable]
- char na2_single;
- ^
-dynprog_simd.c:4536:46: warning: unused variable ‘pairscore’ [-Wunused-variable]
- Score16_T *pairscores[5], *pairscores_ptr, pairscore;
- ^
-dynprog_simd.c: In function ‘Dynprog_traceback_8_lower’:
-dynprog_simd.c:5278:8: warning: unused variable ‘add_dashes_p’ [-Wunused-variable]
- bool add_dashes_p;
- ^
-dynprog_simd.c:5275:11: warning: unused parameter ‘cdna_direction’ [-Wunused-parameter]
- int cdna_direction, bool watsonp, int dynprogindex) {
- ^
-dynprog_simd.c: In function ‘Dynprog_traceback_16_lower’:
-dynprog_simd.c:5662:8: warning: unused variable ‘add_dashes_p’ [-Wunused-variable]
- bool add_dashes_p;
- ^
-dynprog_simd.c:5659:12: warning: unused parameter ‘cdna_direction’ [-Wunused-parameter]
- int cdna_direction, bool watsonp, int dynprogindex) {
- ^
-dynprog_simd.c: At top level:
-dynprog_simd.c:1:13: warning: ‘rcsid’ defined but not used [-Wunused-variable]
- static char rcsid[] = "$Id: dynprog_simd.c 146623 2014-09-02 21:31:32Z twu $";
- ^
-dynprog_simd.c:510:1: warning: ‘Directions8_print’ defined but not used [-Wunused-function]
- Directions8_print (Direction8_T **directions_nogap, Direction8_T **directions_Egap, Directi
- ^
-dynprog_simd.c:604:1: warning: ‘Directions8_print_ud’ defined but not used [-Wunused-function]
- Directions8_print_ud (Direction8_T **directions_nogap, Direction8_T **directions_Egap,
- ^
-dynprog_simd.c:713:1: warning: ‘Directions16_print’ defined but not used [-Wunused-function]
- Directions16_print (Direction16_T **directions_nogap, Direction16_T **directions_Egap, Dire
- ^
-dynprog_simd.c:807:1: warning: ‘Directions16_print_ud’ defined but not used [-Wunused-function]
- Directions16_print_ud (Direction16_T **directions_nogap, Direction16_T **directions_Egap,
- ^
-make: *** [gsnap_sse42-dynprog_simd.o] Error 1
-make: Target `gsnap.sse42' not remade because of errors.
-
-Compilation exited abnormally with code 2 at Mon Dec 14 14:13:23
diff --git a/src/doublelist.c b/src/doublelist.c
index d7afff4..0dd3c3a 100644
--- a/src/doublelist.c
+++ b/src/doublelist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: doublelist.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: doublelist.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -65,6 +65,22 @@ Doublelist_free (T *list) {
}
T
+Doublelist_keep_one (T list, int i) { /* Keeps only the cell at 0-based position i of list, frees every other cell, and returns the kept cell */
+ T head;
+
+ while (--i >= 0) { /* executes i times, discarding the first i cells */
+ /* Pop: remember the successor, free the current cell, advance */
+ head = list->rest; /* NOTE(review): list is dereferenced without a NULL check, so the list must hold more than i cells -- confirm callers guarantee this */
+ FREE(list);
+ list = head;
+ }
+
+ Doublelist_free(&list->rest); /* release everything after the kept cell; presumably leaves list->rest NULL -- Doublelist_free body is not shown here */
+ return list;
+}
+
+
+T
Doublelist_reverse (T list) {
T head = NULL, next;
diff --git a/src/doublelist.h b/src/doublelist.h
index a2f2cfe..e6e271f 100644
--- a/src/doublelist.h
+++ b/src/doublelist.h
@@ -1,4 +1,4 @@
-/* $Id: doublelist.h 166641 2015-05-29 21:13:04Z twu $ */
+/* $Id: doublelist.h 193899 2016-07-12 04:41:34Z twu $ */
#ifndef DOUBLELIST_INCLUDED
#define DOUBLELIST_INCLUDED
@@ -10,6 +10,8 @@ extern T Doublelist_pop (T list, double *index);
extern double Doublelist_head (T list);
extern T Doublelist_next (T list);
extern void Doublelist_free (T *list);
+extern T
+Doublelist_keep_one (T list, int i);
extern T Doublelist_reverse (T list);
extern int Doublelist_length (T list);
extern double *
diff --git a/src/filestring.c b/src/filestring.c
index 94a6811..ebd9651 100644
--- a/src/filestring.c
+++ b/src/filestring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: filestring.c 162093 2015-03-26 18:54:22Z twu $";
+static char rcsid[] = "$Id: filestring.c 194346 2016-07-18 17:06:16Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -9,6 +9,7 @@ static char rcsid[] = "$Id: filestring.c 162093 2015-03-26 18:54:22Z twu $";
#include <ctype.h> /* For isdigit() */
#include "assert.h"
#include "mem.h"
+#include "complement.h"
#include "list.h"
@@ -218,8 +219,9 @@ transfer_char (T this, char c) {
return;
}
-void
-transfer_string (T this, char *string, int bufferlen) {
+
+static void
+transfer_buffer (T this, char *string, int bufferlen) {
char *block, *q;
for (q = string; --bufferlen >= 0 && *q != '\0'; q++) {
@@ -243,6 +245,121 @@ transfer_string (T this, char *string, int bufferlen) {
}
+static void
+transfer_string (T this, char *string, int stringlen) { /* Appends stringlen bytes starting at string to the block chain of this, opening new blocks as the current one fills */
+ char *block, *q; /* q: next unread position in string */
+
+ q = string;
+ while (this->nleft <= stringlen) { /* remaining input fills (or exactly fills) the current block */
+ strncpy(this->ptr,q,this->nleft); /* fill the rest of the current block */
+ q += this->nleft;
+ stringlen -= this->nleft;
+
+ block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char)); /* start a fresh block */
+ this->blocks = List_push_out(this->blocks,(void *) block);
+ this->nleft = BLOCKSIZE;
+ this->ptr = &(block[0]);
+ }
+
+ strncpy(this->ptr,q,stringlen); /* tail is strictly shorter than nleft here; no terminating NUL is added. NOTE(review): strncpy zero-fills after a NUL in the source, so a stringlen larger than the real string length emits NUL bytes */
+ this->ptr += stringlen;
+ this->nleft -= stringlen;
+
+ return;
+}
+
+
+static void
+reverse_inplace (char *string, unsigned int length) { /* Reverses the first length bytes of string in place */
+ char temp, *p, *q; /* p walks forward from the start, q walks backward from the end */
+ unsigned int i;
+
+ p = string;
+ q = &(string[length-1]); /* NOTE(review): with length == 0 the unsigned index wraps; the loop below then runs zero times, so q is never dereferenced */
+
+ for (i = 0; i < length/2; i++) { /* swap symmetric pairs; the middle byte of an odd-length range stays put */
+ temp = *p;
+ *p++ = *q;
+ *q-- = temp;
+ }
+
+ return;
+}
+
+static void
+transfer_string_reverse (T this, char *string, int stringlen) { /* Appends the first stringlen bytes of string, in reverse order, to the block chain of this */
+ char *block, *q; /* q: start of the not-yet-emitted tail of the input */
+
+ q = &(string[stringlen]); /* one past the last byte to emit */
+ while (this->nleft <= stringlen) { /* remaining input fills (or exactly fills) the current block */
+ q -= this->nleft; /* take the last nleft unread bytes ... */
+ strncpy(this->ptr,q,this->nleft);
+ reverse_inplace(this->ptr,this->nleft); /* ... and flip them inside the output block */
+ stringlen -= this->nleft;
+
+ block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char)); /* start a fresh block */
+ this->blocks = List_push_out(this->blocks,(void *) block);
+ this->nleft = BLOCKSIZE;
+ this->ptr = &(block[0]);
+ }
+
+ strncpy(this->ptr,string,stringlen); /* what is left is the leading stringlen bytes of the input */
+ reverse_inplace(this->ptr,stringlen);
+ this->ptr += stringlen;
+ this->nleft -= stringlen;
+
+ return;
+}
+
+
+static char complCode[128] = COMPLEMENT_LC; /* per-character substitution table; COMPLEMENT_LC is defined outside this hunk (presumably complement.h -- confirm) */
+
+static void
+revcomp_inplace (char *string, unsigned int length) { /* Reverses the first length bytes of string in place, mapping each byte through complCode */
+ char temp, *p, *q; /* p walks forward from the start, q walks backward from the end */
+ unsigned int i;
+
+ p = string;
+ q = &(string[length-1]); /* NOTE(review): with length == 0 the unsigned index wraps; nothing below dereferences q in that case unless p == q */
+
+ for (i = 0; i < length/2; i++) { /* swap symmetric pairs, substituting both bytes */
+ temp = complCode[(int) *p]; /* NOTE(review): table has 128 entries, so bytes are assumed to be 7-bit; a negative or >127 char would index out of range */
+ *p++ = complCode[(int) *q];
+ *q-- = temp;
+ }
+ if (p == q) { /* odd length: the middle byte is not swapped but still needs substituting */
+ *p = complCode[(int) *p];
+ }
+
+ return;
+}
+
+static void
+transfer_string_revcomp (T this, char *string, int stringlen) { /* Appends the first stringlen bytes of string to the block chain of this, reversed and mapped through revcomp_inplace */
+ char *block, *q; /* q: start of the not-yet-emitted tail of the input */
+
+ q = &(string[stringlen]); /* one past the last byte to emit */
+ while (this->nleft <= stringlen) { /* remaining input fills (or exactly fills) the current block */
+ q -= this->nleft; /* take the last nleft unread bytes ... */
+ strncpy(this->ptr,q,this->nleft);
+ revcomp_inplace(this->ptr,this->nleft); /* ... and reverse-complement them inside the output block */
+ stringlen -= this->nleft;
+
+ block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char)); /* start a fresh block */
+ this->blocks = List_push_out(this->blocks,(void *) block);
+ this->nleft = BLOCKSIZE;
+ this->ptr = &(block[0]);
+ }
+
+ strncpy(this->ptr,string,stringlen); /* what is left is the leading stringlen bytes of the input */
+ revcomp_inplace(this->ptr,stringlen);
+ this->ptr += stringlen;
+ this->nleft -= stringlen;
+
+ return;
+}
+
+
#define BUFFERLEN 1024
@@ -254,7 +371,8 @@ Filestring_put (T this, const char *format, ...) {
char *block;
const char *p;
char *q, c;
- int precision;
+ char *string;
+ int precision, stringlen, i;
va_start(values,format);
@@ -281,9 +399,18 @@ Filestring_put (T this, const char *format, ...) {
break;
case 's': /* string */
- for (q = va_arg(values, char *); *q != '\0'; q++) {
- transfer_char(this,*q);
- }
+ q = va_arg(values, char *);
+ transfer_string(this,q,strlen(q));
+ break;
+
+ case 'r': /* string reversed */
+ q = va_arg(values, char *);
+ transfer_string_reverse(this,q,strlen(q));
+ break;
+
+ case 'R': /* string reverse-complemented */
+ q = va_arg(values, char *);
+ transfer_string_revcomp(this,q,strlen(q));
break;
case '.': /* float or double */
@@ -297,22 +424,29 @@ Filestring_put (T this, const char *format, ...) {
switch (*p) {
case 'f':
sprintf(BUFFER,"%.*f",precision,va_arg(values, double));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 'e':
sprintf(BUFFER,"%.*e",precision,va_arg(values, double));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 'g':
sprintf(BUFFER,"%.*g",precision,va_arg(values, double));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 's':
- sprintf(BUFFER,"%.*s",precision,va_arg(values, char *));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_string(this,/*string*/va_arg(values, char *),/*stringlen*/precision);
+ break;
+
+ case 'r':
+ transfer_string_reverse(this,/*string*/va_arg(values, char *),/*stringlen*/precision);
+ break;
+
+ case 'R':
+ transfer_string_revcomp(this,/*string*/va_arg(values, char *),/*stringlen*/precision);
break;
default: fprintf(stderr,"Cannot parse %%.%d%c\n",precision,*p); abort();
@@ -325,15 +459,45 @@ Filestring_put (T this, const char *format, ...) {
switch (*++p) {
case 'd':
sprintf(BUFFER,"%*d",precision,va_arg(values, int));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 'u':
sprintf(BUFFER,"%*u",precision,va_arg(values, unsigned int));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 's':
- sprintf(BUFFER,"%*s",precision,va_arg(values, char *));
- transfer_string(this,BUFFER,BUFFERLEN);
+ /* Right justify */
+ string = va_arg(values, char *);
+ if ((stringlen = (int) strlen(string)) < precision) {
+ for (i = 0; i < precision - stringlen; i++) {
+ transfer_char(this,' ');
+ }
+ transfer_string(this,string,stringlen);
+ } else {
+ transfer_string(this,string,/*stringlen*/precision);
+ }
+ break;
+ case 'r':
+ string = va_arg(values, char *);
+ if ((stringlen = (int) strlen(string)) < precision) {
+ for (i = 0; i < precision - stringlen; i++) {
+ transfer_char(this,' ');
+ }
+ transfer_string_reverse(this,string,stringlen);
+ } else {
+ transfer_string_reverse(this,string,/*stringlen*/precision);
+ }
+ break;
+ case 'R':
+ string = va_arg(values, char *);
+ if ((stringlen = (int) strlen(string)) < precision) {
+ for (i = 0; i < precision - stringlen; i++) {
+ transfer_char(this,' ');
+ }
+ transfer_string_revcomp(this,string,stringlen);
+ } else {
+ transfer_string_revcomp(this,string,/*stringlen*/precision);
+ }
break;
default: fprintf(stderr,"Cannot parse %%*%c\n",*p); abort();
}
@@ -341,40 +505,41 @@ Filestring_put (T this, const char *format, ...) {
case 'd': /* int */
sprintf(BUFFER,"%d",va_arg(values, int));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 'f': /* float */
sprintf(BUFFER,"%f",va_arg(values, double));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 'u': /* unsigned int */
sprintf(BUFFER,"%u",va_arg(values, unsigned int));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 'l':
switch (*++p) {
case 'd': /* long int */
sprintf(BUFFER,"%ld",va_arg(values, long int));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 'u': /* unsigned long */
sprintf(BUFFER,"%lu",va_arg(values, unsigned long));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 'l':
switch (*++p) {
case 'd': /* long long int */
sprintf(BUFFER,"%lld",va_arg(values, long long int));
- transfer_string(this,BUFFER,BUFFERLEN);
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
case 'u': /* unsigned long long */
sprintf(BUFFER,"%llu",va_arg(values, unsigned long long));
+ transfer_buffer(this,BUFFER,BUFFERLEN);
break;
default: fprintf(stderr,"Cannot parse %%ll%c\n",*p); abort();
diff --git a/src/gmap.c b/src/gmap.c
index 59a75ac..5141a2c 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 190430 2016-05-24 21:29:20Z twu $";
+static char rcsid[] = "$Id: gmap.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -5129,9 +5129,9 @@ parse_command_line (int argc, char *argv[], int optind) {
while ((opt = getopt_long(argc,argv,
#ifdef PMAP
- "q:D:a:d:k:Gg:2B:w:L:x:1t:s:c:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQYNI:i:l:",
+ "q:D:a:d:k:Gg:2B:K:w:L:x:1t:s:c:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQYNI:i:l:",
#else
- "q:D:d:k:Gg:2B:w:L:x:1t:s:c:p:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQFa:Tz:j:YNI:i:l:",
+ "q:D:d:k:Gg:2B:K:w:L:x:1t:s:c:p:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQFa:Tz:j:YNI:i:l:",
#endif
long_options, &long_option_index)) != -1) {
switch (opt) {
@@ -5414,7 +5414,12 @@ parse_command_line (int argc, char *argv[], int optind) {
}
break;
- case 'K': maxintronlen = atoi(check_valid_int(optarg)); break;
+ case 'K':
+ /* Included for backwards compatibility. Sets both
+ --max-intronlength-middle and --max-intronlength-ends */
+ maxintronlen = maxintronlen_ends = atoi(check_valid_int(optarg));
+ break;
+
case 'w': shortsplicedist = strtoul(check_valid_int(optarg),NULL,10); break;
case 'L': maxtotallen_bound = atoi(check_valid_int(optarg)); break;
@@ -6577,8 +6582,7 @@ main (int argc, char *argv[]) {
min_intronlength,max_deletionlength,/*min_indel_end_matches*/6,
maxpeelback_distalmedial,nullgap,extramaterial_end,extramaterial_paired,
extraband_single,extraband_end,extraband_paired,
- ngap,maxintronlen,maxintronlen_ends,minendexon,
- /*output_sam_p*/printtype == SAM ? true : false,homopolymerp,stage3debug);
+ ngap,maxintronlen,maxintronlen_ends,minendexon,homopolymerp,stage3debug);
Splicetrie_setup(splicesites,splicefrags_ref,splicefrags_alt,
trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max,
/*snpp*/false,amb_closest_p,/*amb_clip_p*/true,/*min_shortend*/2);
@@ -6958,10 +6962,14 @@ Usage: gmap [OPTIONS...] <FASTA files...>, or\n\
a genomic gap will be considered a deletion rather than an intron.\n\
",min_intronlength);
fprintf(stdout,"\
- --max-intronlength-middle=INT Max length for one internal intron (default %d)\n\
+ --max-intronlength-middle=INT Max length for one internal intron (default %d). Note: for backward\n\
+ compatibility, the -K flag will set both --max-intronlength-middle\n\
+ and --max-intronlength-ends.\n\
",maxintronlen);
fprintf(stdout,"\
- --max-intronlength-ends=INT Max length for first or last intron (default %d)\n\
+ --max-intronlength-ends=INT Max length for first or last intron (default %d). Note: for backward\n\
+ compatibility, the -K flag will set both --max-intronlength-middle\n\
+ and --max-intronlength-ends.\n\
",maxintronlen_ends);
fprintf(stdout,"\
--trim-end-exons=INT Trim end exons with fewer than given number of matches\n\
diff --git a/src/gsnap.c b/src/gsnap.c
index 4876ab9..e0b4048 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 190431 2016-05-24 21:29:49Z twu $";
+static char rcsid[] = "$Id: gsnap.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -342,7 +342,7 @@ static int min_shortend = 2;
static int antistranded_penalty = 0; /* Most RNA-Seq is non-stranded */
/* Now that we don't use terminals, can have end_detail being high without too much slowdown */
-static int end_detail = 2; /* 2 (high), 1 (medium), or 0 (low) */
+static int end_detail = 0; /* 2 (high), 1 (medium), or 0 (low) */
static Width_T index1part;
static Width_T required_index1part = 0;
@@ -2469,7 +2469,7 @@ open_input_streams_parser (int *nextchar, int *nchars1, int *nchars2, char ***fi
fprintf(stderr,"Cannot open file %s\n",(*files)[0]);
exit(9);
} else {
- debugf(fprintf(stderr,"Master opening file %s using fopen\n",(*files)[0]));
+ debugf(fprintf(stderr,"Master opening file %s using fopen for input\n",(*files)[0]));
*nextchar = Shortread_input_init(&(*nchars1),*input);
}
}
@@ -2522,7 +2522,7 @@ open_input_streams_parser (int *nextchar, int *nchars1, int *nchars2, char ***fi
fprintf(stderr,"Cannot open file %s\n",(*files)[0]);
exit(9);
} else {
- debugf(fprintf(stderr,"Master opening file %s using fopen\n",(*files)[0]));
+ debugf(fprintf(stderr,"Master opening file %s using fopen for input2\n",(*files)[0]));
/* nextchar2 = */ Shortread_input_init(&(*nchars2),*input2);
}
}
@@ -3308,7 +3308,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
maxpeelback_distalmedial,nullgap,extramaterial_end,extramaterial_paired,
extraband_single,extraband_end,extraband_paired,
ngap,/*maxintronlen*/shortsplicedist,/*maxintronlen_ends*/shortsplicedist,
- /*minendexon*/0,output_sam_p,/*homopolymerp*/false,/*stage3debug*/NO_STAGE3DEBUG);
+ /*minendexon*/0,/*homopolymerp*/false,/*stage3debug*/NO_STAGE3DEBUG);
Oligoindex_hr_setup(Genome_blocks(genomecomp),mode);
Stage2_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,/*cross_species_p*/false,
suboptimal_score_start,suboptimal_score_end,sufflookback,nsufflookback,
@@ -4201,7 +4201,7 @@ is still designed to be fast.\n\
",trim_indel_score);
fprintf(stdout,"\
- --end-detail=STRING Amount of alignment detail at ends of read: high (default), medium, or low\n\
+ --end-detail=STRING Amount of alignment detail at ends of read: high, medium, or low (default)\n\
Note: medium detail could increase speed by 20%% or so, but will miss some\n\
splices at the ends of reads\n\
");
diff --git a/src/indel.c b/src/indel.c
index 10ae2a9..18d2aba 100644
--- a/src/indel.c
+++ b/src/indel.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indel.c 184486 2016-02-18 03:21:44Z twu $";
+static char rcsid[] = "$Id: indel.c 193229 2016-06-30 22:31:10Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -181,7 +181,8 @@ Indel_resolve_middle_insertion (int *best_nmismatches_i, int *best_nmismatches_j
return querylength - best_indel_pos - indels;
#else
} else {
- debug2(printf("Returning %d\n",best_indel_pos));
+ debug2(printf("Returning %d with mismatches %d+%d\n",
+ best_indel_pos,*best_nmismatches_i,*best_nmismatches_j));
return best_indel_pos;
#endif
}
@@ -331,7 +332,8 @@ Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
return querylength - best_indel_pos;
#else
} else {
- debug2(printf("Returning %d\n",best_indel_pos));
+ debug2(printf("Returning %d with nmismatches %d+%d\n",
+ best_indel_pos,*best_nmismatches_i,*best_nmismatches_j));
return best_indel_pos;
#endif
}
diff --git a/src/intlist.c b/src/intlist.c
index ea443a4..0dfd82d 100644
--- a/src/intlist.c
+++ b/src/intlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: intlist.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: intlist.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -103,6 +103,23 @@ Intlist_free_in (T *list) {
}
T
+Intlist_keep_one (T list, int i) {
+ T head; /* successor of the cell that is about to be freed */
+ /* Keeps only the cell at 0-based index i: pops and frees the i cells ahead of it, then passes the tail behind it to Intlist_free */
+ while (--i >= 0) {
+ /* Pop: remember the successor, FREE the current cell, then advance to the successor */
+ head = list->rest;
+ FREE(list);
+ list = head;
+ }
+ /* NOTE(review): list and list->rest are dereferenced without NULL checks; presumably callers guarantee more than i cells -- confirm */
+ Intlist_free(&list->rest);
+ return list;
+}
+
+
+
+T
Intlist_reverse (T list) {
T head = NULL, next;
diff --git a/src/intlist.h b/src/intlist.h
index 16851b5..ba85895 100644
--- a/src/intlist.h
+++ b/src/intlist.h
@@ -1,4 +1,4 @@
-/* $Id: intlist.h 166641 2015-05-29 21:13:04Z twu $ */
+/* $Id: intlist.h 193899 2016-07-12 04:41:34Z twu $ */
#ifndef INTLIST_INCLUDED
#define INTLIST_INCLUDED
@@ -27,6 +27,8 @@ extern void
Intlist_free (T *list);
extern void
Intlist_free_in (T *list);
+extern T
+Intlist_keep_one (T list, int i); /* returns the cell at 0-based index i; the cells before it are freed and the tail after it is handed to Intlist_free */
extern T
Intlist_reverse (T list);
extern int
diff --git a/src/pair.c b/src/pair.c
index c0cbc92..6085751 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 190563 2016-05-25 22:50:22Z twu $";
+static char rcsid[] = "$Id: pair.c 193885 2016-07-12 03:21:37Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3544,8 +3544,10 @@ print_endtypes (Filestring_T fp,
/* Based on print_pair_info in stage3hr.c */
static void
print_pair_info (Filestring_T fp, int insertlength, int pairscore, Pairtype_T pairtype) {
+#ifndef NO_COMPARE
FPRINTF(fp,"pair_score:%d",pairscore);
FPRINTF(fp,",insert_length:%d",insertlength);
+#endif
switch (pairtype) {
case CONCORDANT: break;
@@ -3555,6 +3557,7 @@ print_pair_info (Filestring_T fp, int insertlength, int pairscore, Pairtype_T pa
case CONCORDANT_TRANSLOCATIONS: break;
case CONCORDANT_TERMINAL: break;
case PAIRED_UNSPECIFIED: abort();
+ case PAIRED_TERMINALS: abort();
case UNSPECIFIED: break;
case UNPAIRED: abort();
}
@@ -3724,6 +3727,7 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
donor_typeint,acceptor_typeint);
if (firstp == true) {
+#ifndef NO_COMPARE
FPRINTF(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
switch (gmap_source) {
case GMAP_VIA_SUBSTRINGS: FPRINTF(fp,",method:gmap_via_substrings"); break;
@@ -3731,6 +3735,7 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
case GMAP_VIA_REGION: FPRINTF(fp,",method:gmap_via_region"); break;
case GMAP_NOT_APPLICABLE: abort();
}
+#endif
if (pairedp == true) {
FPRINTF(fp,"\t");
print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
@@ -3811,6 +3816,7 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
donor_typeint,acceptor_typeint);
if (firstp == true) {
+#ifndef NO_COMPARE
FPRINTF(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
switch (gmap_source) {
case GMAP_VIA_SUBSTRINGS: FPRINTF(fp,",method:gmap_via_substrings"); break;
@@ -3818,6 +3824,7 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
case GMAP_VIA_REGION: FPRINTF(fp,",method:gmap_via_region"); break;
case GMAP_NOT_APPLICABLE: abort();
}
+#endif
if (pairedp == true) {
FPRINTF(fp,"\t");
print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
@@ -3885,6 +3892,7 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
donor_typeint,acceptor_typeint);
if (firstp == true) {
+#ifndef NO_COMPARE
FPRINTF(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
switch (gmap_source) {
case GMAP_VIA_SUBSTRINGS: FPRINTF(fp,",method:gmap_via_substrings"); break;
@@ -3892,6 +3900,7 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
case GMAP_VIA_REGION: FPRINTF(fp,",method:gmap_via_region"); break;
case GMAP_NOT_APPLICABLE: abort();
}
+#endif
if (pairedp == true) {
FPRINTF(fp,"\t");
print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
@@ -4013,6 +4022,7 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
donor_typeint,acceptor_typeint);
if (firstp == true) {
+#ifndef NO_COMPARE
FPRINTF(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
switch (gmap_source) {
case GMAP_VIA_SUBSTRINGS: FPRINTF(fp,",method:gmap_via_substrings"); break;
@@ -4020,6 +4030,7 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
case GMAP_VIA_REGION: FPRINTF(fp,",method:gmap_via_region"); break;
case GMAP_NOT_APPLICABLE: abort();
}
+#endif
if (pairedp == true) {
FPRINTF(fp,"\t");
print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
@@ -4375,8 +4386,8 @@ Pair_guess_cdna_direction_array (int *sensedir, struct T *pairs_querydir, int np
this = ptr++;
i++;
}
- /* ptr--; */
- /* i--; */
+ ptr--;
+ i--;
splice_site_probs(&sense_prob,&antisense_prob,
prev_splicesitep,splicesitep,chroffset,
@@ -4395,8 +4406,8 @@ Pair_guess_cdna_direction_array (int *sensedir, struct T *pairs_querydir, int np
this = ptr++;
i++;
}
- /* ptr--; */
- /* i--; */
+ ptr--;
+ i--;
splice_site_probs(&sense_prob,&antisense_prob,
prev_splicesitep,splicesitep,chroffset,
@@ -7205,16 +7216,17 @@ Pair_array_nmatches_posttrim (struct T *pairarray, int npairs, int pos5, int pos
int
-Pair_nmismatches_region (int *nindelbreaks, struct T *pairs, int npairs,
+Pair_nmismatches_region (int *nindelbreaks, int *nbadintrons, struct T *pairs, int npairs,
int trim_left, int trim_right, int start_amb_nmatches, int end_amb_nmatches,
int querylength) {
int nmismatches = 0;
/* bool in_intron = false; */
/* bool indelp = false; */
+ bool in_exon = false;
int i = 0;
T this;
- *nindelbreaks = 0;
+ *nindelbreaks = *nbadintrons = 0;
/* Handle GMAP alignments that are not extended to the end */
this = &(pairs[0]);
@@ -7226,34 +7238,59 @@ Pair_nmismatches_region (int *nindelbreaks, struct T *pairs, int npairs,
while (i < npairs) {
this = &(pairs[i]);
- if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
- /* Count indelbreaks, even if outside of trimmed region */
- if (this->genome == ' ') {
- /* INSERTION */
- while (i < npairs && this->genome == ' ') {
- /* (*total_nindels) += 1; */
- this = &(pairs[i++]);
- }
- i--;
- (*nindelbreaks) += 1;
-
- } else if (this->cdna == ' ') {
- /* DELETION */
- while (i < npairs && this->cdna == ' ') {
- /* (*total_nindels) -= 1; */
- this = &(pairs[i++]);
+
+ if (this->gapp) {
+ if (in_exon == true) {
+ /* SPLICE START */
+ if (this->comp == FWD_CANONICAL_INTRON_COMP || this->comp == REV_CANONICAL_INTRON_COMP) {
+ /* Okay */
+ } else {
+ /* Count bad introns, even if outside of trimmed region */
+ (*nbadintrons) += 1;
}
- i--;
- (*nindelbreaks) += 1;
+ in_exon = false;
}
- } else if (this->querypos < trim_left) {
- /* Skip for counting mismatches */
- } else if (this->querypos >= querylength - trim_right) {
- /* Skip for counting mismatches */
- } else if (this->comp == MISMATCH_COMP) {
- nmismatches++;
+ } else if (this->comp == INTRONGAP_COMP) {
+ /* May want to print dinucleotides */
+
+ } else {
+ /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP,
+ SHORTGAP_COMP, or MISMATCH_COMP */
+ if (in_exon == false) {
+ /* SPLICE CONTINUATION */
+ in_exon = true;
+ }
+ if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
+ /* Count indelbreaks, even if outside of trimmed region */
+ if (this->genome == ' ') {
+ /* INSERTION */
+ while (i < npairs && this->genome == ' ') {
+ /* (*total_nindels) += 1; */
+ this = &(pairs[i++]);
+ }
+ i--;
+ (*nindelbreaks) += 1;
+
+ } else if (this->cdna == ' ') {
+ /* DELETION */
+ while (i < npairs && this->cdna == ' ') {
+ /* (*total_nindels) -= 1; */
+ this = &(pairs[i++]);
+ }
+ i--;
+ (*nindelbreaks) += 1;
+ }
+
+ } else if (this->querypos < trim_left) {
+ /* Skip for counting mismatches */
+ } else if (this->querypos >= querylength - trim_right) {
+ /* Skip for counting mismatches */
+ } else if (this->comp == MISMATCH_COMP) {
+ nmismatches++;
+ }
}
+
i++;
}
diff --git a/src/pair.h b/src/pair.h
index 6147d2b..0663017 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 188752 2016-05-01 17:28:22Z twu $ */
+/* $Id: pair.h 193230 2016-06-30 22:32:37Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -275,7 +275,7 @@ Pair_nmatches_posttrim (int *max_match_length, List_T pairs, int pos5, int pos3)
extern int
Pair_array_nmatches_posttrim (struct T *pairs, int npairs, int pos5, int pos3);
extern int
-Pair_nmismatches_region (int *nindelbreaks, struct T *pairs, int npairs,
+Pair_nmismatches_region (int *nindelbreaks, int *nbadintrons, struct T *pairs, int npairs,
int trim_left, int trim_right, int start_amb_nmatches, int end_amb_nmatches,
int querylength);
diff --git a/src/resulthr.c b/src/resulthr.c
index b54806c..778bf7a 100644
--- a/src/resulthr.c
+++ b/src/resulthr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: resulthr.c 186728 2016-03-30 23:04:10Z twu $";
+static char rcsid[] = "$Id: resulthr.c 193043 2016-06-29 20:34:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -56,6 +56,7 @@ Pairtype_string (Pairtype_T pairtype) {
case PAIRED_INVERSION: return "paired_scramble";
case PAIRED_SCRAMBLE: return "paired_scramble";
case PAIRED_TOOLONG: return "paired_toolong";
+ case PAIRED_TERMINALS: return "paired_terminals";
case CONCORDANT_TRANSLOCATIONS: return "concordant_translocations";
case CONCORDANT_TERMINAL: return "concordant_terminal";
case UNPAIRED: return "unpaired";
@@ -195,6 +196,13 @@ Result_paired_read_new (int id, void **resultarray, int npaths_primary, int npat
new->resulttype = PAIRED_MULT;
}
+ } else if (final_pairtype == PAIRED_TERMINALS) {
+ if (npaths_primary + npaths_altloc <= 1) {
+ new->resulttype = PAIRED_UNIQ;
+ } else {
+ new->resulttype = PAIRED_MULT;
+ }
+
} else if (final_pairtype == CONCORDANT) {
if (npaths_primary + npaths_altloc > 1) {
new->resulttype = CONCORDANT_MULT;
diff --git a/src/resulthr.h b/src/resulthr.h
index ba8e9b4..92c4420 100644
--- a/src/resulthr.h
+++ b/src/resulthr.h
@@ -1,4 +1,4 @@
-/* $Id: resulthr.h 182440 2016-01-15 22:42:45Z twu $ */
+/* $Id: resulthr.h 193043 2016-06-29 20:34:33Z twu $ */
#ifndef RESULTHR_INCLUDED
#define RESULTHR_INCLUDED
@@ -7,7 +7,8 @@
/* PAIRED_UNSPECIFIED assigned only by Stage1hr_paired_read */
typedef enum {CONCORDANT, PAIRED_UNSPECIFIED, PAIRED_INVERSION, PAIRED_SCRAMBLE, PAIRED_TOOLONG,
- CONCORDANT_TRANSLOCATIONS, CONCORDANT_TERMINAL, UNPAIRED, UNSPECIFIED} Pairtype_T;
+ PAIRED_TERMINALS, CONCORDANT_TRANSLOCATIONS, CONCORDANT_TERMINAL,
+ UNPAIRED, UNSPECIFIED} Pairtype_T;
typedef enum {SINGLEEND_NOMAPPING, PAIREDEND_NOMAPPING,
SINGLEEND_UNIQ, SINGLEEND_TRANSLOC, SINGLEEND_MULT,
diff --git a/src/samprint.c b/src/samprint.c
index bf47f05..3ca6985 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 186666 2016-03-29 21:52:28Z twu $";
+static char rcsid[] = "$Id: samprint.c 193892 2016-07-12 04:07:27Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -393,12 +393,10 @@ SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3
/* Since there is no mapping, we print the original query sequence. */
if (invertp == false) {
Shortread_print_chopped_sam(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0);
- FPRINTF(fp,"\t");
Shortread_print_quality(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0,
quality_shift,/*show_chopped_p*/false);
} else {
Shortread_print_chopped_revcomp_sam(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0);
- FPRINTF(fp,"\t");
Shortread_print_quality_revcomp(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0,
quality_shift,/*show_chopped_p*/false);
}
@@ -1231,10 +1229,9 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
/* 5. MAPQ: Mapping quality */
- FPRINTF(fp,"\t%d",mapq_score);
+ FPRINTF(fp,"\t%d\t",mapq_score);
/* 6. CIGAR */
- FPRINTF(fp,"\t");
substrings_LtoH = Stage3end_substrings_LtoH(stage3end);
junctions_LtoH = Stage3end_junctions_LtoH(stage3end);
substringL = (Substring_T) List_head(substrings_LtoH);
@@ -1583,12 +1580,10 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
/* Queryseq has already been inverted, so just measure plusp relative to its current state */
if (plusp == true) {
Shortread_print_chopped_sam(fp,queryseq,hardclip_low,hardclip_high);
- FPRINTF(fp,"\t");
Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
quality_shift,/*show_chopped_p*/false);
} else {
Shortread_print_chopped_revcomp_sam(fp,queryseq,hardclip_low,hardclip_high);
- FPRINTF(fp,"\t");
Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
quality_shift,/*show_chopped_p*/false);
}
@@ -2226,10 +2221,9 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Stage3end_T t
/* 5. MAPQ: Mapping quality */
- FPRINTF(fp,"\t%d",mapq_score);
+ FPRINTF(fp,"\t%d\t",mapq_score);
/* 6. CIGAR */
- FPRINTF(fp,"\t");
if (Stage3end_sensedir(this) == SENSE_ANTI) {
sensep = false;
} else {
@@ -2414,12 +2408,10 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Stage3end_T t
/* Queryseq has already been inverted, so just measure plusp relative to its current state */
if (plusp == true) {
Shortread_print_chopped_sam(fp,queryseq,hardclip_low,hardclip_high);
- FPRINTF(fp,"\t");
Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
quality_shift,/*show_chopped_p*/false);
} else {
Shortread_print_chopped_revcomp_sam(fp,queryseq,hardclip_low,hardclip_high);
- FPRINTF(fp,"\t");
Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
quality_shift,/*show_chopped_p*/false);
}
@@ -2742,10 +2734,9 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T acceptor, Stage3e
/* 5. MAPQ: Mapping quality */
- FPRINTF(fp,"\t%d",mapq_score);
+ FPRINTF(fp,"\t%d\t",mapq_score);
/* 6. CIGAR */
- FPRINTF(fp,"\t");
if (Stage3end_sensedir(this) == SENSE_ANTI) {
sensep = false;
} else {
@@ -2921,12 +2912,10 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T acceptor, Stage3e
/* Queryseq has already been inverted, so just measure plusp relative to its current state */
if (plusp == true) {
Shortread_print_chopped_sam(fp,queryseq,hardclip_low,hardclip_high);
- FPRINTF(fp,"\t");
Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
quality_shift,/*show_chopped_p*/false);
} else {
Shortread_print_chopped_revcomp_sam(fp,queryseq,hardclip_low,hardclip_high);
- FPRINTF(fp,"\t");
Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
quality_shift,/*show_chopped_p*/false);
}
diff --git a/src/sarray-read.c b/src/sarray-read.c
index 321510b..de751cf 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 191632 2016-06-09 22:02:23Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -45,6 +45,7 @@ static char rcsid[] = "$Id: sarray-read.c 191632 2016-06-09 22:02:23Z twu $";
#include "substring.h"
#include "junction.h"
#include "stage3hr.h"
+#include "sedgesort.h"
#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
@@ -2143,7 +2144,11 @@ Elt_fill_positions_all (Elt_T this, T sarray) {
this->positions_allocated = this->positions = (Univcoord_T *) NULL;
this->npositions_allocated = this->npositions = 0;
} else {
- this->positions_allocated = this->positions = (Univcoord_T *) CALLOC(this->npositions,sizeof(Univcoord_T));
+#ifdef USE_QSORT
+ this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T));
+#else
+ this->positions_allocated = this->positions = (Univcoord_T *) MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
+#endif
i = 0;
ptr = this->initptr;
while (ptr <= this->finalptr) {
@@ -2152,7 +2157,11 @@ Elt_fill_positions_all (Elt_T this, T sarray) {
}
}
this->npositions = i;
+#ifdef USE_QSORT
qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
+#else
+ Sedgesort_uint4(this->positions,this->npositions);
+#endif
}
}
@@ -2261,7 +2270,7 @@ fill_positions_std (int *npositions, Univcoord_T low_adj, Univcoord_T high_adj,
debug7(printf("Std method found %d positions\n",*npositions));
if (*npositions > GUESS_ALLOCATION) {
/* Copy the positions we have stored so far */
- more_positions = (Univcoord_T *) CALLOC(*npositions,sizeof(Univcoord_T));
+ more_positions = (Univcoord_T *) MALLOC((*npositions) * sizeof(Univcoord_T));
memcpy(more_positions,positions,GUESS_ALLOCATION*sizeof(Univcoord_T));
FREE(positions);
positions = more_positions;
@@ -2340,7 +2349,11 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
this->all_positions = (Univcoord_T *) NULL;
} else {
+#ifdef USE_QSORT
positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
+#else
+ positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1 + 1) * sizeof(Univcoord_T));
+#endif
low_adj = low + this->querystart;
high_adj = high + this->querystart;
@@ -2454,10 +2467,18 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
this->positions_allocated = this->positions = (Univcoord_T *) NULL;
} else {
debug7(printf("Sorting %d positions\n",this->npositions));
+#ifdef USE_QSORT
qsort(positions_temp,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
+#else
+ Sedgesort_uint4(positions_temp,this->npositions);
+#endif
/* Need to copy positions before the goal */
+#ifdef USE_QSORT
this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T));
+#else
+ this->positions_allocated = this->positions = MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
+#endif
memcpy(this->positions,positions_temp,this->npositions * sizeof(Univcoord_T));
#ifdef DEBUG7
for (i = 0; i < this->npositions; i++) {
@@ -2694,10 +2715,18 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
this->positions_allocated = this->positions = (Univcoord_T *) NULL;
} else {
debug7(printf("Sorting %d positions\n",this->npositions));
+#ifdef USE_QSORT
qsort(positions_temp,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
+#else
+ Sedgesort_uint4(positions_temp,this->npositions);
+#endif
/* Need to copy positions before the goal */
+#ifdef USE_QSORT
this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T));
+#else
+ this->positions_allocated = this->positions = MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
+#endif
memcpy(this->positions,positions_temp,this->npositions * sizeof(Univcoord_T));
#ifdef DEBUG7
for (i = 0; i < this->npositions; i++) {
@@ -2762,7 +2791,11 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
this->all_positions = (Univcoord_T *) NULL;
} else {
+#ifdef USE_QSORT
positions_temp = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
+#else
+ positions_temp = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1 + 1) * sizeof(Univcoord_T));
+#endif
low_adj = low + this->querystart;
high_adj = high + this->querystart;
@@ -2912,7 +2945,11 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
this->positions_allocated = this->positions = (Univcoord_T *) NULL;
} else {
debug7(printf("Sorting %d positions\n",this->npositions));
+#ifdef USE_QSORT
qsort(positions_temp,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
+#else
+ Sedgesort_uint4(positions_temp,this->npositions);
+#endif
/* Need to copy positions before the goal */
this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T));
@@ -2978,7 +3015,11 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
} else {
/* Guess at allocation size */
- this->positions_allocated = this->positions = (Univcoord_T *) CALLOC(GUESS_ALLOCATION,sizeof(Univcoord_T));
+#ifdef USE_QSORT
+ this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(GUESS_ALLOCATION * sizeof(Univcoord_T));
+#else
+ this->positions_allocated = this->positions = (Univcoord_T *) MALLOC((GUESS_ALLOCATION + 1) * sizeof(Univcoord_T));
+#endif
low_adj = low + this->querystart;
high_adj = high + this->querystart;
@@ -3157,7 +3198,11 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
/* Handle the case if we exceeded GUESS_ALLOCATION */
/* Copy the positions we have stored so far */
- more_positions = (Univcoord_T *) CALLOC(this->npositions,sizeof(Univcoord_T));
+#ifdef USE_QSORT
+ more_positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T));
+#else
+ more_positions = (Univcoord_T *) MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
+#endif
memcpy(more_positions,this->positions,GUESS_ALLOCATION*sizeof(Univcoord_T));
FREE(this->positions_allocated);
this->positions_allocated = this->positions = more_positions;
@@ -3270,7 +3315,11 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
#endif
}
+#ifdef USE_QSORT
qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
+#else
+ Sedgesort_uint4(this->positions,this->npositions);
+#endif
debug7(printf("Sorting %d positions\n",this->npositions));
#if 0
@@ -3419,7 +3468,11 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
this->all_positions = (Univcoord_T *) NULL;
this->n_all_positions = 0;
} else {
+#ifdef USE_QSORT
this->all_positions = (Univcoord_T *) MALLOC(this->n_all_positions*sizeof(Univcoord_T));
+#else
+ this->all_positions = (Univcoord_T *) MALLOC((this->n_all_positions + 1) * sizeof(Univcoord_T));
+#endif
#ifdef WORDS_BIGENDIAN
for (i = 0; i < this->n_all_positions; i++) {
this->all_positions[i] = Bigendian_convert_uint(sarray->array[this->initptr+i]);
@@ -3427,7 +3480,11 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
#else
memcpy(this->all_positions,&(sarray->array[this->initptr]),this->n_all_positions*sizeof(Univcoord_T));
#endif
+#ifdef USE_QSORT
qsort(this->all_positions,this->n_all_positions,sizeof(Univcoord_T),Univcoord_compare);
+#else
+ Sedgesort_uint4(this->all_positions,this->n_all_positions);
+#endif
}
#ifdef DEBUG10
for (i = 0; i < this->n_all_positions; i++) {
@@ -3736,9 +3793,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
debug7(printf("same is at %u from %d to %d\n",left,querystart_same,queryend_same));
n = Uintlist_length(difflist);
+#ifdef USE_QSORT
array = (UINT4 *) MALLOCA(n * sizeof(UINT4));
+#else
+ array = (UINT4 *) MALLOCA((n + 1) * sizeof(UINT4));
+#endif
Uintlist_fill_array_and_free(array,&difflist);
+#ifdef USE_QSORT
qsort(array,n,sizeof(Univcoord_T),Univcoord_compare);
+#else
+ Sedgesort(array,n);
+#endif
debug7(printf("Have %d matching diffs\n",n));
spliceends_sense = spliceends_antisense = (List_T) NULL;
@@ -4403,9 +4468,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
debug7(printf("same is at %u from %d to %d\n",left,querystart_same,queryend_same));
n = Uintlist_length(difflist);
+#ifdef USE_QSORT
array = (UINT4 *) MALLOCA(n * sizeof(UINT4));
+#else
+ array = (UINT4 *) MALLOCA((n + 1) * sizeof(UINT4));
+#endif
Uintlist_fill_array_and_free(array,&difflist);
+#ifdef USE_QSORT
qsort(array,n,sizeof(Univcoord_T),Univcoord_compare);
+#else
+ Sedgesort_uint4(array,n);
+#endif
debug7(printf("Have %d matching diffs\n",n));
spliceends_sense = spliceends_antisense = (List_T) NULL;
@@ -5082,12 +5155,13 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
bool successp;
UINT4 nmatches;
- int i, j;
+ int i, j, k;
List_T p;
Univdiag_T *diagonal_array, diagonal, prev_diagonal;
int querypos;
int ndiagonals;
+ List_T left_diagonals, right_diagonals;
#ifdef SUBDIVIDE_NOMATCHES
Chrpos_T low_chrpos, high_chrpos;
@@ -5471,7 +5545,7 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
List_free(&(elt_tree[best_i]));
- *all_right_diagonals = (List_T) NULL;
+ right_diagonals = (List_T) NULL;
for (i = nelts - 1; i > best_i; --i) { /* Go in this order to avoid reversing list at the end */
for (p = elt_tree[i]; p != NULL; p = List_next(p)) {
elt = (Elt_T) p->first;
@@ -5479,13 +5553,13 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
/* Created by oligoindex */
diagonal = Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,/*univdiagonal*/elt->positions[0]);
diagonal->nmismatches_known_p = false;
- *all_right_diagonals = List_push(*all_right_diagonals,(void *) diagonal);
+ right_diagonals = List_push(right_diagonals,(void *) diagonal);
} else if (elt->querystart_leftward < elt->queryend_leftward) {
for (j = elt->npositions - 1; j >= 0; --j) { /* Go in this order to avoid reversing list at the end */
debug13(printf("Creating right diagonal: query %d..%d (leftward %d..%d), diagonal %u\n",
elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset));
- *all_right_diagonals = List_push(*all_right_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,
- /*univdiagonal*/elt->positions[j]));
+ right_diagonals = List_push(right_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,
+ /*univdiagonal*/elt->positions[j]));
}
}
if (elt->temporaryp == true) {
@@ -5498,7 +5572,7 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
}
- *all_left_diagonals = (List_T) NULL;
+ left_diagonals = (List_T) NULL;
for (i = 0; i < best_i; i++) { /* Go in this order to avoid reversing list at the end */
for (p = elt_tree[i]; p != NULL; p = List_next(p)) {
elt = (Elt_T) p->first;
@@ -5506,13 +5580,13 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
/* Created by oligoindex */
diagonal = Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,/*univdiagonal*/elt->positions[0]);
diagonal->nmismatches_known_p = false; /* Signifies that we don't know the number of mismatches */
- *all_left_diagonals = List_push(*all_left_diagonals,(void *) diagonal);
+ left_diagonals = List_push(left_diagonals,(void *) diagonal);
} else if (elt->querystart_leftward < elt->queryend_leftward) {
for (j = 0; j < elt->npositions; j++) { /* Go in this order to avoid reversing list at the end */
debug13(printf("Creating left diagonal: query %d..%d (leftward %d..%d), diagonal %u\n",
elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset));
- *all_left_diagonals = List_push(*all_left_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,
- /*univdiagonal*/elt->positions[j]));
+ left_diagonals = List_push(left_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,
+ /*univdiagonal*/elt->positions[j]));
}
}
if (elt->temporaryp == true) {
@@ -5530,19 +5604,44 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
/* A. Compute right diagonals */
/* A1. Scoring for dynamic programming */
- diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,*all_right_diagonals);
+ diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,right_diagonals);
+ List_free(&right_diagonals);
#ifdef DEBUG12
- printf("Right side before sorting\n");
+ printf("Right side before consolidating\n");
for (i = 0; i < ndiagonals; i++) {
diagonal = diagonal_array[i];
printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
}
#endif
+ *all_right_diagonals = (List_T) NULL;
+ qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_diagonal_cmp);
+ i = 0;
+ while (i < ndiagonals) {
+ j = i;
+ while (j < ndiagonals && diagonal_array[j]->univdiagonal == diagonal_array[i]->univdiagonal) {
+ j++;
+ }
+ if (j == i) {
+ *all_right_diagonals = List_push(*all_right_diagonals,(void *) diagonal_array[i]);
+ } else {
+ *all_right_diagonals = List_push(*all_right_diagonals,
+ (void *) Univdiag_new(diagonal_array[i]->querystart,
+ diagonal_array[j-1]->queryend,
+ diagonal_array[i]->univdiagonal));
+ for (k = i; k < j; k++) {
+ Univdiag_free(&(diagonal_array[k]));
+ }
+ }
+ i = j;
+ }
+ FREE(diagonal_array);
+
/* TODO: May be able to skip this sorting step */
+ diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,*all_right_diagonals);
qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_ascending_cmp);
#ifdef DEBUG12
- printf("Right side after sorting\n");
+ printf("Right side after consolidating and sorting\n");
for (i = 0; i < ndiagonals; i++) {
diagonal = diagonal_array[i];
printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
@@ -5632,25 +5731,51 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
/* C. Compute left diagonals */
/* C1. Scoring for dynamic programming */
- diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,*all_left_diagonals);
+ diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,left_diagonals);
+ List_free(&left_diagonals);
#ifdef DEBUG12
- printf("Left side before sorting\n");
+ printf("Left side before consolidating\n");
for (i = 0; i < ndiagonals; i++) {
diagonal = diagonal_array[i];
printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
}
#endif
+ *all_left_diagonals = (List_T) NULL;
+ qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_diagonal_cmp);
+ i = 0;
+ while (i < ndiagonals) {
+ j = i;
+ while (j < ndiagonals && diagonal_array[j]->univdiagonal == diagonal_array[i]->univdiagonal) {
+ j++;
+ }
+ if (j == i) {
+ *all_left_diagonals = List_push(*all_left_diagonals,(void *) diagonal_array[i]);
+ } else {
+ *all_left_diagonals = List_push(*all_left_diagonals,
+ (void *) Univdiag_new(diagonal_array[i]->querystart,
+ diagonal_array[j-1]->queryend,
+ diagonal_array[i]->univdiagonal));
+ for (k = i; k < j; k++) {
+ Univdiag_free(&(diagonal_array[k]));
+ }
+ }
+ i = j;
+ }
+ FREE(diagonal_array);
+
/* TODO: May be able to skip this sorting step */
+ diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,*all_left_diagonals);
qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_descending_cmp);
#ifdef DEBUG12
- printf("Left side after sorting\n");
+ printf("Left side after consolidating and sorting\n");
for (i = 0; i < ndiagonals; i++) {
diagonal = diagonal_array[i];
printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
}
#endif
+
for (i = 0; i < ndiagonals; i++) {
diagonal = diagonal_array[i];
debug13(printf("%d: %d..%d at %u\n",i,diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
@@ -5804,6 +5929,11 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
int best_knowni_i, best_knowni_j, best_nmismatches_i, best_nmismatches_j;
double best_prob_i, best_prob_j;
+ Chrpos_T first_dist_sense, second_dist_sense, first_dist_antisense, second_dist_antisense;
+ double first_prob_sense, second_prob_sense, first_prob_antisense, second_prob_antisense;
+ int firsti_sense, secondi_sense, firsti_antisense, secondi_antisense;
+ int sensei, antisensei;
+
int segmenti_donor_nknown, segmentj_acceptor_nknown,
segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
#ifdef HAVE_ALLOCA
@@ -5900,7 +6030,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
ambig_path = (List_T) List_head(p);
diagonal = (Univdiag_T) List_head(ambig_path);
left = diagonal->univdiagonal;
- debug13(printf("left %u, prev_left %u\n",left,prev_left));
+ debug13(printf("left %u, prev_left %u, difference %d\n",left,prev_left,(int) left - prev_left));
if (left < prev_left) {
/* Insertion */
debug13(printf("Found insertion\n"));
@@ -5918,6 +6048,15 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
left = right_indel_diagonal->univdiagonal;
} else {
+ debug13(printf("Still have %d right_paths. Distinguish by looking for best splice\n",List_length(*right_paths)));
+ first_dist_sense = second_dist_sense = 0;
+ first_prob_sense = second_prob_sense = 0.0;
+ firsti_sense = secondi_sense = -1;
+ first_dist_antisense = second_dist_antisense = 0;
+ first_prob_antisense = second_prob_antisense = 0.0;
+ firsti_antisense = secondi_antisense = -1;
+ sensei = antisensei = 0;
+
for (p = *right_paths; p != NULL; p = List_next(p)) {
ambig_path = (List_T) List_head(p);
diagonal = (Univdiag_T) List_head(ambig_path);
@@ -5986,6 +6125,20 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
segmenti_donor_nknown,segmentj_acceptor_nknown,
segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
max_mismatches_allowed,plusp,genestrand)) >= 0) {
+ debug13(printf("Found sense splice_pos %d with probs %f and %f\n",splice_pos,best_prob_i,best_prob_j));
+ if (best_prob_i + best_prob_j > first_prob_sense) {
+ second_dist_sense = first_dist_sense;
+ second_prob_sense = first_prob_sense;
+ secondi_sense = firsti_sense;
+ first_dist_sense = left - prev_left;
+ first_prob_sense = best_prob_i + best_prob_j;
+ firsti_sense = sensei;
+ } else if (best_prob_i + best_prob_j > second_prob_sense) {
+ second_dist_sense = left - prev_left;
+ second_prob_sense = best_prob_i + best_prob_j;
+ secondi_sense = sensei;
+ }
+
*right_endpoints_sense = Intlist_push(*right_endpoints_sense,splice_pos);
*right_queryends_sense = Intlist_push(*right_queryends_sense,diagonal->queryend + 1);
*right_ambcoords_sense = Uintlist_push(*right_ambcoords_sense,left + splice_pos);
@@ -5994,6 +6147,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
*right_amb_nmismatchesj_sense = Intlist_push(*right_amb_nmismatchesj_sense,best_nmismatches_j);
*right_amb_probsi_sense = Doublelist_push(*right_amb_probsi_sense,best_prob_i);
*right_amb_probsj_sense = Doublelist_push(*right_amb_probsj_sense,best_prob_j);
+ sensei++;
}
if ((splice_pos = Splice_resolve_antisense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
@@ -6007,6 +6161,20 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
segmenti_donor_nknown,segmentj_acceptor_nknown,
segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
max_mismatches_allowed,plusp,genestrand)) >= 0) {
+ debug13(printf("Found antisense splice_pos %d with probs %f and %f\n",splice_pos,best_prob_i,best_prob_j));
+ if (best_prob_i + best_prob_j > first_prob_antisense) {
+ second_dist_antisense = first_dist_antisense;
+ second_prob_antisense = first_prob_antisense;
+ secondi_antisense = firsti_antisense;
+ first_dist_antisense = left - prev_left;
+ first_prob_antisense = best_prob_i + best_prob_j;
+ firsti_antisense = antisensei;
+ } else if (best_prob_i + best_prob_j > second_prob_antisense) {
+ second_dist_antisense = left - prev_left;
+ second_prob_antisense = best_prob_i + best_prob_j;
+ secondi_antisense = antisensei;
+ }
+
*right_endpoints_antisense = Intlist_push(*right_endpoints_antisense,splice_pos);
*right_queryends_antisense = Intlist_push(*right_queryends_antisense,diagonal->queryend + 1);
*right_ambcoords_antisense = Uintlist_push(*right_ambcoords_antisense,left + splice_pos);
@@ -6015,6 +6183,39 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
*right_amb_nmismatchesj_antisense = Intlist_push(*right_amb_nmismatchesj_antisense,best_nmismatches_j);
*right_amb_probsi_antisense = Doublelist_push(*right_amb_probsi_antisense,best_prob_i);
*right_amb_probsj_antisense = Doublelist_push(*right_amb_probsj_antisense,best_prob_j);
+ antisensei++;
+ }
+ }
+
+ if (Intlist_length(*right_endpoints_sense) > 1) {
+ if (first_dist_sense < second_dist_sense/2) {
+ debug13(printf("first dist sense %u is significantly shorter than second dist sense %u. Keeping %d from end\n",
+ first_dist_sense,second_dist_sense,firsti_sense));
+ firsti_sense = Intlist_length(*right_endpoints_sense) - 1 - firsti_sense; /* Because we don't reverse lists */
+ *right_endpoints_sense = Intlist_keep_one(*right_endpoints_sense,firsti_sense);
+ *right_queryends_sense = Intlist_keep_one(*right_queryends_sense,firsti_sense);
+ *right_ambcoords_sense = Uintlist_keep_one(*right_ambcoords_sense,firsti_sense);
+ *right_amb_knowni_sense = Intlist_keep_one(*right_amb_knowni_sense,firsti_sense);
+ *right_amb_nmismatchesi_sense = Intlist_keep_one(*right_amb_nmismatchesi_sense,firsti_sense);
+ *right_amb_nmismatchesj_sense = Intlist_keep_one(*right_amb_nmismatchesj_sense,firsti_sense);
+ *right_amb_probsi_sense = Doublelist_keep_one(*right_amb_probsi_sense,firsti_sense);
+ *right_amb_probsj_sense = Doublelist_keep_one(*right_amb_probsj_sense,firsti_sense);
+ }
+ }
+
+ if (Intlist_length(*right_endpoints_antisense) > 1) {
+ if (first_dist_antisense < second_dist_antisense/2) {
+ debug13(printf("first dist antisense %u is significantly shorter than second dist antisense %u. Keeping %d from end\n",
+ first_dist_antisense,second_dist_antisense,firsti_antisense));
+ firsti_antisense = Intlist_length(*right_endpoints_antisense) - 1 - firsti_antisense; /* Because we don't reverse lists */
+ *right_endpoints_antisense = Intlist_keep_one(*right_endpoints_antisense,firsti_antisense);
+ *right_queryends_antisense = Intlist_keep_one(*right_queryends_antisense,firsti_antisense);
+ *right_ambcoords_antisense = Uintlist_keep_one(*right_ambcoords_antisense,firsti_antisense);
+ *right_amb_knowni_antisense = Intlist_keep_one(*right_amb_knowni_antisense,firsti_antisense);
+ *right_amb_nmismatchesi_antisense = Intlist_keep_one(*right_amb_nmismatchesi_antisense,firsti_antisense);
+ *right_amb_nmismatchesj_antisense = Intlist_keep_one(*right_amb_nmismatchesj_antisense,firsti_antisense);
+ *right_amb_probsi_antisense = Doublelist_keep_one(*right_amb_probsi_antisense,firsti_antisense);
+ *right_amb_probsj_antisense = Doublelist_keep_one(*right_amb_probsj_antisense,firsti_antisense);
}
}
}
@@ -6198,7 +6399,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
ambig_path = (List_T) List_head(p);
prev_diagonal = (Univdiag_T) List_head(ambig_path);
prev_left = prev_diagonal->univdiagonal;
- debug13(printf("left %u, prev_left %u\n",left,prev_left));
+ debug13(printf("left %u, prev_left %u, difference %d\n",left,prev_left,(int) left - prev_left));
if (left < prev_left) {
/* Insertion */
debug13(printf("Found insertion\n"));
@@ -6216,6 +6417,15 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
/* queryend = left_indel_diagonal->querystart; */
} else {
+ debug13(printf("Still have %d left_paths. Distinguish by looking for best splice\n",List_length(*left_paths)));
+ first_dist_sense = second_dist_sense = 0;
+ first_prob_sense = second_prob_sense = 0.0;
+ firsti_sense = secondi_sense = -1;
+ first_dist_antisense = second_dist_antisense = 0;
+ first_prob_antisense = second_prob_antisense = 0.0;
+ firsti_antisense = secondi_antisense = -1;
+ sensei = antisensei = 0;
+
for (p = *left_paths; p != NULL; p = List_next(p)) {
ambig_path = (List_T) List_head(p);
prev_diagonal = (Univdiag_T) List_head(ambig_path);
@@ -6284,6 +6494,20 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
segmenti_donor_nknown,segmentj_acceptor_nknown,
segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
max_mismatches_allowed,plusp,genestrand)) >= 0) {
+ debug13(printf("Found sense splice_pos %d with probs %f and %f\n",splice_pos,best_prob_i,best_prob_j));
+ if (best_prob_i + best_prob_j > first_prob_sense) {
+ second_dist_sense = first_dist_sense;
+ second_prob_sense = first_prob_sense;
+ secondi_sense = firsti_sense;
+ first_dist_sense = left - prev_left;
+ first_prob_sense = best_prob_i + best_prob_j;
+ firsti_sense = sensei;
+ } else if (best_prob_i + best_prob_j > second_prob_sense) {
+ second_dist_sense = left - prev_left;
+ second_prob_sense = best_prob_i + best_prob_j;
+ secondi_sense = sensei;
+ }
+
*left_endpoints_sense = Intlist_push(*left_endpoints_sense,splice_pos);
*left_querystarts_sense = Intlist_push(*left_querystarts_sense,prev_diagonal->querystart);
*left_ambcoords_sense = Uintlist_push(*left_ambcoords_sense,prev_left + splice_pos);
@@ -6292,6 +6516,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
*left_amb_nmismatchesj_sense = Intlist_push(*left_amb_nmismatchesj_sense,best_nmismatches_j);
*left_amb_probsi_sense = Doublelist_push(*left_amb_probsi_sense,best_prob_i);
*left_amb_probsj_sense = Doublelist_push(*left_amb_probsj_sense,best_prob_j);
+ sensei++;
}
if ((splice_pos = Splice_resolve_antisense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
@@ -6305,6 +6530,20 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
segmenti_donor_nknown,segmentj_acceptor_nknown,
segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
max_mismatches_allowed,plusp,genestrand)) >= 0) {
+ debug13(printf("Found antisense splice_pos %d with probs %f and %f\n",splice_pos,best_prob_i,best_prob_j));
+ if (best_prob_i + best_prob_j > first_prob_antisense) {
+ second_dist_antisense = first_dist_antisense;
+ second_prob_antisense = first_prob_antisense;
+ secondi_antisense = firsti_antisense;
+ first_dist_antisense = left - prev_left;
+ first_prob_antisense = best_prob_i + best_prob_j;
+ firsti_antisense = antisensei;
+ } else if (best_prob_i + best_prob_j > second_prob_antisense) {
+ second_dist_antisense = left - prev_left;
+ second_prob_antisense = best_prob_i + best_prob_j;
+ secondi_antisense = antisensei;
+ }
+
*left_endpoints_antisense = Intlist_push(*left_endpoints_antisense,splice_pos);
*left_querystarts_antisense = Intlist_push(*left_querystarts_antisense,prev_diagonal->querystart);
*left_ambcoords_antisense = Uintlist_push(*left_ambcoords_antisense,prev_left + splice_pos);
@@ -6313,6 +6552,39 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
*left_amb_nmismatchesj_antisense = Intlist_push(*left_amb_nmismatchesj_antisense,best_nmismatches_j);
*left_amb_probsi_antisense = Doublelist_push(*left_amb_probsi_antisense,best_prob_i);
*left_amb_probsj_antisense = Doublelist_push(*left_amb_probsj_antisense,best_prob_j);
+ antisensei++;
+ }
+ }
+
+ if (Intlist_length(*left_endpoints_sense) > 1) {
+ if (first_dist_sense < second_dist_sense/2) {
+ debug13(printf("first dist sense %u is significantly shorter than second dist sense %u. Keeping %d from end\n",
+ first_dist_sense,second_dist_sense,firsti_sense));
+ firsti_sense = Intlist_length(*left_endpoints_sense) - 1 - firsti_sense; /* Because we don't reverse lists */
+ *left_endpoints_sense = Intlist_keep_one(*left_endpoints_sense,firsti_sense);
+ *left_querystarts_sense = Intlist_keep_one(*left_querystarts_sense,firsti_sense);
+ *left_ambcoords_sense = Uintlist_keep_one(*left_ambcoords_sense,firsti_sense);
+ *left_amb_knowni_sense = Intlist_keep_one(*left_amb_knowni_sense,firsti_sense);
+ *left_amb_nmismatchesi_sense = Intlist_keep_one(*left_amb_nmismatchesi_sense,firsti_sense);
+ *left_amb_nmismatchesj_sense = Intlist_keep_one(*left_amb_nmismatchesj_sense,firsti_sense);
+ *left_amb_probsi_sense = Doublelist_keep_one(*left_amb_probsi_sense,firsti_sense);
+ *left_amb_probsj_sense = Doublelist_keep_one(*left_amb_probsj_sense,firsti_sense);
+ }
+ }
+
+ if (Intlist_length(*left_endpoints_antisense) > 1) {
+ if (first_dist_antisense < second_dist_antisense/2) {
+ debug13(printf("first dist antisense %u is significantly shorter than second dist antisense %u. Keeping %d from end\n",
+ first_dist_antisense,second_dist_antisense,firsti_antisense));
+ firsti_antisense = Intlist_length(*left_endpoints_antisense) - 1 - firsti_antisense; /* Because we don't reverse lists */
+ *left_endpoints_antisense = Intlist_keep_one(*left_endpoints_antisense,firsti_antisense);
+ *left_querystarts_antisense = Intlist_keep_one(*left_querystarts_antisense,firsti_antisense);
+ *left_ambcoords_antisense = Uintlist_keep_one(*left_ambcoords_antisense,firsti_antisense);
+ *left_amb_knowni_antisense = Intlist_keep_one(*left_amb_knowni_antisense,firsti_antisense);
+ *left_amb_nmismatchesi_antisense = Intlist_keep_one(*left_amb_nmismatchesi_antisense,firsti_antisense);
+ *left_amb_nmismatchesj_antisense = Intlist_keep_one(*left_amb_nmismatchesj_antisense,firsti_antisense);
+ *left_amb_probsi_antisense = Doublelist_keep_one(*left_amb_probsi_antisense,firsti_antisense);
+ *left_amb_probsj_antisense = Doublelist_keep_one(*left_amb_probsj_antisense,firsti_antisense);
}
}
}
@@ -6944,7 +7216,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
Chrpos_T splice_distance;
int querystart_for_merge, querystart, queryend, ignore;
int max_leftward;
- int nmismatches;
+ int nmismatches, prev_nmismatches;
bool fillin_p;
int indel_pos;
@@ -7114,12 +7386,18 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
antisense_junctions = List_push(antisense_junctions,Junction_new_insertion(nindels));
}
- sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
- sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_i);
+ if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
+ /* Still need to compute */
+ } else {
+ Intlist_head_set(sense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ }
sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
- antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
- antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_i);
+ if ((prev_nmismatches = Intlist_head(antisense_nmismatches)) < 0) {
+ /* Still need to compute */
+ } else {
+ Intlist_head_set(antisense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ }
antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
sense_lefts = Uintlist_push(sense_lefts,prev_left);
@@ -7127,7 +7405,8 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
sense_endpoints = Intlist_push(sense_endpoints,indel_pos);
antisense_endpoints = Intlist_push(antisense_endpoints,indel_pos);
- debug13(printf("insertion pos in range %d..%d is %d\n",prev_diagonal->querystart,diagonal->queryend,indel_pos));
+ debug13(printf("insertion pos in range %d..%d is %d with nmatches %d+%d\n",
+ prev_diagonal->querystart,diagonal->queryend,indel_pos,best_nmismatches_i,best_nmismatches_j));
} else if (left <= prev_left + max_deletionlen) {
/* Deletion */
@@ -7154,12 +7433,18 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
antisense_junctions = List_push(antisense_junctions,Junction_new_deletion(nindels,deletionpos));
}
- sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
- sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_i);
+ if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
+ /* Still need to compute */
+ } else {
+ Intlist_head_set(sense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ }
sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
- antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
- antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_i);
+ if ((prev_nmismatches = Intlist_head(antisense_nmismatches)) < 0) {
+ /* Still need to compute */
+ } else {
+ Intlist_head_set(antisense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ }
antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
sense_lefts = Uintlist_push(sense_lefts,prev_left);
@@ -7167,7 +7452,8 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
sense_endpoints = Intlist_push(sense_endpoints,indel_pos);
antisense_endpoints = Intlist_push(antisense_endpoints,indel_pos);
- debug13(printf("deletion pos in range %d..%d is %d\n",prev_diagonal->querystart,diagonal->queryend,indel_pos));
+ debug13(printf("deletion pos in range %d..%d is %d with nmismatches %d+%d\n",
+ prev_diagonal->querystart,diagonal->queryend,indel_pos,best_nmismatches_i,best_nmismatches_j));
} else {
/* Splice */
@@ -7245,9 +7531,13 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
/*donor_prob*/best_prob_j,/*acceptor_prob*/best_prob_i));
}
- debug13(printf("sense splice_pos in range %d..%d is %d\n",prev_diagonal->querystart,diagonal->queryend,splice_pos));
- sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
- sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_i);
+ debug13(printf("sense splice_pos in range %d..%d is %d with mismatches %d+%d\n",
+ prev_diagonal->querystart,diagonal->queryend,splice_pos,best_nmismatches_i,best_nmismatches_j));
+ if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
+ /* Still need to compute */
+ } else {
+ Intlist_head_set(sense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ }
sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
sense_lefts = Uintlist_push(sense_lefts,prev_left);
@@ -7273,9 +7563,13 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
/*donor_prob*/best_prob_i,/*acceptor_prob*/best_prob_j));
}
- debug13(printf("antisense splice_pos in range %d..%d is %d\n",prev_diagonal->querystart,diagonal->queryend,splice_pos));
- antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
- antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_i);
+ debug13(printf("antisense splice_pos in range %d..%d is %d with nmismatches %d+%d\n",
+ prev_diagonal->querystart,diagonal->queryend,splice_pos,best_nmismatches_i,best_nmismatches_j));
+ if ((prev_nmismatches = Intlist_head(antisense_nmismatches)) < 0) {
+ /* Still need to compute */
+ } else {
+ Intlist_head_set(antisense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ }
antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
}
@@ -7358,7 +7652,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
right_ambcoords_sense,right_amb_knowni_sense,
right_amb_nmismatchesj_sense,right_amb_probsj_sense,
/*amb_common_prob*/Doublelist_head(right_amb_probsi_sense),
- /*amb_donor_common_p*/false,/*substring1p*/false);
+ /*amb_donor_common_p*/false,/*substring1p*/true);
}
}
@@ -7433,7 +7727,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
right_ambcoords_antisense,right_amb_knowni_antisense,
right_amb_nmismatchesj_antisense,right_amb_probsj_antisense,
/*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense),
- /*amb_donor_common_p*/true,/*substring1p*/false);
+ /*amb_donor_common_p*/true,/*substring1p*/true);
}
}
@@ -7556,7 +7850,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
left_ambcoords_sense,left_amb_knowni_sense,
left_amb_nmismatchesi_sense,left_amb_probsi_sense,
/*amb_common_prob*/Doublelist_head(left_amb_probsj_sense),
- /*amb_donor_common_p*/true,/*substring1p*/true);
+ /*amb_donor_common_p*/true,/*substring1p*/false);
}
}
@@ -7655,7 +7949,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
left_ambcoords_antisense,left_amb_knowni_antisense,
left_amb_nmismatchesi_antisense,left_amb_probsi_antisense,
/*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense),
- /*amb_donor_common_p*/false,/*substring1p*/true);
+ /*amb_donor_common_p*/false,/*substring1p*/false);
}
}
@@ -7903,12 +8197,13 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
#endif
+ debug(printf("\nStarting Sarray_search_greedy with querylength %d and indexsize %d and nmisses_allowed %d, genestrand %d\n",
+ querylength,sarray_fwd->indexsize,nmisses_allowed,genestrand));
if (nmisses_allowed < 0) {
nmisses_allowed = 0;
+ } else {
+ nmisses_allowed = querylength;
}
- debug(printf("\nStarting Sarray_search_greedy with querylength %d and indexsize %d and nmisses_allowed %d, genestrand %d\n",
- querylength,sarray_fwd->indexsize,nmisses_allowed,genestrand));
-
*found_score = querylength;
if (genestrand == +2) {
diff --git a/src/sedgesort.c b/src/sedgesort.c
new file mode 100644
index 0000000..5fde4cc
--- /dev/null
+++ b/src/sedgesort.c
@@ -0,0 +1,203 @@
+static char rcsid[] = "$Id: sedgesort.c 193883 2016-07-12 03:14:35Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#include "sedgesort.h"
+
+
+#define CUTOFF 50
+#define SWAP(x,y) {temp = (x); (x) = (y); (y) = temp;}
+
+#if 0
+/* insertion sort */
+static void
+insort_uint4 (register unsigned int array[], register int len) {
+ register int i, j;
+ register unsigned int temp;
+
+ for (i = 1; i < len; i++) {
+ j = i;
+ temp = array[j];
+ while (j > 0 && array[j-1] > temp) {
+ array[j] = array[j-1];
+ j--;
+ }
+ array[j] = temp;
+ }
+
+ return;
+}
+#endif
+
+
+#if 0
+static void
+insertion_sort_uint4 (register unsigned int array[], register int len) {
+ register int i, j;
+ register unsigned int *hi, *lo;
+ register unsigned int temp;
+ int thresh;
+
+ thresh = (len < CUTOFF + 1) ? len : CUTOFF + 1;
+
+ /* Find smallest element in first threshold and place it at array
+ beginning. This is the smallest array element, and the operation
+ speeds up the inner loop of insertion sort */
+
+ j = 0;
+ for (i = 1; i < thresh; i++) {
+ if (array[i] < array[j]) {
+ j = i;
+ }
+ }
+ SWAP(array[j],array[0]);
+
+
+#if 0
+ /* Insertion sort, running from left to right */
+ i = 1;
+ while (++i < len) {
+ temp = array[i];
+ j = i - 1;
+ while (temp < array[j]) {
+ j--;
+ }
+ j++;
+
+ if (j != i) {
+ hi = lo = &(array[i]);
+ while (--lo >= &(array[j])) {
+ *hi = *lo;
+ hi = lo;
+ }
+ *hi = temp;
+ }
+ }
+#else
+ /* Faster under -O3 */
+ for (i = 1; i < len; i++) {
+ j = i;
+ temp = array[j];
+ while (array[j-1] > temp) {
+ array[j] = array[j-1];
+ j--;
+ }
+ array[j] = temp;
+ }
+#endif
+
+
+ return;
+}
+#endif
+
+
+static void
+insertion_sort_ptr_uint4 (register unsigned int array[], register int len) {
+ register unsigned int *ptri, *ptrj, *ptrk;
+ register unsigned int temp;
+ int thresh;
+
+ thresh = (len < CUTOFF + 1) ? len : CUTOFF + 1;
+
+ /* Find the smallest of the first thresh elements and move it to array[0].
+ After the partial quicksort this is the smallest element of the whole array,
+ so it acts as a sentinel for the inner loop below, which has no bounds check */
+
+ ptrj = &(array[0]);
+ for (ptri = &(array[1]); ptri < &(array[thresh]); ptri++) {
+ if (*ptri < *ptrj) {
+ ptrj = ptri;
+ }
+ }
+ SWAP(array[0],*ptrj);
+
+ for (ptri = &(array[1]); ptri < &(array[len]); ptri++) {
+ temp = *ptri;
+ ptrk = ptri;
+ ptrj = ptri - 1;
+ while (temp < *ptrj) {
+ *ptrk = *ptrj;
+ ptrk = ptrj--;
+ }
+ *ptrk = temp;
+ }
+
+ return;
+}
+
+
+#if 0
+static void
+partial_quickersort_uint4 (register unsigned int array[], register int lower,
+ register int upper) {
+ register int i, j;
+ register unsigned int temp, pivot;
+
+ if (upper - lower > CUTOFF) {
+ SWAP(array[lower], array[(upper+lower)/2]);
+ i = lower;
+ j = upper + 1;
+ pivot = array[lower];
+
+ while (1) {
+ do i++; while (array[i] < pivot);
+ do j--; while (array[j] > pivot);
+ if (j < i) break;
+ SWAP(array[i], array[j]);
+ }
+
+ SWAP(array[lower], array[j]);
+ partial_quickersort_uint4(array, lower, j-1);
+ partial_quickersort_uint4(array, i, upper);
+ }
+
+ return;
+}
+#endif
+
+
+static void
+partial_quickersort_ptr_uint4 (register unsigned int array[], register int lower,
+ register int upper) {
+ register unsigned int *ptri, *ptrj;
+ register unsigned int temp, pivot;
+
+ if (upper - lower > CUTOFF) {
+ SWAP(array[lower], array[(upper+lower)/2]);
+ ptri = &(array[lower]);
+ ptrj = &(array[upper + 1]);
+ pivot = *ptri;
+
+ while (1) {
+ do ptri++; while (*ptri < pivot);
+ do ptrj--; while (*ptrj > pivot);
+ if (ptrj < ptri) break;
+ SWAP(*ptri, *ptrj);
+ }
+
+ SWAP(array[lower], *ptrj);
+ partial_quickersort_ptr_uint4(array, lower, (ptrj-1) - array);
+ partial_quickersort_ptr_uint4(array, ptri - array, upper);
+ }
+
+ return;
+}
+
+
+/* Requires array[len] to be allocated and writable; it is overwritten with a UINT_MAX sentinel */
+void
+Sedgesort_uint4 (register unsigned int array[], register int len) {
+
+ array[len] = -1U;
+
+ partial_quickersort_ptr_uint4(array, 0, len-1);
+ insertion_sort_ptr_uint4(array, len);
+ return;
+}
+
+
+
+
diff --git a/src/sedgesort.h b/src/sedgesort.h
new file mode 100644
index 0000000..28512a3
--- /dev/null
+++ b/src/sedgesort.h
@@ -0,0 +1,9 @@
+/* $Id: sedgesort.h 193883 2016-07-12 03:14:35Z twu $ */
+#ifndef SEDGESORT_INCLUDED
+#define SEDGESORT_INCLUDED
+
+extern void
+Sedgesort_uint4 (register unsigned int array[], register int len);
+
+#endif
+
diff --git a/src/shortread.c b/src/shortread.c
index a4d6b99..0ac4f4b 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 183889 2016-02-05 20:34:55Z twu $";
+static char rcsid[] = "$Id: shortread.c 193894 2016-07-12 04:09:51Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2802,7 +2802,7 @@ Shortread_read_fasta_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
}
} else {
@@ -2828,7 +2828,7 @@ Shortread_read_fasta_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
}
}
- debug(printf("** Getting header\n"));
+ debug(printf("** Getting header for fasta text\n"));
if ((acc = input_header(&(*nchars1),&filterp,&restofheader,*nextchar,*input1,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
@@ -3078,7 +3078,7 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
}
#else
if ((*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
@@ -3137,7 +3137,7 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
}
}
- debug(printf("** Getting header\n"));
+ debug(printf("** Getting header for fasta filecontents\n"));
if ((acc = input_header_filecontents(&filterp,&restofheader,*nextchar,&(*filecontents1),skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
@@ -3355,7 +3355,7 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
}
} else {
@@ -3384,7 +3384,7 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
}
}
- debug(printf("** Getting header\n"));
+ debug(printf("** Getting header for fasta gzip\n"));
if ((acc = input_header_gzip(&filterp,&restofheader,*nextchar,
#ifdef USE_MPI
filestring1,
@@ -3673,7 +3673,7 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
}
} else {
@@ -3699,7 +3699,7 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
}
}
- debug(printf("** Getting header\n"));
+ debug(printf("** Getting header for fasta bzip2\n"));
if ((acc = input_header_bzip2(&filterp,&restofheader,*nextchar,
#ifdef USE_MPI
filestring1,
@@ -3984,7 +3984,7 @@ Shortread_read_fastq_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
}
} else {
@@ -4008,7 +4008,13 @@ Shortread_read_fastq_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
}
}
- debug(printf("** Getting header\n"));
+ if (*nextchar == '\0') {
+ if ((*nextchar = Shortread_input_init(&(*nchars1),*input1)) == EOF) {
+ return (T) NULL;
+ }
+ }
+
+ debug(printf("** Getting header for fastq text\n"));
if ((acc = input_header_fastq(&(*nchars1),&filterp,&restofheader,*nextchar,*input1,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
@@ -4179,7 +4185,7 @@ read_fastq_filecontents (int *nextchar, T *queryseq2,
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
}
#else
if ((*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
@@ -4238,7 +4244,13 @@ read_fastq_filecontents (int *nextchar, T *queryseq2,
}
}
- debug(printf("** Getting header\n"));
+ if (*nextchar == '\0') {
+ if ((*nextchar = Shortread_input_init_filecontents(&(*filecontents1))) == '\0') {
+ return (T) NULL;
+ }
+ }
+
+ debug(printf("** Getting header for fastq filecontents\n"));
if ((acc = input_header_fastq_filecontents(&filterp,&restofheader,*nextchar,&(*filecontents1),skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
@@ -4389,7 +4401,7 @@ Shortread_read_fastq_gzip (int *nextchar, T *queryseq2,
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
} else {
if ((*input1 = gzopen((*files)[0],"rb")) == NULL) {
@@ -4416,7 +4428,14 @@ Shortread_read_fastq_gzip (int *nextchar, T *queryseq2,
}
}
- debug(printf("** Getting header\n"));
+
+ if (*nextchar == '\0') {
+ if ((*nextchar = Shortread_input_init_gzip(*input1)) == EOF) {
+ return (T) NULL;
+ }
+ }
+
+ debug(printf("** Getting header for fastq gzip\n"));
if ((acc = input_header_fastq_gzip(&filterp,&restofheader,*nextchar,
#ifdef USE_MPI
filestring1,
@@ -4594,7 +4613,7 @@ Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2,
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
} else {
if ((*input1 = Bzip2_new((*files)[0])) == NULL) {
@@ -4613,7 +4632,13 @@ Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2,
}
}
- debug(printf("** Getting header\n"));
+ if (*nextchar == '\0') {
+ if ((*nextchar = Shortread_input_init_bzip2(*input1)) == EOF) {
+ return (T) NULL;
+ }
+ }
+
+ debug(printf("** Getting header for fastq bzip2\n"));
if ((acc = input_header_fastq_bzip2(&filterp,&restofheader,*nextchar,
#ifdef USE_MPI
filestring1,
@@ -5114,12 +5139,21 @@ void
Shortread_print_oneline_revcomp (Filestring_T fp, T this) {
int i = 0;
+#if 0
for (i = this->fulllength-1; i >= 0; --i) {
FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
}
+#else
+ FPRINTF(fp,"%.*R",this->fulllength,this->contents);
+#endif
+
+#if 0
for (i = this->choplength-1; i >= 0; --i) {
FPRINTF(fp,"%c",complCode[(int) this->chop[i]]);
}
+#else
+ FPRINTF(fp,"%.*R",this->choplength,this->chop);
+#endif
return;
}
@@ -5132,15 +5166,16 @@ Shortread_print_chopped_sam (Filestring_T fp, T this, int hardclip_low, int hard
#endif
if (this->fulllength == 0 || isspace(this->contents[0])) {
- FPRINTF(fp,"\t(null)");
+ FPRINTF(fp,"\t(null)\t");
} else {
#ifdef PRINT_INDIVIDUAL_CHARS
FPRINTF(fp,"\t");
for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
FPRINTF(fp,"%c",this->contents[i]);
}
+ FPRINTF(fp,"\t");
#else
- FPRINTF(fp,"\t%.*s",this->fulllength - hardclip_high - hardclip_low,&(this->contents[hardclip_low]));
+ FPRINTF(fp,"\t%.*s\t",this->fulllength - hardclip_high - hardclip_low,&(this->contents[hardclip_low]));
#endif
}
return;
@@ -5150,10 +5185,15 @@ void
Shortread_print_chopped_revcomp_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
int i;
+#ifdef PRINT_INDIVIDUAL_CHARS
FPRINTF(fp,"\t");
for (i = this->fulllength - 1 - hardclip_low; i >= hardclip_high; --i) {
FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
}
+ FPRINTF(fp,"\t");
+#else
+ FPRINTF(fp,"\t%.*R\t",this->fulllength - hardclip_high - hardclip_low,&(this->contents[hardclip_high]));
+#endif
return;
}
@@ -5173,8 +5213,7 @@ Shortread_print_chopped_end (Filestring_T fp, T this, int hardclip_low, int hard
#else
FPRINTF(fp,"%.*s",hardclip_low,&(this->contents[0]));
#endif
- return;
-
+
} else {
#ifdef PRINT_INDIVIDUAL_CHARS
for (i = this->fulllength - hardclip_high; i < this->fulllength; i++) {
@@ -5183,8 +5222,9 @@ Shortread_print_chopped_end (Filestring_T fp, T this, int hardclip_low, int hard
#else
FPRINTF(fp,"%.*s",hardclip_high,&(this->contents[this->fulllength - hardclip_high]));
#endif
- return;
}
+
+ return;
}
/* For samprint XH field */
@@ -5193,17 +5233,25 @@ Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low,
int i;
if (hardclip_low > 0) {
+#ifdef PRINT_INDIVIDUAL_CHARS
for (i = this->fulllength - 1; i >= this->fulllength - hardclip_low; --i) {
FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
}
- return;
+#else
+ FPRINTF(fp,"%.*R",hardclip_low,&(this->contents[this->fulllength - hardclip_low]));
+#endif
} else {
+#ifdef PRINT_INDIVIDUAL_CHARS
for (i = hardclip_high - 1; i >= 0; --i) {
FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
}
- return;
+#else
+ FPRINTF(fp,"%.*R",hardclip_high,&(this->contents[0]));
+#endif
}
+
+ return;
}
@@ -5242,17 +5290,25 @@ Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardcl
int i;
if (hardclip_low > 0) {
+#ifdef PRINT_INDIVIDUAL_CHARS
for (i = this->fulllength - 1; i >= this->fulllength - hardclip_low; --i) {
FPRINTF(fp,"%c",this->quality[i]);
}
- return;
+#else
+ FPRINTF(fp,"%.*r",hardclip_low,&(this->quality[this->fulllength - hardclip_low]));
+#endif
} else {
+#ifdef PRINT_INDIVIDUAL_CHARS
for (i = hardclip_high - 1; i >= 0; --i) {
FPRINTF(fp,"%c",this->quality[i]);
}
- return;
+#else
+ FPRINTF(fp,"%.*r",hardclip_high,&(this->quality[0]));
+#endif
}
+
+ return;
}
@@ -5274,11 +5330,15 @@ Shortread_print_chop (Filestring_T fp, T this, bool invertp) {
if (this->chop != NULL) {
FPRINTF(fp,"\tXP:Z:");
if (invertp == false) {
- FPRINTF(fp,"%s",this->chop);
+ FPRINTF(fp,"%.*s",this->choplength,this->chop);
} else {
+#ifdef PRINT_INDIVIDUAL_CHARS
for (i = this->choplength - 1; i >= 0; i--) {
FPRINTF(fp,"%c",complCode[(int) this->chop[i]]);
}
+#else
+ FPRINTF(fp,"%.*R",this->choplength,this->chop);
+#endif
}
}
@@ -5304,6 +5364,10 @@ Shortread_print_quality (Filestring_T fp, T this, int hardclip_low, int hardclip
if (this->quality == NULL) {
FPRINTF(fp,"*");
+
+ } else if (shift == 0) {
+ FPRINTF(fp,"%.*s",this->fulllength - hardclip_high - hardclip_low,&(this->quality[hardclip_low]));
+
} else {
for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
if ((c = this->quality[i] + shift) <= 32) {
@@ -5317,13 +5381,18 @@ Shortread_print_quality (Filestring_T fp, T this, int hardclip_low, int hardclip
if (show_chopped_p == true) {
assert(hardclip_high == 0);
- for (i = 0; i < this->choplength; i++) {
- if ((c = this->chop_quality[i] + shift) <= 32) {
- fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
- shift,this->chop_quality[i]);
- abort();
- } else {
- FPRINTF(fp,"%c",c);
+ if (shift == 0) {
+ FPRINTF(fp,"%.*s",this->choplength,this->chop_quality);
+
+ } else {
+ for (i = 0; i < this->choplength; i++) {
+ if ((c = this->chop_quality[i] + shift) <= 32) {
+ fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
+ shift,this->chop_quality[i]);
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
}
}
}
@@ -5341,6 +5410,10 @@ Shortread_print_quality_revcomp (Filestring_T fp, T this, int hardclip_low, int
if (this->quality == NULL) {
FPRINTF(fp,"*");
+
+ } else if (shift == 0) {
+ FPRINTF(fp,"%.*r",this->fulllength - hardclip_low - hardclip_high,&(this->quality[hardclip_high]));
+
} else {
for (i = this->fulllength - 1 - hardclip_low; i >= hardclip_high; --i) {
if ((c = this->quality[i] + shift) <= 32) {
@@ -5354,13 +5427,18 @@ Shortread_print_quality_revcomp (Filestring_T fp, T this, int hardclip_low, int
if (show_chopped_p == true) {
/* assert(hardclip_low == 0); */
- for (i = this->choplength - 1; i >= 0; i--) {
- if ((c = this->chop_quality[i] + shift) <= 32) {
- fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
- shift,this->chop_quality[i]);
- abort();
- } else {
- FPRINTF(fp,"%c",c);
+ if (shift == 0) {
+ FPRINTF(fp,"%.*r",this->choplength,this->chop_quality);
+
+ } else {
+ for (i = this->choplength - 1; i >= 0; i--) {
+ if ((c = this->chop_quality[i] + shift) <= 32) {
+ fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
+ shift,this->chop_quality[i]);
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
}
}
}
@@ -5373,9 +5451,14 @@ void
Shortread_print_oneline_uc (Filestring_T fp, T this) {
int i = 0;
+#ifdef PRINT_INDIVIDUAL_CHARS
for (i = 0; i < this->fulllength; i++) {
FPRINTF(fp,"%c",this->contents_uc[i]);
}
+#else
+ FPRINTF(fp,"%.*s",this->fulllength,this->contents_uc);
+#endif
+
return;
}
@@ -5383,9 +5466,14 @@ void
Shortread_print_oneline_revcomp_uc (Filestring_T fp, T this) {
int i = 0;
+#ifdef PRINT_INDIVIDUAL_CHARS
for (i = this->fulllength-1; i >= 0; --i) {
FPRINTF(fp,"%c",complCode[(int) this->contents_uc[i]]);
}
+#else
+ FPRINTF(fp,"%.*R",this->fulllength,this->contents_uc);
+#endif
+
return;
}
diff --git a/src/splice.c b/src/splice.c
index d08bd47..5605e64 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 191633 2016-06-09 22:03:28Z twu $";
+static char rcsid[] = "$Id: splice.c 193235 2016-06-30 22:34:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -88,18 +88,21 @@ sufficient_splice_prob_local (int support, int nmatches, int nmismatches, double
#endif
/* Do not compare against true or false */
-/* Loosest criterion */
+/* Want a loose criterion; otherwise, we incur a slowdown from having to
+ run the GSNAP algorithm */
static int
sufficient_splice_prob_local (int support, int nmismatches, double spliceprob) { /* Returns nonzero when spliceprob exceeds a cutoff selected by the mismatch-adjusted support */
support -= 3*nmismatches; /* Each mismatch deducts 3 from support before the tiers below are applied */
- if (support < 14) {
- return (spliceprob > 0.95);
- } else if (support < 20) {
- return (spliceprob > 0.90);
- } else if (support < 26) {
- return (spliceprob > 0.85);
- } else {
+ if (support <= 9) { /* Lowest tier of adjusted support gets the highest cutoff */
+ return (spliceprob > 0.80);
+ } else if (support <= 12) {
return (spliceprob > 0.70);
+ } else if (support <= 15) {
+ return (spliceprob > 0.60);
+ } else if (support <= 25) {
+ return (spliceprob > 0.50);
+ } else { /* Adjusted support above 25 gets the lowest cutoff */
+ return (spliceprob > 0.40);
}
}
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 73b0574..ae138e9 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 191136 2016-06-03 17:32:53Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -65,6 +65,7 @@ static char rcsid[] = "$Id: stage1hr.c 191136 2016-06-03 17:32:53Z twu $";
#endif
#endif
+#define USE_HEAP 1
#define SPEED 1
@@ -73,7 +74,11 @@ static char rcsid[] = "$Id: stage1hr.c 191136 2016-06-03 17:32:53Z twu $";
within each batch are already sorted. Also, heapsort can handle
8-byte positions. */
-#define LONG_ENDSPLICES 1 /* Necessary to get outside splices correctly */
+/* Not useful to have LONG_ENDSPLICES, which allows for two pairmax
+ distances, because long insert lengths will get pruned anyway by
+ Stage3pair_new. In the future, could have LONG_ENDSPLICES, but require
+ the inside alignment to start within one pairmax */
+/* #define LONG_ENDSPLICES 1 */ /* Necessary to get outside splices correctly */
#define NO_EXTENSIONS_BEFORE_ZERO 1
@@ -453,6 +458,34 @@ struct Segment_T {
static int
+Segment_mod_length_cmp (const void *a, const void *b) { /* Comparator with the qsort signature; a and b each point to a Segment_T element */
+ Segment_T x = * (Segment_T *) a;
+ Segment_T y = * (Segment_T *) b;
+
+ int xlength, ylength;
+ int xmod, ymod;
+
+ xmod = x->querypos5 % index1interval; /* Primary key: querypos5 modulo index1interval */
+ ymod = y->querypos5 % index1interval;
+
+ if (xmod < ymod) { /* Smaller mod value orders first */
+ return -1;
+ } else if (ymod < xmod) {
+ return +1;
+ } else {
+ xlength = x->querypos3 - x->querypos5; /* Secondary key: querypos3 minus querypos5 */
+ ylength = y->querypos3 - y->querypos5;
+ if (xlength > ylength) { /* Within the same mod value, the larger length orders first */
+ return -1;
+ } else if (ylength > xlength) {
+ return +1;
+ } else {
+ return 0; /* Same mod value and same length */
+ }
+ }
+}
+
+static int
Segment_length_cmp (const void *a, const void *b) {
Segment_T x = * (Segment_T *) a;
Segment_T y = * (Segment_T *) b;
@@ -782,36 +815,30 @@ struct T {
bool read_oligos_p;
#ifdef LARGE_GENOMES
- unsigned char **plus_positions_high_allocated;
+ unsigned char **positions_high_allocated;
unsigned char **plus_positions_high; /* points to above[index1interval-1] */
- UINT4 **plus_positions_low_allocated;
- UINT4 **plus_positions_low; /* points to above[index1interval-1] */
- unsigned char **minus_positions_high_allocated;
unsigned char **minus_positions_high; /* points to above[index1interval-1] */
- UINT4 **minus_positions_low_allocated;
+
+ UINT4 **positions_low_allocated;
+ UINT4 **plus_positions_low; /* points to above[index1interval-1] */
UINT4 **minus_positions_low; /* points to above[index1interval-1] */
#else
- Univcoord_T **plus_positions_allocated;
+ Univcoord_T **positions_allocated;
Univcoord_T **plus_positions; /* points to above[index1interval-1] */
- Univcoord_T **minus_positions_allocated;
Univcoord_T **minus_positions; /* points to above[index1interval-1] */
#endif
- int *plus_npositions_allocated;
+ int *npositions_allocated;
int *plus_npositions; /* points to above[index1interval-1] */
-
- int *minus_npositions_allocated;
int *minus_npositions; /* points to above[index1interval-1] */
- bool *plus_retrievedp_allocated;
+ bool *retrievedp_allocated;
bool *plus_retrievedp; /* points to above[index1interval-1] */
- bool *minus_retrievedp_allocated;
bool *minus_retrievedp; /* points to above[index1interval-1] */
#ifdef USE_ALLOCP
- bool *plus_allocp_allocated;
+ bool *allocp_allocated;
bool *plus_allocp; /* points to above[index1interval-1] */
- bool *minus_allocp_allocated;
bool *minus_allocp; /* points to above[index1interval-1] */
#endif
@@ -820,9 +847,8 @@ struct T {
#endif
bool *omitted;
- Oligospace_T *forward_oligos_allocated;
+ Oligospace_T *oligos_allocated;
Oligospace_T *forward_oligos; /* points to above[index1interval-1] */
- Oligospace_T *revcomp_oligos_allocated;
Oligospace_T *revcomp_oligos; /* points to above[index1interval-1] */
struct Segment_T *plus_segments;
@@ -951,29 +977,22 @@ Stage1_free (T *old, int querylength) {
#endif
}
- FREE((*old)->revcomp_oligos_allocated);
- FREE((*old)->forward_oligos_allocated);
+ FREE((*old)->oligos_allocated);
FREE((*old)->omitted);
#ifdef USE_VALIDP
FREE((*old)->validp);
#endif
#ifdef LARGE_GENOMES
- FREE((*old)->plus_positions_high_allocated);
- FREE((*old)->plus_positions_low_allocated);
- FREE((*old)->minus_positions_high_allocated);
- FREE((*old)->minus_positions_low_allocated);
+ FREE((*old)->positions_high_allocated);
+ FREE((*old)->positions_low_allocated);
#else
- FREE((*old)->plus_positions_allocated);
- FREE((*old)->minus_positions_allocated);
+ FREE((*old)->positions_allocated);
#endif
- FREE((*old)->plus_npositions_allocated);
- FREE((*old)->minus_npositions_allocated);
+ FREE((*old)->npositions_allocated);
#ifdef USE_ALLOCP
- FREE((*old)->plus_allocp_allocated);
- FREE((*old)->minus_allocp_allocated);
+ FREE((*old)->allocp_allocated);
#endif
- FREE((*old)->plus_retrievedp_allocated);
- FREE((*old)->minus_retrievedp_allocated);
+ FREE((*old)->retrievedp_allocated);
FREE(*old);
}
@@ -1551,26 +1570,22 @@ Stage1_new (int querylength) {
new->read_oligos_p = false;
#ifdef LARGE_GENOMES
- new->plus_positions_high_allocated = (unsigned char **) MALLOC((querylength+overhang) * sizeof(unsigned char *));
- new->plus_positions_high = &(new->plus_positions_high_allocated[overhang]);
- new->plus_positions_low_allocated = (UINT4 **) MALLOC((querylength+overhang) * sizeof(UINT4 *));
- new->plus_positions_low = &(new->plus_positions_low_allocated[overhang]);
-
- new->minus_positions_high_allocated = (unsigned char **) MALLOC((querylength+overhang) *sizeof(unsigned char *));
- new->minus_positions_high = &(new->minus_positions_high_allocated[overhang]);
- new->minus_positions_low_allocated = (UINT4 **) MALLOC((querylength+overhang) *sizeof(UINT4 *));
- new->minus_positions_low = &(new->minus_positions_low_allocated[overhang]);
+ new->positions_high_allocated = (unsigned char **) MALLOC(2 * (querylength+overhang) * sizeof(unsigned char *));
+ new->plus_positions_high = &(new->positions_high_allocated[overhang]);
+ new->minus_positions_high = &(new->positions_high_allocated[(querylength+overhang)+overhang]);
+
+ new->positions_low_allocated = (UINT4 **) MALLOC(2 * (querylength+overhang) * sizeof(UINT4 *));
+ new->plus_positions_low = &(new->positions_low_allocated[overhang]);
+ new->minus_positions_low = &(new->positions_low_allocated[(querylength+overhang)+overhang]);
#else
- new->plus_positions_allocated = (Univcoord_T **) MALLOC((querylength+overhang) * sizeof(Univcoord_T *));
- new->plus_positions = &(new->plus_positions_allocated[overhang]);
- new->minus_positions_allocated = (Univcoord_T **) MALLOC((querylength+overhang) *sizeof(Univcoord_T *));
- new->minus_positions = &(new->minus_positions_allocated[overhang]);
+ new->positions_allocated = (Univcoord_T **) MALLOC(2 * (querylength+overhang) * sizeof(Univcoord_T *));
+ new->plus_positions = &(new->positions_allocated[overhang]);
+ new->minus_positions = &(new->positions_allocated[(querylength+overhang)+overhang]);
#endif
- new->plus_npositions_allocated = (int *) MALLOC((querylength+overhang) * sizeof(int));
- new->plus_npositions = &(new->plus_npositions_allocated[overhang]);
- new->minus_npositions_allocated = (int *) MALLOC((querylength+overhang) * sizeof(int));
- new->minus_npositions = &(new->minus_npositions_allocated[overhang]);
+ new->npositions_allocated = (int *) MALLOC(2 * (querylength+overhang) * sizeof(int));
+ new->plus_npositions = &(new->npositions_allocated[overhang]);
+ new->minus_npositions = &(new->npositions_allocated[(querylength+overhang)+overhang]);
#if 0
/* No need to initialize, since we assign all values below */
@@ -1583,17 +1598,15 @@ Stage1_new (int querylength) {
#endif
/* Can be MALLOC, since we initialize in read_oligos() */
- new->plus_retrievedp_allocated = (bool *) MALLOC((querylength+overhang) * sizeof(bool));
- new->minus_retrievedp_allocated = (bool *) MALLOC((querylength+overhang) * sizeof(bool));
- new->plus_retrievedp = &(new->plus_retrievedp_allocated[overhang]);
- new->minus_retrievedp = &(new->minus_retrievedp_allocated[overhang]);
+ new->retrievedp_allocated = (bool *) MALLOC(2 * (querylength+overhang) * sizeof(bool));
+ new->plus_retrievedp = &(new->retrievedp_allocated[overhang]);
+ new->minus_retrievedp = &(new->retrievedp_allocated[(querylength+overhang)+overhang]);
#ifdef USE_ALLOCP
/* Never set to true, so never used */
- new->plus_allocp_allocated = (bool *) CALLOC(querylength+overhang,sizeof(bool));
- new->minus_allocp_allocated = (bool *) CALLOC(querylength+overhang,sizeof(bool));
- new->plus_allocp = &(new->plus_allocp_allocated[overhang]);
- new->minus_allocp = &(new->minus_allocp_allocated[overhang]);
+ new->allocp_allocated = (bool *) CALLOC(2 * (querylength+overhang),sizeof(bool));
+ new->plus_allocp = &(new->allocp_allocated[overhang]);
+ new->minus_allocp = &(new->allocp_allocated[(querylength+overhang)+overhang]);
#endif
#ifdef USE_VALIDP
@@ -1601,10 +1614,9 @@ Stage1_new (int querylength) {
#endif
new->omitted = (bool *) CALLOC(querylength,sizeof(bool));
- new->forward_oligos_allocated = (Oligospace_T *) CALLOC(querylength+overhang,sizeof(Oligospace_T));
- new->forward_oligos = &(new->forward_oligos_allocated[overhang]);
- new->revcomp_oligos_allocated = (Oligospace_T *) CALLOC(querylength+overhang,sizeof(Oligospace_T));
- new->revcomp_oligos = &(new->revcomp_oligos_allocated[overhang]);
+ new->oligos_allocated = (Oligospace_T *) CALLOC(2 * (querylength+overhang),sizeof(Oligospace_T));
+ new->forward_oligos = &(new->oligos_allocated[overhang]);
+ new->revcomp_oligos = &(new->oligos_allocated[(querylength+overhang)+overhang]);
new->plus_segments = (struct Segment_T *) NULL;
new->minus_segments = (struct Segment_T *) NULL;
@@ -1660,14 +1672,17 @@ make_complement_inplace (char *sequence, unsigned int length) {
/************************************************************************/
-#define PARENT(i) (i >> 1)
-#define LEFT(i) (i << 1)
-#define RIGHT(i) ((i << 1) | 1)
+#define PARENT(i) ((i) >> 1)
+#define LEFT(i) ((i) << 1)
+#define RIGHT(i) (((i) << 1) | 1)
typedef struct Batch_T *Batch_T;
struct Batch_T {
+#ifndef USE_HEAP
+ int nodei; /* Node in loser tree. Also used for debugging */
+#endif
int querypos;
int diagterm;
int npositions;
@@ -1691,10 +1706,19 @@ Batch_init (Batch_T batch, int querypos, int diagterm,
#else
Univcoord_T *positions,
#endif
- int npositions, int querylength) {
+ int npositions, int querylength
+#ifndef USE_HEAP
+ , int nodei
+#endif
+ ) {
+#ifndef USE_HEAP
+ batch->nodei = nodei;
+#endif
batch->querypos = querypos;
batch->diagterm = diagterm;
+ batch->npositions = npositions;
+
#ifdef LARGE_GENOMES
batch->positions_high = positions_high;
batch->positions_low = positions_low;
@@ -1706,7 +1730,7 @@ Batch_init (Batch_T batch, int querypos, int diagterm,
batch->positions = positions;
batch->diagonal = *positions + diagterm;
#endif
- batch->npositions = npositions;
+
#ifdef NO_EXTENSIONS_BEFORE_ZERO
/* This prevents us from finding insertions at the beginning of the genome */
@@ -1738,12 +1762,30 @@ Batch_init (Batch_T batch, int querypos, int diagterm,
static void
-Batch_init_simple (Batch_T batch, Univcoord_T *diagonals, int ndiagonals, int querylength, int querypos) {
+Batch_init_simple (Batch_T batch, Univcoord_T *diagonals, int ndiagonals, int querylength,
+#ifdef USE_HEAP
+ int querypos
+#else
+ int nodei
+#endif
+ ) {
+#ifdef USE_HEAP
batch->querypos = querypos;
+#else
+ batch->nodei = nodei;
+#endif
batch->positions = diagonals;
- batch->diagonal = *diagonals; /* Already in correct endianness */
batch->npositions = ndiagonals;
+#ifdef USE_HEAP
+ batch->diagonal = *diagonals; /* Already in correct endianness */
+#else
+ if (batch->npositions == 0) {
+ batch->diagonal = -1U;
+ } else {
+ batch->diagonal = *diagonals; /* Already in correct endianness */
+ }
+#endif
while (batch->npositions > 0 && batch->diagonal < (unsigned int) querylength) {
debug11(printf("Eliminating diagonal %llu as straddling beginning of genome (Batch_init)\n",
@@ -3244,6 +3286,38 @@ find_spanning_onemiss_matches (int *found_score, int *nhits, List_T hits, T this
}
+static void
+init_tree (Batch_T *losers, int heapsize) { /* Builds a loser tree in place: leaves are read from losers[heapsize..2*heapsize-1]; losers[1..heapsize-1] and losers[0] are written */
+ Batch_T *winners;
+ int nodei, lefti, righti;
+
+ winners = (Batch_T *) MALLOCA((2*heapsize + 1) * sizeof(Batch_T)); /* Scratch array of per-node winners; released by FREEA below */
+ for (nodei = 2*heapsize - 1; nodei >= heapsize; nodei--) { /* Each leaf is its own winner */
+ winners[nodei] = losers[nodei];
+ }
+
+ for (nodei = heapsize - 1; nodei >= 1; --nodei) { /* Internal nodes, from the highest index down to node 1 */
+ lefti = LEFT(nodei);
+ righti = lefti + 1;
+
+ if (winners[lefti]->diagonal < winners[righti]->diagonal) { /* Smaller diagonal wins and moves up; the other batch is stored as this node's loser */
+ winners[nodei] = winners[lefti];
+ losers[nodei] = winners[righti];
+
+ } else { /* Equal diagonals: the right child is taken as the winner */
+ winners[nodei] = winners[righti];
+ losers[nodei] = winners[lefti];
+ }
+ }
+
+ losers[0] = winners[1]; /* Overall winner (the winner recorded at node 1) is kept in slot 0 */
+ FREEA(winners);
+
+ return; /* NOTE(review): the only caller visible here is on the #ifndef USE_HEAP path, while this definition is unguarded -- confirm it is not left unused when USE_HEAP is defined */
+}
+
+
+#ifdef USE_HEAP
static List_T
find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T this, int genestrand,
int nrequired, int querylength, Compress_T query_compress_fwd, Compress_T query_compress_rev,
@@ -3254,11 +3328,16 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
int nunion = nmisses_allowed + nrequired, nelts, elti;
int heapsize, count, mod, i;
int ndiagonals, nempty;
- int parenti, smallesti, righti;
+ int parenti;
int global_miss_querypos5, global_miss_querypos3;
int elt_miss_querypos5, elt_miss_querypos3;
- struct Batch_T *batchpool, sentinel_struct;
- Batch_T *heap, batch, sentinel;
+ struct Batch_T *batchpool;
+
+ struct Batch_T sentinel_struct;
+ Batch_T *heap, sentinel;
+ int smallesti, righti;
+
+ Batch_T batch;
Univcoord_T chroffset, chrhigh;
Chrpos_T chrlength;
Chrnum_T chrnum;
@@ -3271,6 +3350,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
batchpool = (struct Batch_T *) MALLOCA(nunion * sizeof(struct Batch_T));
heap = (Batch_T *) MALLOCA((2*(nunion+1)+1+1) * sizeof(Batch_T)); /* being liberal with allocation */
+
/* Plus */
for (mod = 0; mod < index1interval; mod++) {
array = this->plus_spanningset[mod];
@@ -3330,7 +3410,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
batch = heap[1];
diagonal = batch->diagonal;
count = 1;
- debug7(printf("at #%d, initial diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
+ debug7(printf("at #%d, initial diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
/* Update batch */
if (--batch->npositions <= 0) {
@@ -3372,7 +3452,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
if (batch->diagonal == diagonal) {
count++;
debug7(printf("at #%d, incrementing diagonal %llu to count %d\n",
- batch->querypos,(unsigned long long) diagonal,count));
+ batch->nodei,(unsigned long long) diagonal,count));
} else {
/* End of diagonal */
if (count >= nrequired) {
@@ -3385,7 +3465,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
}
diagonal = batch->diagonal;
count = 1;
- debug7(printf("at #%d, next diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
+ debug7(printf("at #%d, next diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
}
/* Update batch */
@@ -3494,7 +3574,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
batch = heap[1];
diagonal = batch->diagonal;
count = 1;
- debug7(printf("at #%d, initial diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
+ debug7(printf("at #%d, initial diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
/* Update batch */
if (--batch->npositions <= 0) {
@@ -3536,7 +3616,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
if (batch->diagonal == diagonal) {
count++;
debug7(printf("at #%d, incrementing diagonal %llu to count %d\n",
- batch->querypos,(unsigned long long) diagonal,count));
+ batch->nodei,(unsigned long long) diagonal,count));
} else {
/* End of diagonal */
if (count >= nrequired) {
@@ -3548,7 +3628,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
}
diagonal = batch->diagonal;
count = 1;
- debug7(printf("at #%d, next diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
+ debug7(printf("at #%d, next diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
}
/* Update batch */
@@ -3602,144 +3682,454 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
return hits;
}
+#endif
+
+#ifndef USE_HEAP
+/* Uses a loser tree */
+static List_T
+find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T this, int genestrand,
+ int nrequired, int querylength, Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ int nmisses_allowed) {
+ Univcoord_T *diagonals, diagonal;
+ Spanningelt_T *array;
+ List_T prev;
+ int nunion = nmisses_allowed + nrequired, nelts, elti;
+ int heapsize, count, mod, i;
+ int ndiagonals, nempty;
+ int parenti;
+ int global_miss_querypos5, global_miss_querypos3;
+ int elt_miss_querypos5, elt_miss_querypos3;
+ struct Batch_T *batchpool;
+ Batch_T *losers, current;
+ Univcoord_T current_diagonal, winner_diagonal;
-/************************************************************************/
+ Batch_T batch;
+ Univcoord_T chroffset, chrhigh;
+ Chrpos_T chrlength;
+ Chrnum_T chrnum;
+ debug(printf("Starting find_spanning_multimiss_matches with %d misses allowed\n",nmisses_allowed));
-#if 0
-static void
-trim_ends_unknowns_only (int *trim5, int *trim3, char *sequence1, char *sequence2, int length) {
- int pos;
+ batchpool = (struct Batch_T *) MALLOCA(nunion * sizeof(struct Batch_T));
+ losers = (Batch_T *) MALLOCA((2*(nunion+1)+1+1) * sizeof(Batch_T)); /* being liberal with allocation */
- pos = 0;
- while (pos < length && sequence2[pos] == OUTOFBOUNDS) {
- pos++;
- }
- debug8(printf("outofbounds: trim 5': at %d: %c != %c\n",pos,sequence2[pos],OUTOFBOUNDS));
- *trim5 = pos;
- pos = length-1;
- debug8(printf("outofbounds: trim 3': %d:%c\n",pos,sequence2[pos]));
- while (pos >= 0 && sequence2[pos] == OUTOFBOUNDS) {
- pos--;
- }
- *trim3 = pos+1;
- debug8(printf("outofbounds: trim 3': %d - %d\n",length,*trim3));
- *trim3 = length - (*trim3);
+ /* Plus */
+ for (mod = 0; mod < index1interval; mod++) {
+ array = this->plus_spanningset[mod];
+ nelts = this->plus_spanningset_nelts[mod];
+ debug(printf("Multimiss plus mod %d, nelts %d\n",mod,nelts));
- debug8(
- printf("At query ->: %.*s\n",length,sequence1);
- printf("At genome->: %.*s\n",length,sequence2);
- printf("%02d %02d ->: ",*trim5,*trim3);
- for (pos = 0; pos < *trim5; pos++) {
- printf(" ");
- }
- for ( ; pos < length - (*trim3); pos++) {
- printf("*");
- }
- for ( ; pos < length; pos++) {
- printf(" ");
- }
- printf("\n");
- );
+ qsort(array,nelts,sizeof(Spanningelt_T),Spanningelt_candidates_cmp);
+ if (nelts > nunion) {
+ qsort(&(array[nunion]),nelts-nunion,sizeof(Spanningelt_T),Spanningelt_pruning_cmp);
+ }
+ for (elti = 0; elti < nelts; elti++) {
+ Spanningelt_reset(array[elti]);
+ }
- return;
-}
+ debug(printf("*** find_spanning_multimiss_matches, %d misses allowed, plus mod %d\n",nmisses_allowed,mod));
+ debug(Spanningelt_print_array(array,nelts));
+
+ /* Put first few pointers into heap */
+ heapsize = (nelts < nunion) ? nelts : nunion;
+ global_miss_querypos5 = querylength;
+ global_miss_querypos3 = 0;
+ for (elti = 0; elti < nelts && elti < nunion; elti++) {
+ /* Get list as a special one, and perform conversion if necessary */
+ diagonals = Spanningelt_diagonals(&ndiagonals,(Spanningelt_T) array[elti],&elt_miss_querypos5,&elt_miss_querypos3);
+ if (elt_miss_querypos5 < global_miss_querypos5) global_miss_querypos5 = elt_miss_querypos5;
+ if (elt_miss_querypos3 > global_miss_querypos3) global_miss_querypos3 = elt_miss_querypos3;
+
+ batch = &(batchpool[elti]);
+ debug(printf("Adding batch %d of size %d...",elti,ndiagonals));
+ Batch_init_simple(batch,diagonals,ndiagonals,querylength,/*nodei*/heapsize + elti);
+ losers[heapsize + elti] = batch;
+ debug(printf("\n"));
+ }
+ debug(printf("heapsize is %d\n",heapsize));
+ if (heapsize > 0) {
+#if 0
+ prev = List_push(List_copy(spanningset),(void *) NULL); /* Add a dummy list elt to front */
+#else
+ prev = (struct List_T *) MALLOCA((nelts - elti + 1) * sizeof(struct List_T));
+ List_fill_array_with_handle(prev,(void *) &(array[elti]),nelts - elti);
#endif
+ nempty = 0;
+ init_tree(losers,heapsize);
-/************************************************************************/
+ debug7(printf("*** multimiss mod %d plus:\n",mod));
+ /* Initialize loop */
+ batch = losers[0];
+ diagonal = batch->diagonal;
+ count = 1;
+ debug7(printf("at #%d, initial diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
-/* Returns a master pointer (segments) to the block of segments */
-/* If end_indel_mismatches_allowed set to 0, won't save any segments for end indels. */
-static List_T
-find_complete_mm (int *found_score, int *nhits, List_T hits, List_T anchor_segments,
- int querylength, Compress_T query_compress,
- int max_mismatches_allowed, bool plusp, int genestrand) {
- Stage3end_T hit;
- int nmismatches;
- Univcoord_T left;
- Segment_T segmenti;
- List_T p;
+ /* Update batch */
+ if (--batch->npositions <= 0) {
+ batch->diagonal = -1U;
+ } else {
+ /* Use this batch for heapify */
+ /* These positions are diagonals, and already in correct endianness */
+ batch->diagonal = *(++batch->positions);
+ }
- for (p = anchor_segments; p != NULL; p = List_next(p)) {
- segmenti = (Segment_T) List_head(p);
- assert(segmenti->diagonal != (Univcoord_T) -1);
- if (segmenti->floor <= max_mismatches_allowed) {
- left = segmenti->diagonal - querylength;
- nmismatches = Genome_count_mismatches_limit(query_compress,left,/*pos5*/0,/*pos3*/querylength,
- max_mismatches_allowed,plusp,genestrand);
- if (nmismatches <= max_mismatches_allowed) {
- if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
- left,/*genomiclength*/querylength,
- query_compress,plusp,genestrand,segmenti->chrnum,
- segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- /*sarrayp*/false)) != NULL) {
- segmenti->usedp = true;
- *nhits += 1;
- hits = List_push(hits,(void *) hit);
+ /* Update tree */
+ winner_diagonal = batch->diagonal;
+ for (parenti = PARENT(batch->nodei); parenti > 0; parenti = PARENT(parenti)) {
+ current = losers[parenti];
+ current_diagonal = current->diagonal;
+ if (current_diagonal < winner_diagonal) {
+ losers[parenti] = batch;
+ batch = current;
+ winner_diagonal = current_diagonal;
}
}
- }
- }
+ losers[0] = batch;
- return hits;
-}
+ /* Iterate through heap */
+ chrhigh = 0U;
+ while ((batch = losers[0])->diagonal != -1U && *nhits <= maxpaths_search) {
+ if (batch->diagonal == diagonal) {
+ count++;
+ debug7(printf("at #%d, incrementing diagonal %llu to count %d\n",
+ batch->nodei,(unsigned long long) diagonal,count));
+ } else {
+ /* End of diagonal */
+ if (count >= nrequired) {
+ /* printf("Testing %d..%d\n",miss_querypos5,miss_querypos3); */
+ hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal,
+ prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
+ querylength,/*query_compress*/query_compress_fwd,
+ /*plusp*/true,genestrand,nmisses_allowed,
+ /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3);
+ }
+ diagonal = batch->diagonal;
+ count = 1;
+ debug7(printf("at #%d, next diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
+ }
-/* TODO: Change spliceable to be an attribute of the segment. Then we
- can loop over anchor_segments only */
-static struct Segment_T *
-identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spliceable, int *nspliceable,
-#ifdef LARGE_GENOMES
- unsigned char **positions_high, UINT4 **positions_low,
-#else
- Univcoord_T **positions,
-#endif
- int *npositions, bool *omitted, int querylength, int query_lastpos, Floors_T floors,
- bool plusp) {
- List_T all_segments = NULL;
- struct Segment_T *segments = NULL;
- Segment_T *array;
- int length_threshold;
- int nanchors, n;
+ /* Update batch */
+ if (--batch->npositions <= 0) {
+ batch->diagonal = -1U;
+ } else {
+ /* Use this batch for heapify */
+ /* These positions are diagonals, and already in correct endianness */
+ batch->diagonal = *(++batch->positions);
+ }
- Batch_T batch, sentinel;
- struct Batch_T sentinel_struct, *batchpool;
- Batch_T *heap;
- int heapsize = 0;
- int parenti, smallesti, righti, i;
- int querypos, first_querypos, last_querypos;
- int floor_left, floor_right, floor_incr;
- int floor, floor_xfirst, floor_xlast, *floors_from_xfirst, *floors_to_xlast;
- int *floors_from_neg3, *floors_to_pos3;
- /* int exclude_xfirst, exclude_xlast; */
- Univcoord_T diagonal, segment_left, last_diagonal, chroffset = 0U, chrhigh = 0U;
- Chrpos_T chrlength, max_distance;
- Chrnum_T chrnum = 1;
-#ifdef OLD_FLOOR_ENDS
- int halfquerylength, halfquery_lastpos;
-#endif
+ /* Update tree */
+ winner_diagonal = batch->diagonal;
+ for (parenti = PARENT(batch->nodei); parenti > 0; parenti = PARENT(parenti)) {
+ current = losers[parenti];
+ current_diagonal = current->diagonal;
+ if (current_diagonal < winner_diagonal) {
+ losers[parenti] = batch;
+ batch = current;
+ winner_diagonal = current_diagonal;
+ }
+ }
+ losers[0] = batch;
+ }
-#ifdef DIAGONAL_ADD_QUERYPOS
- UINT8 diagonal_add_querypos;
-#endif
- int total_npositions = 0;
- int joffset = 0, j;
+ /* Terminate loop */
+ if (count >= nrequired && *nhits <= maxpaths_search) {
+ hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal,
+ prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
+ querylength,/*query_compress*/query_compress_fwd,
+ /*plusp*/true,genestrand,nmisses_allowed,
+ /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3);
+ }
-#ifdef DEBUG
- List_T p;
- Segment_T segment;
-#endif
+ FREEA(prev);
+ }
+ }
+
+ /* Minus */
+ for (mod = 0; mod < index1interval; mod++) {
+ array = this->minus_spanningset[mod];
+ nelts = this->minus_spanningset_nelts[mod];
+ debug(printf("Multimiss minus mod %d, nelts %d\n",mod,nelts));
+
+ qsort(array,nelts,sizeof(Spanningelt_T),Spanningelt_candidates_cmp);
+ if (nelts > nunion) {
+ qsort(&(array[nunion]),nelts-nunion,sizeof(Spanningelt_T),Spanningelt_pruning_cmp);
+ }
+ for (elti = 0; elti < nelts; elti++) {
+ Spanningelt_reset(array[elti]);
+ }
+
+ debug(printf("*** find_spanning_multimiss_matches, %d misses_allowed, minus mod %d\n",nmisses_allowed,mod));
+ debug(Spanningelt_print_array(array,nelts));
+
+ /* Put first few pointers into heap */
+ heapsize = (nelts < nunion) ? nelts : nunion;
+ global_miss_querypos5 = querylength;
+ global_miss_querypos3 = 0;
+ for (elti = 0; elti < nelts && elti < nunion; elti++) {
+ /* Get list as a special one, and perform conversion if necessary */
+ diagonals = Spanningelt_diagonals(&ndiagonals,(Spanningelt_T) array[elti],&elt_miss_querypos5,&elt_miss_querypos3);
+ if (elt_miss_querypos5 < global_miss_querypos5) global_miss_querypos5 = elt_miss_querypos5;
+ if (elt_miss_querypos3 > global_miss_querypos3) global_miss_querypos3 = elt_miss_querypos3;
+
+ batch = &(batchpool[elti]);
+ debug(printf("Adding batch %d of size %d...",elti,ndiagonals));
+ Batch_init_simple(batch,diagonals,ndiagonals,querylength,/*querypos*/heapsize + elti);
+ losers[heapsize + elti] = batch;
+ debug(printf("\n"));
+ }
+ debug(printf("heapsize is %d\n",heapsize));
+
+ if (heapsize > 0) {
+#if 0
+ prev = List_push(List_copy(spanningset),(void **) NULL); /* Add a dummy list elt to front */
+#else
+ prev = (struct List_T *) MALLOCA((nelts - elti + 1) * sizeof(struct List_T));
+ List_fill_array_with_handle(prev,(void *) &(array[elti]),nelts - elti);
+#endif
+ nempty = 0;
+
+ init_tree(losers,heapsize);
+
+ debug7(printf("*** multimiss mod %d minus:\n",mod));
+
+ /* Initialize loop */
+ batch = losers[0];
+ diagonal = batch->diagonal;
+ count = 1;
+ debug7(printf("at #%d, initial diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
+
+ /* Update batch */
+ if (--batch->npositions <= 0) {
+ batch->diagonal = -1U;
+ } else {
+ /* Use this batch for heapify */
+ /* These positions are diagonals, and already in correct endianness */
+ batch->diagonal = *(++batch->positions);
+ }
+
+ /* Update tree */
+ winner_diagonal = batch->diagonal;
+ for (parenti = PARENT(batch->nodei); parenti > 0; parenti = PARENT(parenti)) {
+ current = losers[parenti];
+ current_diagonal = current->diagonal;
+ if (current_diagonal < winner_diagonal) {
+ losers[parenti] = batch;
+ batch = current;
+ winner_diagonal = current_diagonal;
+ }
+ }
+ losers[0] = batch;
+
+ /* Iterate through heap */
+ chrhigh = 0U;
+ while ((batch = losers[0])->diagonal != -1U && *nhits <= maxpaths_search) {
+
+ if (batch->diagonal == diagonal) {
+ count++;
+ debug7(printf("at #%d, incrementing diagonal %llu to count %d\n",
+ batch->nodei,(unsigned long long) diagonal,count));
+ } else {
+ /* End of diagonal */
+ if (count >= nrequired) {
+ hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal,
+ prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
+ querylength,/*query_compress*/query_compress_rev,
+ /*plusp*/false,genestrand,nmisses_allowed,
+ /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3);
+ }
+ diagonal = batch->diagonal;
+ count = 1;
+ debug7(printf("at #%d, next diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
+ }
+
+ /* Update batch */
+ if (--batch->npositions <= 0) {
+ batch->diagonal = -1U;
+ } else {
+ /* Use this batch for heapify */
+ /* These positions are diagonals, and already in correct endianness */
+ batch->diagonal = *(++batch->positions);
+ }
+
+ /* Update tree */
+ winner_diagonal = batch->diagonal;
+ for (parenti = PARENT(batch->nodei); parenti > 0; parenti = PARENT(parenti)) {
+ current = losers[parenti];
+ current_diagonal = current->diagonal;
+ if (current_diagonal < winner_diagonal) {
+ losers[parenti] = batch;
+ batch = current;
+ winner_diagonal = current_diagonal;
+ }
+ }
+ losers[0] = batch;
+ }
+
+ /* Terminate loop */
+ if (count >= nrequired && *nhits <= maxpaths_search) {
+ hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal,
+ prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
+ querylength,/*query_compress*/query_compress_rev,
+ /*plusp*/false,genestrand,nmisses_allowed,
+ /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3);
+ }
+
+ FREEA(prev);
+ }
+ }
+
+ FREEA(losers);
+ FREEA(batchpool);
+
+ return hits;
+}
+#endif
+
+
+/************************************************************************/
+
+/* trim_ends_unknowns_only: sets *trim5 and *trim3 to the lengths of the leading and trailing runs of OUTOFBOUNDS characters in sequence2[0..length-1]. sequence1 is used only in debug output. Currently compiled out by #if 0. */
+#if 0
+static void
+trim_ends_unknowns_only (int *trim5, int *trim3, char *sequence1, char *sequence2, int length) {
+ int pos;
+ /* 5' end: advance past the leading run of OUTOFBOUNDS characters */
+ pos = 0;
+ while (pos < length && sequence2[pos] == OUTOFBOUNDS) {
+ pos++;
+ }
+ debug8(printf("outofbounds: trim 5': at %d: %c != %c\n",pos,sequence2[pos],OUTOFBOUNDS)); /* NOTE(review): pos may equal length here, so this debug read touches sequence2[length] -- confirm the buffer is terminated */
+ *trim5 = pos;
+ /* 3' end: back up over the trailing run of OUTOFBOUNDS characters */
+ pos = length-1;
+ debug8(printf("outofbounds: trim 3': %d:%c\n",pos,sequence2[pos])); /* NOTE(review): reads sequence2[-1] when length == 0 (debug builds only) */
+ while (pos >= 0 && sequence2[pos] == OUTOFBOUNDS) {
+ pos--;
+ }
+ *trim3 = pos+1; /* temporarily holds the index one past the last non-OUTOFBOUNDS character */
+ debug8(printf("outofbounds: trim 3': %d - %d\n",length,*trim3));
+ *trim3 = length - (*trim3); /* convert that index into a count of trailing characters to trim */
+ /* NOTE(review): if every character is OUTOFBOUNDS, both *trim5 and *trim3 end up equal to length */
+ debug8(
+ printf("At query ->: %.*s\n",length,sequence1);
+ printf("At genome->: %.*s\n",length,sequence2);
+ printf("%02d %02d ->: ",*trim5,*trim3);
+ for (pos = 0; pos < *trim5; pos++) {
+ printf(" ");
+ }
+ for ( ; pos < length - (*trim3); pos++) {
+ printf("*");
+ }
+ for ( ; pos < length; pos++) {
+ printf(" ");
+ }
+ printf("\n");
+ );
+ /* Results are returned only through *trim5 and *trim3 */
+ return;
+}
+#endif
+
+
+/************************************************************************/
+
+
+/* Scans anchor_segments[0..nanchors-1]; for each segment whose floor is <= max_mismatches_allowed, counts mismatches over query positions 0..querylength and, when within the limit, pushes a substitution hit onto hits. */
+/* Returns the (possibly extended) hits list. For each hit created, increments *nhits and sets usedp on the segment; *found_score is passed through to Stage3end_new_substitution. */
+static List_T
+find_complete_mm (int *found_score, int *nhits, List_T hits, Segment_T *anchor_segments, int nanchors,
+ int querylength, Compress_T query_compress,
+ int max_mismatches_allowed, bool plusp, int genestrand) {
+ Stage3end_T hit;
+ int nmismatches;
+ Univcoord_T left;
+ Segment_T segmenti, *p;
+ /* Walk the anchor array by pointer, from the first element up to (not including) element nanchors */
+ for (p = &(anchor_segments[0]); p < &(anchor_segments[nanchors]); p++) {
+ segmenti = *p;
+ assert(segmenti->diagonal != (Univcoord_T) -1); /* diagonal == -1 is the value identify_all_segments gives chr marker segments; none are expected among anchors */
+ if (segmenti->floor <= max_mismatches_allowed) { /* presumably floor is a lower bound on mismatches for this diagonal, making this a cheap pre-filter -- confirm */
+ left = segmenti->diagonal - querylength; /* same formula as segment_left in identify_all_segments */
+ nmismatches = Genome_count_mismatches_limit(query_compress,left,/*pos5*/0,/*pos3*/querylength,
+ max_mismatches_allowed,plusp,genestrand);
+ if (nmismatches <= max_mismatches_allowed) {
+ if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
+ left,/*genomiclength*/querylength,
+ query_compress,plusp,genestrand,segmenti->chrnum,
+ segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
+ /*sarrayp*/false)) != NULL) { /* a NULL result is skipped without counting a hit */
+ segmenti->usedp = true;
+ *nhits += 1;
+ hits = List_push(hits,(void *) hit);
+ }
+ }
+ }
+ }
+ /* hits is unchanged if no segment produced a hit */
+ return hits;
+}
+
+
+#ifdef USE_HEAP
+/* TODO: Change spliceable to be an attribute of the segment. Then we
+ can loop over anchor_segments only */
+static struct Segment_T *
+identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchors,
+ Segment_T **spliceable, int *nspliceable,
+#ifdef LARGE_GENOMES
+ unsigned char **positions_high, UINT4 **positions_low,
+#else
+ Univcoord_T **positions,
+#endif
+ int *npositions, bool *omitted, int querylength, int query_lastpos,
+ Floors_T floors, bool plusp) {
+ struct Segment_T *segments = NULL;
+ Segment_T *all_segments, *ptr_all, *ptr_anchor, *dest, *src;
+ int length_threshold;
+ int n_all_segments, n;
+ int nanchors_bymod[MAX_INDEX1INTERVAL], naccept_bymod[MAX_INDEX1INTERVAL];
+ int mod;
+ int k;
+
+ struct Batch_T *batchpool;
+ struct Batch_T sentinel_struct;
+ Batch_T *heap, sentinel;
+ int smallesti, righti;
+ Batch_T batch;
+ int heapsize = 0;
+ int parenti, i;
+ int querypos, first_querypos, last_querypos;
+ int floor_left, floor_right, floor_incr;
+ int floor, floor_xfirst, floor_xlast, *floors_from_xfirst, *floors_to_xlast;
+ int *floors_from_neg3, *floors_to_pos3;
+ /* int exclude_xfirst, exclude_xlast; */
+ Univcoord_T diagonal, segment_left, last_diagonal, chroffset = 0U, chrhigh = 0U;
+ Chrpos_T chrlength, max_distance;
+ Chrnum_T chrnum = 1;
+#ifdef OLD_FLOOR_ENDS
+ int halfquerylength, halfquery_lastpos;
+#endif
+
+#ifdef DIAGONAL_ADD_QUERYPOS
+ UINT8 diagonal_add_querypos;
+#endif
+ int total_npositions = 0;
+ int joffset = 0, j;
+
+#ifdef DEBUG
+ Segment_T segment, *p;
+#endif
Segment_T ptr, ptr_chrstart;
Segment_T *ptr_spliceable;
/* bool next_spliceable_p; */
#ifdef DEBUG19
Segment_T ptr0;
- int k;
#endif
#ifndef SLOW_CHR_UPDATE
Univcoord_T goal;
@@ -3751,11 +4141,11 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
int nsplicesites_local;
debug(printf("*** Starting identify_all_segments on %s ***\n",plusp ? "plus" : "minus"));
- assert(*anchor_segments == NULL);
if (floors == NULL) {
*nsegments = 0;
- *anchor_segments = (List_T) NULL;
+ *anchor_segments = (Segment_T *) NULL;
+ *nanchors = 0;
*spliceable = (Segment_T *) NULL;
*nspliceable = 0;
return (struct Segment_T *) NULL;
@@ -3803,7 +4193,8 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions_high[querypos],positions_low[querypos],
npositions[querypos],querylength);
#else
- Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions[querypos],npositions[querypos],querylength);
+ Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions[querypos],
+ npositions[querypos],querylength);
#endif
total_npositions += npositions[querypos];
if (batch->npositions > 0) {
@@ -3828,7 +4219,8 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions_high[querypos],positions_low[querypos],
npositions[querypos],querylength);
#else
- Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions[querypos],npositions[querypos],querylength);
+ Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions[querypos],
+ npositions[querypos],querylength);
#endif
total_npositions += npositions[querypos];
if (batch->npositions > 0) {
@@ -3859,6 +4251,10 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
/* Putting chr marker "segments" after each chromosome */
segments = (struct Segment_T *) MALLOC((total_npositions + nchromosomes) * sizeof(struct Segment_T));
ptr_chrstart = ptr = &(segments[0]);
+ all_segments = (Segment_T *) MALLOC(total_npositions * sizeof(Segment_T));
+ ptr_all = &(all_segments[0]);
+ *anchor_segments = (Segment_T *) MALLOC(total_npositions * sizeof(Segment_T));
+ ptr_anchor = &((*anchor_segments)[0]);
if (overall_max_distance == 0) {
ptr_spliceable = *spliceable = (Segment_T *) NULL;
} else {
@@ -3966,31 +4362,923 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
}
}
#else
- diagonal = batch->diagonal;
- smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
- ((heap[3]->diagonal == heap[2]->diagonal) &&
- (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
- /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
- while (diagonal > heap[smallesti]->diagonal ||
- (diagonal == heap[smallesti]->diagonal &&
- querypos > heap[smallesti]->querypos)) {
- heap[parenti] = heap[smallesti];
- parenti = smallesti;
- smallesti = LEFT(parenti);
- righti = smallesti+1;
- if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
- ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
- (heap[righti]->querypos < heap[smallesti]->querypos))) {
- smallesti = righti;
+ diagonal = batch->diagonal;
+ smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
+ ((heap[3]->diagonal == heap[2]->diagonal) &&
+ (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
+ /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
+ while (diagonal > heap[smallesti]->diagonal ||
+ (diagonal == heap[smallesti]->diagonal &&
+ querypos > heap[smallesti]->querypos)) {
+ heap[parenti] = heap[smallesti];
+ parenti = smallesti;
+ smallesti = LEFT(parenti);
+ righti = smallesti+1;
+ if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
+ ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
+ (heap[righti]->querypos < heap[smallesti]->querypos))) {
+ smallesti = righti;
+ }
+ }
+#endif
+ heap[parenti] = batch;
+
+
+ /* Continue after initialization */
+ while (heapsize > 0) {
+ batch = heap[1];
+ querypos = batch->querypos;
+ diagonal = batch->diagonal;
+ debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
+
+ if (diagonal == last_diagonal) {
+ /* Continuing exact match or substitution */
+ floor_incr = floors->scorefrom[last_querypos][querypos];
+ floor += floor_incr;
+ floor_xfirst += floor_incr;
+ floor_xlast += floor_incr;
+
+#ifdef OLD_FLOOR_ENDS
+ /* Why is this here? Just set floor_left at start and floor_right at end. */
+ if (querypos < halfquery_lastpos) {
+ floor_left += floor_incr;
+ } else if (last_querypos < halfquery_lastpos) {
+ /* Finish floor_left */
+ floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
+ }
+ if (querypos >= halfquerylength) {
+ if (last_querypos < halfquerylength) {
+ /* Start floor_right */
+ floor_right = floors->scorefrom[halfquerylength-index1interval][querypos];
+ } else {
+ floor_right += floor_incr;
+ }
+ }
+#endif
+
+ debug1(printf("diagonal %llu unchanged: last_querypos = %d, querypos = %d => floor increments by %d\n",
+ (unsigned long long) diagonal,last_querypos,querypos,floor_incr));
+ debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+ } else {
+ /* End of diagonal */
+ floor_incr = floors_to_pos3[last_querypos] /* floors->score[last_querypos][query_lastpos+index1interval] */;
+ floor += floor_incr;
+ floor_xfirst += floor_incr;
+ floor_xlast += floors_to_xlast[last_querypos]; /* floors->score[last_querypos][xlast_to]; */
+
+#ifdef OLD_FLOOR_ENDS
+ if (last_querypos < halfquery_lastpos) {
+ floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
+ floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos+index1interval];
+ }
+ if (last_querypos >= halfquerylength) {
+ floor_right += floor_incr;
+ }
+#else
+ floor_right = floor_incr;
+#endif
+
+ debug1(printf("new diagonal %llu > last diagonal %llu: last_querypos = %d => final values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ (unsigned long long) diagonal,(unsigned long long) last_diagonal,last_querypos,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+ if (last_diagonal > chrhigh) {
+ if (ptr > ptr_chrstart) {
+ /* Add chr marker segment */
+ debug14(printf("=== ptr %p > ptr_chrstart %p, so adding chr marker segment\n",ptr,ptr_chrstart));
+ ptr->diagonal = (Univcoord_T) -1;
+ ptr_chrstart = ++ptr;
+ }
+
+ /* update chromosome bounds, based on low end */
+#ifdef SLOW_CHR_UPDATE
+ chrnum = Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ /* chrhigh += 1; */
+#else
+ j = 1;
+#ifdef NO_EXTENSIONS_BEFORE_ZERO
+ goal = last_diagonal - querylength + 1;
+#else
+ goal = last_diagonal + 1;
+#endif
+ while (j < nchromosomes_local && chrhighs_local[j] < goal) {
+ j <<= 1; /* gallop by 2 */
+ }
+ if (j >= nchromosomes_local) {
+ j = binary_search(j >> 1,nchromosomes_local,chrhighs_local,goal);
+ } else {
+ j = binary_search(j >> 1,j,chrhighs_local,goal);
+ }
+ chrnum += j;
+#ifdef DEBUG15
+ if (chrnum != Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength)) {
+ fprintf(stderr,"Got chrnum %d, but wanted %d\n",
+ chrnum,Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength));
+ abort();
+ }
+#endif
+ chroffset = chroffsets[chrnum-1];
+ chrhigh = chrhighs[chrnum-1];
+ chrlength = chrlengths[chrnum-1];
+ chrhighs_local += j;
+ nchromosomes_local -= j;
+#endif
+ }
+ if (last_diagonal <= chrhigh) { /* FORMULA for high position */
+ /* position of high end is within current chromosome */
+ debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ (unsigned long long) last_diagonal,first_querypos,last_querypos,
+ (unsigned long long) chroffset,(unsigned long long) chrhigh,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+ /* Save segment, but first advance splicesites past segment_left */
+ segment_left = last_diagonal - querylength;
+ max_distance = overall_max_distance;
+ if (splicesites_local[0] >= last_diagonal) {
+ ptr->splicesites_i = -1;
+ } else if (Splicetrie_splicesite_p(segment_left,/*pos5*/1,/*pos3*/querylength) == false) {
+ ptr->splicesites_i = -1;
+ } else {
+ if (splicesites_local[0] < segment_left) {
+ j = 1;
+ while (j < nsplicesites_local && splicesites_local[j] < segment_left) {
+ j <<= 1; /* gallop by 2 */
+ }
+ if (j >= nsplicesites_local) {
+ j = binary_search(j >> 1,nsplicesites_local,splicesites_local,segment_left);
+ } else {
+ j = binary_search(j >> 1,j,splicesites_local,segment_left);
+ }
+ joffset += j;
+ splicesites_local += j;
+ nsplicesites_local -= j;
+ }
+
+ if (splicesites_local[0] >= last_diagonal) {
+ ptr->splicesites_i = -1;
+ } else {
+ ptr->splicesites_i = joffset;
+ j = joffset;
+ while (j < nsplicesites && splicesites[j] < last_diagonal) {
+ if (splicedists[j] > max_distance) {
+ max_distance = splicedists[j];
+ }
+ j++;
+ }
+ }
+ }
+
+ /* Save segment */
+ ptr->diagonal = last_diagonal;
+ ptr->chrnum = chrnum;
+ ptr->chroffset = chroffset;
+ ptr->chrhigh = chrhigh;
+ ptr->chrlength = chrlength;
+ ptr->querypos5 = first_querypos;
+ ptr->querypos3 = last_querypos;
+
+ /* FORMULA */
+ if (plusp) {
+ ptr->lowpos = ptr->diagonal - querylength + ptr->querypos5;
+ ptr->highpos = ptr->diagonal - querylength + ptr->querypos3 + index1part;
+ } else {
+ ptr->lowpos = ptr->diagonal - ptr->querypos3 - index1part - index1part;
+ ptr->highpos = ptr->diagonal - ptr->querypos5 - index1part;
+ }
+
+ ptr->floor = floor;
+ ptr->floor_xfirst = floor_xfirst;
+ ptr->floor_xlast = floor_xlast;
+ ptr->floor_left = floor_left;
+ ptr->floor_right = floor_right;
+ ptr->leftmost = ptr->rightmost = -1;
+ ptr->left_splice_p = ptr->right_splice_p = false;
+#if 0
+ ptr->leftspan = ptr->rightspan = -1;
+#endif
+ ptr->usedp = false;
+ ptr->pairablep = false;
+
+#if 0
+ /* Not doing this, because the max_distance test is already good enough */
+ if (plusp) {
+ /* For plus-strand splicing, require segmenti->querypos3 < segmentj->querypos5,
+ so if segmenti->querypos3 is too high, then it is not spliceable */
+ if (last_querypos > query_lastpos) {
+ /* Not spliceable */
+ } else if (diagonal <= last_diagonal + max_distance) {
+ *ptr_spliceable++ = ptr;
+ }
+ } else {
+ /* For minus-strand splicing, require segmenti->querypos5 > segmentj->querypos3,
+ so if segmenti->querypos5 is too low, then it is not spliceable */
+ if (first_querypos < index1part) {
+ /* Not spliceable */
+ } else if (diagonal <= last_diagonal + max_distance) {
+ *ptr_spliceable++ = ptr;
+ }
+ }
+#endif
+ if (diagonal <= last_diagonal + max_distance) {
+ *ptr_spliceable++ = ptr;
+ debug4s(printf("%s diagonal %u is spliceable because next one is at %u\n",
+ plusp ? "plus" : "minus",last_diagonal,diagonal));
+ } else {
+ debug4s(printf("%s diagonal %u is not spliceable because next one is at %u\n",
+ plusp ? "plus" : "minus",last_diagonal,diagonal));
+ }
+ debug14(printf("Saving segment at %u (%u), query %d..%d",last_diagonal,last_diagonal-chroffset,ptr->querypos5,ptr->querypos3));
+ *ptr_all++ = ptr;
+ if (last_querypos >= first_querypos + /*min_segment_length*/1) {
+
+ *ptr_anchor++ = ptr;
+ debug14(printf(" ANCHOR"));
+ }
+ debug14(printf("\n"));
+ ptr++;
+ }
+
+ /* Prepare next diagonal */
+ first_querypos = querypos;
+ last_diagonal = diagonal;
+ floor_incr = floors_from_neg3[first_querypos] /* floors->score[-index1interval][first_querypos] */;
+ floor = floor_incr;
+ floor_xlast = floor_incr;
+ floor_xfirst = floors_from_xfirst[first_querypos]; /* floors->score[xfirst_from][first_querypos]; */
+
+#ifdef OLD_FLOOR_ENDS
+ if (querypos < halfquery_lastpos) {
+ floor_left = floor_incr;
+ } else {
+ floor_left = floors->scorefrom[-index1interval][halfquery_lastpos];
+ }
+ if (querypos < halfquerylength) {
+ floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos];
+ } else {
+ floor_right = floors->scorefrom[halfquerylength-index1interval][first_querypos];
+ }
+#else
+ floor_left = floor_incr;
+#ifdef DEBUG1
+ floor_right = -99; /* For debugging output */
+#endif
+#endif
+
+ debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d\n",
+ plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
+ debug1(printf("start of diagonal %llu, first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ (unsigned long long) diagonal,first_querypos,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+ }
+ last_querypos = querypos;
+
+
+ if (--batch->npositions <= 0) {
+ /* Use last entry in heap for insertion */
+ batch = heap[heapsize];
+ querypos = batch->querypos;
+ heap[heapsize--] = sentinel;
+
+ } else {
+ /* Use this batch for insertion (same querypos) */
+#ifdef LARGE_GENOMES
+ batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
+#elif defined(WORDS_BIGENDIAN)
+ batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
+#else
+ batch->diagonal = *(++batch->positions) + batch->diagterm;
+#endif
+#ifdef DIAGONAL_ADD_QUERYPOS
+ batch->diagonal_add_querypos = (UINT8) batch->diagonal;
+ batch->diagonal_add_querypos <<= 32;
+ batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
+#endif
+ }
+
+ /* heapify */
+ parenti = 1;
+#ifdef DIAGONAL_ADD_QUERYPOS
+ diagonal_add_querypos = batch->diagonal_add_querypos;
+ smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
+ while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
+ heap[parenti] = heap[smallesti];
+ parenti = smallesti;
+ smallesti = LEFT(parenti);
+ righti = smallesti+1;
+ if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
+ smallesti = righti;
+ }
+ }
+#else
+ diagonal = batch->diagonal;
+ smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
+ ((heap[3]->diagonal == heap[2]->diagonal) &&
+ (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
+ /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
+ while (diagonal > heap[smallesti]->diagonal ||
+ (diagonal == heap[smallesti]->diagonal &&
+ querypos > heap[smallesti]->querypos)) {
+ heap[parenti] = heap[smallesti];
+ parenti = smallesti;
+ smallesti = LEFT(parenti);
+ righti = smallesti+1;
+ if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
+ ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
+ (heap[righti]->querypos < heap[smallesti]->querypos))) {
+ smallesti = righti;
+ }
+ }
+#endif
+ heap[parenti] = batch;
+ }
+ debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
+ debug14(printf("\n"));
+
+ /* Terminate loop. */
+ floor_incr = floors_to_pos3[last_querypos]; /* floors->score[last_querypos][query_lastpos+index1interval]; */
+ floor += floor_incr;
+ floor_xfirst += floor_incr;
+ floor_xlast += floors_to_xlast[last_querypos]; /* floors->score[last_querypos][xlast_to]; */
+
+#ifdef OLD_FLOOR_ENDS
+ if (last_querypos < halfquery_lastpos) {
+ floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
+ floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos+index1interval];
+ }
+ if (last_querypos >= halfquerylength) {
+ floor_right += floor_incr;
+ }
+#else
+ floor_right = floor_incr;
+#endif
+
+ debug1(printf("no more diagonals: last_querypos = %d => terminal values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ last_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+ debug1(printf("last_diagonal %u vs chrhigh %u (looking for >)\n",last_diagonal,chrhigh));
+ if (last_diagonal > chrhigh) {
+ if (ptr > ptr_chrstart) {
+ /* Add chr marker segment */
+ debug14(printf("=== ptr %p > ptr_chrstart %p, so adding chr marker segment\n",ptr,ptr_chrstart));
+ ptr->diagonal = (Univcoord_T) -1;
+ ptr_chrstart = ++ptr;
+ }
+
+ /* update chromosome bounds, based on low end */
+#ifdef SLOW_CHR_UPDATE
+ chrnum = Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ /* chrhigh += 1; */
+#else
+ j = 1;
+#ifdef NO_EXTENSIONS_BEFORE_ZERO
+ goal = last_diagonal - querylength + 1;
+#else
+ goal = last_diagonal + 1;
+#endif
+ while (j < nchromosomes_local && chrhighs_local[j] < goal) {
+ j <<= 1; /* gallop by 2 */
+ }
+ if (j >= nchromosomes_local) {
+ j = binary_search(j >> 1,nchromosomes_local,chrhighs_local,goal);
+ } else {
+ j = binary_search(j >> 1,j,chrhighs_local,goal);
+ }
+ chrnum += j;
+#ifdef DEBUG15
+ if (chrnum != Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength)) {
+ fprintf(stderr,"Got chrnum %d, but wanted %d\n",
+ chrnum,Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength));
+ abort();
+ }
+#endif
+ chroffset = chroffsets[chrnum-1];
+ chrhigh = chrhighs[chrnum-1];
+ chrlength = chrlengths[chrnum-1];
+ chrhighs_local += j;
+ nchromosomes_local -= j;
+#endif
+ }
+
+ debug1(printf("last_diagonal %u vs chrhigh %u (looking for <=)\n",last_diagonal,chrhigh));
+ if (last_diagonal <= chrhigh) { /* FORMULA for high position */
+ /* position of high end is within current chromosome */
+ debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ (unsigned long long) last_diagonal,first_querypos,last_querypos,
+ (unsigned long long) chroffset,(unsigned long long) chrhigh,
+ floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+ /* Save segment, but first advance splicesites past segment_left */
+ segment_left = last_diagonal - querylength;
+#if 0
+ /* Last segment is not spliceable */
+ max_distance = overall_max_distance;
+#endif
+ if (splicesites_local[0] >= last_diagonal) {
+ ptr->splicesites_i = -1;
+ } else if (Splicetrie_splicesite_p(segment_left,/*pos5*/1,/*pos3*/querylength) == false) {
+ ptr->splicesites_i = -1;
+ } else {
+ if (splicesites_local[0] < segment_left) {
+ j = 1;
+ while (j < nsplicesites_local && splicesites_local[j] < segment_left) {
+ j <<= 1; /* gallop by 2 */
+ }
+ if (j >= nsplicesites_local) {
+ j = binary_search(j >> 1,nsplicesites_local,splicesites_local,segment_left);
+ } else {
+ j = binary_search(j >> 1,j,splicesites_local,segment_left);
+ }
+ joffset += j;
+ splicesites_local += j;
+ nsplicesites_local -= j;
+ }
+
+ if (splicesites_local[0] >= last_diagonal) {
+ ptr->splicesites_i = -1;
+ } else {
+ ptr->splicesites_i = joffset;
+#if 0
+ /* Last segment is not spliceable */
+ if (splicedists[joffset] > overall_max_distance) {
+ max_distance = splicedists[joffset];
+ }
+#endif
+ }
+ }
+
+ /* Save segment */
+ ptr->diagonal = last_diagonal;
+ ptr->chrnum = chrnum;
+ ptr->chroffset = chroffset;
+ ptr->chrhigh = chrhigh;
+ ptr->chrlength = chrlength;
+ ptr->querypos5 = first_querypos;
+ ptr->querypos3 = last_querypos;
+
+ /* FORMULA */
+ if (plusp) {
+ ptr->lowpos = ptr->diagonal - querylength + ptr->querypos5;
+ ptr->highpos = ptr->diagonal - querylength + ptr->querypos3 + index1part;
+ } else {
+ ptr->lowpos = ptr->diagonal - ptr->querypos3 - index1part - index1part;
+ ptr->highpos = ptr->diagonal - ptr->querypos5 - index1part;
+ }
+
+ ptr->floor = floor;
+ ptr->floor_xfirst = floor_xfirst;
+ ptr->floor_xlast = floor_xlast;
+ ptr->floor_left = floor_left;
+ ptr->floor_right = floor_right;
+ ptr->leftmost = ptr->rightmost = -1;
+ ptr->left_splice_p = ptr->right_splice_p = false;
+#if 0
+ ptr->leftspan = ptr->rightspan = -1;
+#endif
+ ptr->usedp = false;
+ ptr->pairablep = false;
+
+ /* Last segment is not spliceable */
+ debug14(printf("Saving segment at %u (%u), query %d..%d",last_diagonal,last_diagonal - chroffset,ptr->querypos5,ptr->querypos3));
+ *ptr_all++ = ptr;
+ if (last_querypos >= first_querypos + /*min_segment_length*/1) {
+ debug14(printf(" ANCHOR"));
+ *ptr_anchor++ = ptr;
+ }
+ debug14(printf("\n"));
+ ptr++;
+ }
+
+
+ if (ptr > ptr_chrstart) {
+ /* Final chr marker segment */
+ debug14(printf("=== ptr %p > ptr_chrstart %p, so adding final chr marker segment\n",ptr,ptr_chrstart));
+ ptr->diagonal = (Univcoord_T) -1;
+ /* ptr_chrstart = */ ++ptr;
+ }
+
+#ifdef DEBUG19
+ for (k = 0, ptr0 = segments; ptr0 < ptr; k++, ptr0++) {
+ printf("%d %llu\n",k,(unsigned long long) ptr0->diagonal);
+ }
+ printf("total_npositions = %d, nchromosomes = %d\n",total_npositions,nchromosomes);
+#endif
+
+ FREEA(heap);
+ FREEA(batchpool);
+
+ /* Note: segments is in descending diagonal order. Will need to
+ reverse before solving middle deletions */
+
+ *nsegments = ptr - segments;
+ *nanchors = ptr_anchor - *anchor_segments;
+ *nspliceable = ptr_spliceable - *spliceable;
+ debug(printf("nsegments = %d, of which %d are spliceable (total_npositions = %d, nchromosomes = %d)\n",
+ *nsegments,*nspliceable,total_npositions,nchromosomes));
+ debug1(printf("nsegments = %d, of which %d are spliceable (total_npositions = %d, nchromosomes = %d)\n",
+ *nsegments,*nspliceable,total_npositions,nchromosomes));
+
+ assert(*nsegments <= total_npositions + nchromosomes);
+ assert(*nanchors <= total_npositions);
+ assert(*nspliceable <= total_npositions);
+
+ n_all_segments = ptr_all - all_segments;
+ debug(printf("%d all segments\n",n_all_segments));
+ debug(printf("%d anchor segments\n",*nanchors));
+
+ if (n_all_segments <= max_anchors) {
+ /* Might as well use all segments */
+ FREE(*anchor_segments);
+ *anchor_segments = all_segments;
+ *nanchors = n_all_segments;
+
+ } else if (*nanchors <= max_anchors) {
+ /* Use only the good anchor segments */
+ FREE(all_segments);
+
+ } else {
+ /* Need to limit anchor segments */
+ FREE(all_segments);
+
+ /* Treat each mod separately */
+ qsort(*anchor_segments,*nanchors,sizeof(Segment_T),Segment_mod_length_cmp);
+
+ mod = 0;
+ i = 0;
+ while (mod < index1interval) {
+ j = i;
+ while (j < *nanchors && (*anchor_segments)[j]->querypos5 % index1interval == mod) {
+ j++;
+ }
+ nanchors_bymod[mod] = j - i;
+
+ if (j - i <= max_anchors) {
+ naccept_bymod[mod] = j - i;
+ } else {
+ k = i + max_anchors;
+ length_threshold = (*anchor_segments)[k]->querypos3 - (*anchor_segments)[k]->querypos5;
+ while (k < j && k < i + max_anchors + /*ties*/100 &&
+ (*anchor_segments)[k]->querypos3 - (*anchor_segments)[k]->querypos5 == length_threshold) {
+ k++;
+ }
+ naccept_bymod[mod] = k - i;
+ }
+
+ debug(printf("For mod %d, accepting %d out of %d anchor segments with length threshold %d\n",
+ mod,naccept_bymod[mod],nanchors_bymod[mod],length_threshold));
+ i = j;
+ mod++;
+ }
+
+ /* Move good anchors to start of array */
+ dest = src = &((*anchor_segments)[0]);
+ *nanchors = 0;
+ for (mod = 0; mod < index1interval; mod++) {
+ memmove((void *) dest,(void *) src,naccept_bymod[mod] * sizeof(Segment_T));
+ dest += naccept_bymod[mod];
+ src += nanchors_bymod[mod];
+ *nanchors += naccept_bymod[mod];
+ }
+
+ /* Re-sort in diagonal order */
+ qsort(*anchor_segments,*nanchors,sizeof(Segment_T),Segment_diagonal_cmp);
+ }
+
+
+#ifdef DEBUG19
+ printf("%d total segments\n",*nsegments);
+ for (ptr0 = segments; ptr0 < ptr; ptr0++) {
+ printf("%u %d..%d\n",ptr0->diagonal,ptr0->querypos5,ptr0->querypos3);
+ }
+#endif
+
+#ifdef DEBUG
+ printf("%d selected anchor segments\n",*nanchors);
+ for (p = &(*anchor_segments)[0]; p< &((*anchor_segments)[*nanchors]); p++) {
+ segment = (Segment_T) *p;
+ printf("%u %d..%d\n",segment->diagonal,segment->querypos5,segment->querypos3);
+ }
+#endif
+
+ return segments;
+}
+#endif
+
+
+
+#ifndef USE_HEAP
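+/* Merges the per-querypos position batches in (diagonal, querypos) order using a loser tree; losers[0] holds the current winner */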
+/* TODO: Change spliceable to be an attribute of the segment. Then we
+ can loop over anchor_segments only */
+static struct Segment_T *
+identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchors,
+ Segment_T **spliceable, int *nspliceable,
+#ifdef LARGE_GENOMES
+ unsigned char **positions_high, UINT4 **positions_low,
+#else
+ Univcoord_T **positions,
+#endif
+ int *npositions, bool *omitted, int querylength, int query_lastpos,
+ Floors_T floors, bool plusp) {
+ struct Segment_T *segments = NULL;
+ Segment_T *all_segments, *ptr_all, *ptr_anchor, *dest, *src;
+ int length_threshold;
+ int n_all_segments, n;
+ int nanchors_bymod[MAX_INDEX1INTERVAL], naccept_bymod[MAX_INDEX1INTERVAL];
+ int mod;
+ int k;
+
+ struct Batch_T *batchpool;
+ Batch_T *losers, current;
+ Batch_T batch;
+ int heapsize;
+ int parenti, elti, i;
+ int querypos, first_querypos, last_querypos;
+ int floor_left, floor_right, floor_incr;
+ int floor, floor_xfirst, floor_xlast, *floors_from_xfirst, *floors_to_xlast;
+ int *floors_from_neg3, *floors_to_pos3;
+ /* int exclude_xfirst, exclude_xlast; */
+ Univcoord_T diagonal, segment_left, last_diagonal, chroffset = 0U, chrhigh = 0U;
+ Chrpos_T chrlength, max_distance;
+ Chrnum_T chrnum = 1;
+#ifdef OLD_FLOOR_ENDS
+ int halfquerylength, halfquery_lastpos;
+#endif
+
+#ifdef DIAGONAL_ADD_QUERYPOS
+ UINT8 winner_diagonal_add_querypos, current_diagonal_add_querypos;
+#else
+ Univcoord_T winner_diagonal, current_diagonal;
+ int winner_querypos, current_querypos;
+#endif
+ int total_npositions = 0;
+ int joffset = 0, j;
+
+#ifdef DEBUG
+ Segment_T segment, *p;
+#endif
+
+ Segment_T ptr, ptr_chrstart;
+ Segment_T *ptr_spliceable;
+ /* bool next_spliceable_p; */
+#ifdef DEBUG19
+ Segment_T ptr0;
+#endif
+#ifndef SLOW_CHR_UPDATE
+ Univcoord_T goal;
+ int nchromosomes_local = nchromosomes;
+ Univcoord_T *chrhighs_local = chrhighs;
+#endif
+
+ Univcoord_T *splicesites_local, splicesites_static[1];
+ int nsplicesites_local;
+
+ debug(printf("*** Starting identify_all_segments on %s ***\n",plusp ? "plus" : "minus"));
+
+ if (floors == NULL) {
+ *nsegments = 0;
+ *anchor_segments = (Segment_T *) NULL;
+ *nanchors = 0;
+ *spliceable = (Segment_T *) NULL;
+ *nspliceable = 0;
+ return (struct Segment_T *) NULL;
+ }
+
+ if (splicesites == NULL) {
+ splicesites_local = splicesites_static;
+ splicesites_local[0] = (Univcoord_T) -1;
+ nsplicesites_local = 0;
+ } else {
+ splicesites_local = splicesites;
+ nsplicesites_local = nsplicesites;
+ }
+
+#ifdef OLD_FLOOR_ENDS
+ halfquerylength = querylength / 2;
+ halfquery_lastpos = halfquerylength - index1part;
+#endif
+
+ heapsize = 0;
+ for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
+ if (omitted[querypos] == true) {
+ /* Skip */
+ } else if (npositions[querypos] > 0) {
+ heapsize++;
+ } else {
+ /* Skip */
+ }
+ }
+ if (heapsize == 0) {
+ *nsegments = 0;
+ return (struct Segment_T *) NULL;
+ } else {
+ /* Set up batches */
+ batchpool = (struct Batch_T *) MALLOCA(heapsize * sizeof(struct Batch_T));
+ losers = (Batch_T *) MALLOCA((2*heapsize+1) * sizeof(Batch_T));
+ }
+
+ /* Don't add entries for compoundpos positions (skip querypos -2, -1, lastpos+1, lastpos+2) */
+ if (plusp) {
+ elti = 0;
+ for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
+ if (omitted[querypos] == true) {
+ debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+ querypos,npositions[querypos],omitted[querypos]));
+ } else if (npositions[querypos] > 0) {
+ debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
+ querypos,npositions[querypos],omitted[querypos]));
+ batch = &(batchpool[elti]);
+#ifdef LARGE_GENOMES
+ Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions_high[querypos],positions_low[querypos],
+ npositions[querypos],querylength,/*nodei*/heapsize + elti);
+#else
+ Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions[querypos],
+ npositions[querypos],querylength,/*nodei*/heapsize + elti);
+#endif
+ losers[heapsize + elti++] = batch;
+ total_npositions += npositions[querypos];
+ } else {
+ debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+ querypos,npositions[querypos],omitted[querypos]));
+ }
+ }
+ } else {
+ elti = 0;
+ for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
+ if (omitted[querypos] == true) {
+ debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+ querypos,npositions[querypos],omitted[querypos]));
+ } else if (npositions[querypos] > 0) {
+ debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
+ querypos,npositions[querypos],omitted[querypos]));
+ batch = &(batchpool[elti]);
+#ifdef LARGE_GENOMES
+ Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions_high[querypos],positions_low[querypos],
+ npositions[querypos],querylength,/*nodei*/heapsize + elti);
+#else
+ Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions[querypos],
+ npositions[querypos],querylength,/*nodei*/heapsize + elti);
+#endif
+ losers[heapsize + elti++] = batch;
+ total_npositions += npositions[querypos];
+ } else {
+ debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+ querypos,npositions[querypos],omitted[querypos]));
+ }
+ }
+ }
+ debug14(printf("Initial total_npositions = %d\n",total_npositions));
+
+ init_tree(losers,heapsize);
+
+
+ /* Putting chr marker "segments" after each chromosome */
+ segments = (struct Segment_T *) MALLOC((total_npositions + nchromosomes) * sizeof(struct Segment_T));
+ ptr_chrstart = ptr = &(segments[0]);
+ all_segments = (Segment_T *) MALLOC(total_npositions * sizeof(Segment_T));
+ ptr_all = &(all_segments[0]);
+ *anchor_segments = (Segment_T *) MALLOC(total_npositions * sizeof(Segment_T));
+ ptr_anchor = &((*anchor_segments)[0]);
+ if (overall_max_distance == 0) {
+ ptr_spliceable = *spliceable = (Segment_T *) NULL;
+ } else {
+ ptr_spliceable = *spliceable = (Segment_T *) CALLOC(total_npositions,sizeof(Segment_T));
+ }
+
+ /*
+ if ((exclude_xfirst = firstbound-2-index1part-max_end_insertions) < 3) {
+ exclude_xfirst = 3;
+ }
+ if ((exclude_xlast = lastbound+1+max_end_insertions) > query_lastpos-3) {
+ exclude_xlast = query_lastpos-3;
+ }
+ */
+
+#if 0
+ /* Should account for firstbound and lastbound */
+ floors_from_xfirst = floors->scorefrom[/* xfirst_from = */ firstbound-index1interval+max_end_insertions];
+ floors_to_xlast = floors->scoreto[/* xlast_to = */ lastbound+1+index1interval-index1part-max_end_insertions];
+#else
+ /* This was previously run in identify_all_segments and not in identify_all_segments_for_terminals */
+ if (spansize /* +max_end_insertions */ > query_lastpos + index1interval) {
+ floors_from_xfirst = floors->scorefrom[query_lastpos+index1interval];
+ } else {
+ floors_from_xfirst = floors->scorefrom[spansize /* +max_end_insertions */];
+ }
+ if (query_lastpos-spansize /* -max_end_insertions */ < -index1interval) {
+ floors_to_xlast = floors->scoreto[-index1interval];
+ } else {
+ floors_to_xlast = floors->scoreto[query_lastpos-spansize /* -max_end_insertions */];
+ }
+#endif
+ floors_from_neg3 = floors->scorefrom[-index1interval];
+ floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+
+
+ /* Initialize loop */
+ batch = losers[0];
+ first_querypos = last_querypos = querypos = batch->querypos;
+ last_diagonal = diagonal = batch->diagonal;
+
+ floor_incr = floors_from_neg3[first_querypos];
+ floor = floor_incr;
+ floor_xlast = floor_incr;
+ floor_xfirst = floors_from_xfirst[first_querypos] /* floors->scorefrom[xfirst_from][first_querypos] */;
+
+#ifdef OLD_FLOOR_ENDS
+ if (querypos < halfquery_lastpos) {
+ floor_left = floor_incr;
+ } else {
+ floor_left = floors->scorefrom[-index1interval][halfquery_lastpos];
+ }
+ if (querypos < halfquerylength) {
+ floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos];
+ } else {
+ floor_right = floors->scorefrom[halfquerylength-index1interval][first_querypos];
+ }
+#else
+ floor_left = floor_incr;
+#ifdef DEBUG1
+ floor_right = -99;
+#endif
+#endif
+
+
+ debug1(printf("multiple_mm_%s, diagonal %llu, querypos %d\n",
+ plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
+ debug1(printf("first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+ first_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+ if (--batch->npositions <= 0) {
+#ifdef DIAGONAL_ADD_QUERYPOS
+ batch->diagonal_add_querypos = (UINT8) -1; /* infinity */
+ /* batch->diagonal_add_querypos <<= 32; */
+#else
+ batch->querypos = querylength; /* essentially infinity */
+ batch->diagonal = (Univcoord_T) -1; /* infinity */
+#endif
+
+ } else {
+ /* Use this batch for insertion (same querypos) */
+#ifdef LARGE_GENOMES
+ batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
+#elif defined(WORDS_BIGENDIAN)
+ batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
+#else
+ batch->diagonal = *(++batch->positions) + batch->diagterm;
+#endif
+#ifdef DIAGONAL_ADD_QUERYPOS
+ batch->diagonal_add_querypos = (UINT8) batch->diagonal;
+ batch->diagonal_add_querypos <<= 32;
+ batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
+#endif
+ }
+
+ /* Update tree */
+#ifdef DIAGONAL_ADD_QUERYPOS
+ winner_diagonal_add_querypos = batch->diagonal_add_querypos;
+ for (parenti = PARENT(batch->nodei); parenti > 0; parenti = PARENT(parenti)) {
+ current = losers[parenti];
+ current_diagonal_add_querypos = current->diagonal_add_querypos;
+ if (current_diagonal_add_querypos < winner_diagonal_add_querypos) {
+ losers[parenti] = batch;
+ batch = current;
+ winner_diagonal_add_querypos = current_diagonal_add_querypos;
+ }
+ }
+#else
+ winner_diagonal = batch->diagonal;
+ winner_querypos = batch->querypos;
+ for (parenti = PARENT(batch->nodei); parenti > 0; parenti = PARENT(parenti)) {
+ current = losers[parenti];
+ current_diagonal = current->diagonal;
+ current_querypos = current->querypos;
+ if (current_diagonal < winner_diagonal ||
+ (current_diagonal == winner_diagonal && current_querypos < winner_querypos)) {
+ losers[parenti] = batch;
+ batch = current;
+ winner_diagonal = current_diagonal;
+ winner_querypos = current_querypos;
}
}
#endif
- heap[parenti] = batch;
+ losers[0] = batch;
/* Continue after initialization */
- while (heapsize > 0) {
- batch = heap[1];
+ while (
+#ifdef DIAGONAL_ADD_QUERYPOS
+ (batch = losers[0])->diagonal_add_querypos != (UINT8) -1
+#else
+ (batch = losers[0])->diagonal != (Univcoord_T) -1
+#endif
+ ) {
+ /* batch = losers[0]; */
querypos = batch->querypos;
diagonal = batch->diagonal;
debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
@@ -4196,9 +5484,10 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
plusp ? "plus" : "minus",last_diagonal,diagonal));
}
debug14(printf("Saving segment at %u, query %d..%d",last_diagonal,ptr->querypos5,ptr->querypos3));
- all_segments = List_push(all_segments,(void *) ptr);
+ *ptr_all++ = ptr;
if (last_querypos >= first_querypos + /*min_segment_length*/1) {
- *anchor_segments = List_push(*anchor_segments,(void *) ptr);
+
+ *ptr_anchor++ = ptr;
debug14(printf(" ANCHOR"));
}
debug14(printf("\n"));
@@ -4242,15 +5531,18 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
if (--batch->npositions <= 0) {
- /* Use last entry in heap for insertion */
- batch = heap[heapsize];
- querypos = batch->querypos;
- heap[heapsize--] = sentinel;
+#ifdef DIAGONAL_ADD_QUERYPOS
+ batch->diagonal_add_querypos = (UINT8) -1; /* infinity */
+ /* batch->diagonal_add_querypos <<= 32; */
+#else
+ batch->querypos = querylength; /* essentially infinity */
+ batch->diagonal = (Univcoord_T) -1; /* infinity */
+#endif
} else {
/* Use this batch for insertion (same querypos) */
#ifdef LARGE_GENOMES
- batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
+ batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
#elif defined(WORDS_BIGENDIAN)
batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
#else
@@ -4263,41 +5555,35 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
#endif
}
- /* heapify */
- parenti = 1;
+ /* Update tree */
#ifdef DIAGONAL_ADD_QUERYPOS
- diagonal_add_querypos = batch->diagonal_add_querypos;
- smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
- while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
- heap[parenti] = heap[smallesti];
- parenti = smallesti;
- smallesti = LEFT(parenti);
- righti = smallesti+1;
- if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
- smallesti = righti;
+ winner_diagonal_add_querypos = batch->diagonal_add_querypos;
+ for (parenti = PARENT(batch->nodei); parenti > 0; parenti = PARENT(parenti)) {
+ current = losers[parenti];
+ current_diagonal_add_querypos = current->diagonal_add_querypos;
+ if (current_diagonal_add_querypos < winner_diagonal_add_querypos) {
+ losers[parenti] = batch;
+ batch = current;
+ winner_diagonal_add_querypos = current_diagonal_add_querypos;
}
}
#else
- diagonal = batch->diagonal;
- smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
- ((heap[3]->diagonal == heap[2]->diagonal) &&
- (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
- /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
- while (diagonal > heap[smallesti]->diagonal ||
- (diagonal == heap[smallesti]->diagonal &&
- querypos > heap[smallesti]->querypos)) {
- heap[parenti] = heap[smallesti];
- parenti = smallesti;
- smallesti = LEFT(parenti);
- righti = smallesti+1;
- if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
- ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
- (heap[righti]->querypos < heap[smallesti]->querypos))) {
- smallesti = righti;
+ winner_diagonal = batch->diagonal;
+ winner_querypos = batch->querypos;
+ for (parenti = PARENT(batch->nodei); parenti > 0; parenti = PARENT(parenti)) {
+ current = losers[parenti];
+ current_diagonal = current->diagonal;
+ current_querypos = current->querypos;
+ if (current_diagonal < winner_diagonal ||
+ (current_diagonal == winner_diagonal && current_querypos < winner_querypos)) {
+ losers[parenti] = batch;
+ batch = current;
+ winner_diagonal = current_diagonal;
+ winner_querypos = current_querypos;
}
}
#endif
- heap[parenti] = batch;
+ losers[0] = batch;
}
debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
debug14(printf("\n"));
@@ -4448,10 +5734,10 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
/* Last segment is not spliceable */
debug14(printf("Saving segment at %u, query %d..%d",last_diagonal,ptr->querypos5,ptr->querypos3));
- all_segments = List_push(all_segments,(void *) ptr);
+ *ptr_all++ = ptr;
if (last_querypos >= first_querypos + /*min_segment_length*/1) {
debug14(printf(" ANCHOR"));
- *anchor_segments = List_push(*anchor_segments,(void *) ptr);
+ *ptr_anchor++ = ptr;
}
debug14(printf("\n"));
ptr++;
@@ -4472,13 +5758,14 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
printf("total_npositions = %d, nchromosomes = %d\n",total_npositions,nchromosomes);
#endif
- FREEA(heap);
+ FREEA(losers);
FREEA(batchpool);
/* Note: segments is in descending diagonal order. Will need to
reverse before solving middle deletions */
*nsegments = ptr - segments;
+ *nanchors = ptr_anchor - *anchor_segments;
*nspliceable = ptr_spliceable - *spliceable;
debug(printf("nsegments = %d, of which %d are spliceable (total_npositions = %d, nchromosomes = %d)\n",
*nsegments,*nspliceable,total_npositions,nchromosomes));
@@ -4486,40 +5773,69 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
*nsegments,*nspliceable,total_npositions,nchromosomes));
assert(*nsegments <= total_npositions + nchromosomes);
+ assert(*nanchors <= total_npositions);
+ assert(*nspliceable <= total_npositions);
- debug(printf("%d all segments\n",List_length(all_segments)));
- debug(printf("%d anchor segments\n",List_length(*anchor_segments)));
- if (List_length(all_segments) <= max_anchors) {
+ n_all_segments = ptr_all - all_segments;
+ debug(printf("%d all segments\n",n_all_segments));
+ debug(printf("%d anchor segments\n",*nanchors));
+
+ if (n_all_segments <= max_anchors) {
/* Might as well use all segments */
- List_free(&(*anchor_segments));
- *anchor_segments = List_reverse(all_segments);
+ FREE(*anchor_segments);
+ *anchor_segments = all_segments;
+ *nanchors = n_all_segments;
- } else if (List_length(*anchor_segments) <= max_anchors) {
+ } else if (*nanchors <= max_anchors) {
/* Use only the good anchor segments */
- List_free(&all_segments);
- *anchor_segments = List_reverse(*anchor_segments);
+ FREE(all_segments);
} else {
/* Need to limit anchor segments */
- List_free(&all_segments);
+ FREE(all_segments);
- array = (Segment_T *) List_to_array_n(&nanchors,*anchor_segments);
- qsort(array,nanchors,sizeof(Segment_T),Segment_length_cmp);
- List_free(&(*anchor_segments));
- *anchor_segments = (List_T) NULL;
+ /* Treat each mod separately */
+ qsort(*anchor_segments,*nanchors,sizeof(Segment_T),Segment_mod_length_cmp);
- length_threshold = array[max_anchors]->querypos3 - array[max_anchors]->querypos5;
- n = max_anchors;
- while (n < nanchors && n < max_anchors + /*ties*/100 && array[n]->querypos3 - array[n]->querypos5 == length_threshold) {
- n++;
+ mod = 0;
+ i = 0;
+ while (mod < index1interval) {
+ j = i;
+ while (j < *nanchors && (*anchor_segments)[j]->querypos5 % index1interval == mod) {
+ j++;
+ }
+ nanchors_bymod[mod] = j - i;
+
+ if (j - i <= max_anchors) {
+ naccept_bymod[mod] = j - i;
+ } else {
+ k = i + max_anchors;
+ length_threshold = (*anchor_segments)[k]->querypos3 - (*anchor_segments)[k]->querypos5;
+ while (k < j && k < i + max_anchors + /*ties*/100 &&
+ (*anchor_segments)[k]->querypos3 - (*anchor_segments)[k]->querypos5 == length_threshold) {
+ k++;
+ }
+ naccept_bymod[mod] = k - i;
+ }
+
+ debug(printf("For mod %d, accepting %d out of %d anchor segments with length threshold %d\n",
+ mod,naccept_bymod[mod],nanchors_bymod[mod],length_threshold));
+ i = j;
+ mod++;
}
- /* Re-sort in diagonal order */
- qsort(array,n,sizeof(Segment_T),Segment_diagonal_cmp);
- for (i = n-1; i >= 0; i--) {
- *anchor_segments = List_push(*anchor_segments,(void *) array[i]);
+ /* Move good anchors to start of array */
+ dest = src = &((*anchor_segments)[0]);
+ *nanchors = 0;
+ for (mod = 0; mod < index1interval; mod++) {
+ memmove((void *) dest,(void *) src,naccept_bymod[mod] * sizeof(Segment_T));
+ dest += naccept_bymod[mod];
+ src += nanchors_bymod[mod];
+ *nanchors += naccept_bymod[mod];
}
- FREE(array);
+
+ /* Re-sort in diagonal order */
+ qsort(*anchor_segments,*nanchors,sizeof(Segment_T),Segment_diagonal_cmp);
}
@@ -4531,15 +5847,17 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
#endif
#ifdef DEBUG
- printf("%d selected anchor segments\n",List_length(*anchor_segments));
- for (p = *anchor_segments; p != NULL; p = List_next(p)) {
- segment = (Segment_T) List_head(p);
+ printf("%d selected anchor segments\n",*nanchors);
+ for (p = &(*anchor_segments)[0]; p< &((*anchor_segments)[*nanchors]); p++) {
+ segment = (Segment_T) *p;
printf("%u %d..%d\n",segment->diagonal,segment->querypos5,segment->querypos3);
}
#endif
return segments;
}
+#endif
+
#if 0
@@ -4673,33 +5991,36 @@ pair_up_segments (struct Segment_T *plus_segments_5, int plus_nsegments_5,
static void
-pair_up_anchor_segments (List_T plus_anchor_segments_5, List_T minus_anchor_segments_5,
- List_T plus_anchor_segments_3, List_T minus_anchor_segments_3,
- Chrpos_T pairmax) {
+pair_up_anchor_segments (Segment_T *plus_anchor_segments_5, Segment_T *minus_anchor_segments_5,
+ Segment_T *plus_anchor_segments_3, Segment_T *minus_anchor_segments_3,
+ int n_plus_anchors_5, int n_minus_anchors_5,
+ int n_plus_anchors_3, int n_minus_anchors_3, Chrpos_T pairmax) {
/* Univcoord_T insert_start; */
Segment_T segment5, segment3;
- List_T q, pstart, pend, p;
+ Segment_T *q, *pstart, *pend, *p;
debug(printf("Entering pair_up_anchor_segments\n"));
/* plus/plus */
- pstart = plus_anchor_segments_3;
- for (q = plus_anchor_segments_5; q != NULL && pstart != NULL; q = List_next(q)) {
- segment5 = (Segment_T) List_head(q);
+ pstart = &(plus_anchor_segments_3[0]);
+ for (q = &(plus_anchor_segments_5[0]);
+ q < &(plus_anchor_segments_5[n_plus_anchors_5]) && pstart < &(plus_anchor_segments_3[n_plus_anchors_3]);
+ q++) {
+ segment5 = *q;
assert(segment5->diagonal != (Univcoord_T) -1);
/* insert_start = segment5->diagonal; */
- while (pstart != NULL && ((Segment_T) pstart->first)->diagonal < segment5->diagonal) {
- pstart = List_next(pstart);
+ while (pstart < &(plus_anchor_segments_3[n_plus_anchors_3]) && (*pstart)->diagonal < segment5->diagonal) {
+ pstart++;
}
pend = pstart;
- while (pend != NULL && ((Segment_T) pend->first)->diagonal < segment5->diagonal + pairmax) {
- pend = List_next(pend);
+ while (pend < &(plus_anchor_segments_3[n_plus_anchors_3]) && (*pend)->diagonal < segment5->diagonal + pairmax) {
+ pend++;
}
- for (p = pstart; p != pend; p = List_next(p)) {
- segment3 = (Segment_T) List_head(p);
+ for (p = pstart; p != pend; p++) {
+ segment3 = *p;
assert(segment3->diagonal - segment5->diagonal < pairmax);
debug5(printf("Setting plus segments to be pairable: %u and %u (distance %u)\n",
segment5->diagonal,segment3->diagonal,segment3->diagonal - segment5->diagonal));
@@ -4709,23 +6030,25 @@ pair_up_anchor_segments (List_T plus_anchor_segments_5, List_T minus_anchor_segm
}
/* minus/minus */
- pstart = minus_anchor_segments_5;
- for (q = minus_anchor_segments_3; q != NULL && pstart != NULL; q = List_next(q)) {
- segment3 = (Segment_T) List_head(q);
+ pstart = &(minus_anchor_segments_5[0]);
+ for (q = &(minus_anchor_segments_3[0]);
+ q < &(minus_anchor_segments_3[n_minus_anchors_3]) && pstart < &(minus_anchor_segments_5[n_minus_anchors_5]);
+ q++) {
+ segment3 = *q;
assert(segment3->diagonal != (Univcoord_T) -1);
/* insert_start = segment3->diagonal; */
- while (pstart != NULL && ((Segment_T) pstart->first)->diagonal < segment3->diagonal) {
- pstart = List_next(pstart);
+ while (pstart < &(minus_anchor_segments_5[n_minus_anchors_5]) && (*pstart)->diagonal < segment3->diagonal) {
+ pstart++;
}
pend = pstart;
- while (pend != NULL && ((Segment_T) pend->first)->diagonal < segment3->diagonal + pairmax) {
- pend = List_next(pend);
+ while (pend < &(minus_anchor_segments_5[n_minus_anchors_5]) && (*pend)->diagonal < segment3->diagonal + pairmax) {
+ pend++;
}
- for (p = pstart; p != pend; p = List_next(p)) {
- segment5 = (Segment_T) List_head(p);
+ for (p = pstart; p != pend; p++) {
+ segment5 = *p;
assert(segment5->diagonal - segment3->diagonal < pairmax);
debug5(printf("Setting minus segments to be pairable: %u and %u (distance %u)\n",
segment3->diagonal,segment5->diagonal,segment5->diagonal - segment3->diagonal));
@@ -6328,7 +7651,8 @@ solve_end_indel_high (int *found_score, int *nhits, List_T hits, Segment_T ptr,
of them */
static List_T
find_end_indels (int *found_score, int *nhits, List_T hits,
- List_T plus_anchor_segments, List_T minus_anchor_segments,
+ Segment_T *plus_anchor_segments, Segment_T *minus_anchor_segments,
+ int n_plus_anchors, int n_minus_anchors,
#ifdef DEBUG2E
char *queryuc_ptr, char *queryrc,
#endif
@@ -6336,14 +7660,13 @@ find_end_indels (int *found_score, int *nhits, List_T hits,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int indel_penalty_end, int max_mismatches_allowed, int genestrand) {
- Segment_T ptr;
- List_T p;
+ Segment_T ptr, *p;
debug(printf("*** find_end_indels with max_mismatches_allowed %d ***\n",
max_mismatches_allowed));
- for (p = plus_anchor_segments; p != NULL; p = List_next(p)) {
- ptr = (Segment_T) List_head(p);
+ for (p = &(plus_anchor_segments[0]); p < &(plus_anchor_segments[n_plus_anchors]); p++) {
+ ptr = *p;
if (ptr->diagonal < (Univcoord_T) -1) {
if (ptr->floor_xfirst <= max_mismatches_allowed) {
@@ -6374,8 +7697,8 @@ find_end_indels (int *found_score, int *nhits, List_T hits,
}
}
- for (p = minus_anchor_segments; p != NULL; p = List_next(p)) {
- ptr = (Segment_T) List_head(p);
+ for (p = &(minus_anchor_segments[0]); p < &(minus_anchor_segments[n_minus_anchors]); p++) {
+ ptr = *p;
if (ptr->diagonal < (Univcoord_T) -1) {
if (ptr->floor_xfirst <= max_mismatches_allowed) {
@@ -9211,7 +10534,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
static void
find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors,
List_T **shortend_acceptors, List_T **shortend_antiacceptors,
- List_T anchor_segments,
+ Segment_T *anchor_segments, int nanchors,
#ifdef DEBUG4E
char *queryptr,
#endif
@@ -9221,8 +10544,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
char *gbuffer;
#endif
- List_T p;
- Segment_T segment;
+ Segment_T segment, *p;
Substring_T hit;
Univcoord_T segment_left;
int nmismatches, jstart, jend, j;
@@ -9249,8 +10571,8 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
floors_from_neg3 = floors->scorefrom[-index1interval];
floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
- for (p = anchor_segments; p != NULL; p = List_next(p)) {
- segment = (Segment_T) List_head(p);
+ for (p = &(anchor_segments[0]); p < &(anchor_segments[nanchors]); p++) {
+ segment = *p;
assert(segment->diagonal != (Univcoord_T) -1);
if (segment->splicesites_i >= 0) {
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
@@ -9450,7 +10772,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
static void
find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_endfrags,
- List_T anchor_segments,
+ Segment_T *anchor_segments, int nanchors,
#ifdef DEBUG4E
char *queryptr,
#endif
@@ -9460,8 +10782,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
char *gbuffer;
#endif
- List_T p;
- Segment_T segment;
+ Segment_T segment, *p;
Substring_T hit;
Univcoord_T segment_left;
int nmismatches;
@@ -9485,8 +10806,8 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
floors_from_neg3 = floors->scorefrom[-index1interval];
floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
- for (p = anchor_segments; p != NULL; p = List_next(p)) {
- segment = (Segment_T) List_head(p);
+ for (p = &(anchor_segments[0]); p < &(anchor_segments[nanchors]); p++) {
+ segment = *p;
assert(segment->diagonal != (Univcoord_T) -1);
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
@@ -9623,7 +10944,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
static void
find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant_endfrags,
- List_T anchor_segments,
+ Segment_T *anchor_segments, int nanchors,
#ifdef DEBUG4E
char *queryptr,
#endif
@@ -9633,8 +10954,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
char *gbuffer;
#endif
- List_T p;
- Segment_T segment;
+ Segment_T segment, *p;
Substring_T hit;
Univcoord_T segment_left;
int nmismatches;
@@ -9658,8 +10978,8 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
floors_from_neg3 = floors->scorefrom[-index1interval];
floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
- for (p = anchor_segments; p != NULL; p = List_next(p)) {
- segment = (Segment_T) List_head(p);
+ for (p = &(anchor_segments[0]); p < &(anchor_segments[nanchors]); p++) {
+ segment = *p;
assert(segment->diagonal != (Univcoord_T) -1);
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
@@ -9800,7 +11120,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
static void
find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonors,
List_T **distant_acceptors, List_T **distant_antiacceptors,
- List_T anchor_segments,
+ Segment_T *anchor_segments, int nanchors,
#ifdef DEBUG4E
char *queryptr,
#endif
@@ -9810,8 +11130,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
char *gbuffer;
#endif
- List_T p;
- Segment_T segment;
+ Segment_T segment, *p;
Substring_T hit;
Univcoord_T segment_left;
int nmismatches, j, i;
@@ -9858,8 +11177,8 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
floors_from_neg3 = floors->scorefrom[-index1interval];
floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
- for (p = anchor_segments; p != NULL; p = List_next(p)) {
- segment = (Segment_T) List_head(p);
+ for (p = &(anchor_segments[0]); p < &(anchor_segments[nanchors]); p++) {
+ segment = *p;
assert(segment->diagonal != (Univcoord_T) -1);
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
@@ -10289,7 +11608,8 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
/* Integrates terminals found from ends by counting mismatches, and
those where querypos3 - querypos5 is long enough */
static List_T
-find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
+find_terminals (Segment_T *plus_anchor_segments, Segment_T *minus_anchor_segments,
+ int n_plus_anchors, int n_minus_anchors,
int querylength, int query_lastpos,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
int max_mismatches_allowed, int genestrand) {
@@ -10297,8 +11617,8 @@ find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
char *gbuffer;
#endif
List_T plus_terminals_middle = NULL, plus_terminals_left = NULL, plus_terminals_right = NULL,
- minus_terminals_middle = NULL, minus_terminals_left = NULL, minus_terminals_right = NULL, p;
- Segment_T segment;
+ minus_terminals_middle = NULL, minus_terminals_left = NULL, minus_terminals_right = NULL, q;
+ Segment_T segment, *p;
Stage3end_T hit;
Univcoord_T segment_left;
int nmismatches_left, nmismatches_right;
@@ -10330,9 +11650,10 @@ find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
}
nterminals_left = nterminals_right = nterminals_middle = 0;
- for (p = plus_anchor_segments; p != NULL && (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS);
- p = List_next(p)) {
- segment = (Segment_T) List_head(p);
+ for (p = &(plus_anchor_segments[0]);
+ p < &(plus_anchor_segments[n_plus_anchors]) && (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS);
+ p++) {
+ segment = *p;
if (0 && segment->usedp == true) {
/* Previously skipped, but looks like a bad idea */
} else if (segment->diagonal < (Univcoord_T) -1) {
@@ -10520,8 +11841,8 @@ find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
if (nterminals_middle >= MAX_NTERMINALS) {
- for (p = plus_terminals_middle; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
+ for (q = plus_terminals_middle; q != NULL; q = q->rest) {
+ hit = (Stage3end_T) q->first;
Stage3end_free(&hit);
}
List_free(&plus_terminals_middle);
@@ -10529,8 +11850,8 @@ find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
}
if (nterminals_left >= MAX_NTERMINALS) {
- for (p = plus_terminals_left; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
+ for (q = plus_terminals_left; q != NULL; q = q->rest) {
+ hit = (Stage3end_T) q->first;
Stage3end_free(&hit);
}
List_free(&plus_terminals_left);
@@ -10538,8 +11859,8 @@ find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
}
if (nterminals_right >= MAX_NTERMINALS) {
- for (p = plus_terminals_right; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
+ for (q = plus_terminals_right; q != NULL; q = q->rest) {
+ hit = (Stage3end_T) q->first;
Stage3end_free(&hit);
}
List_free(&plus_terminals_right);
@@ -10547,9 +11868,10 @@ find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
}
nterminals_left = nterminals_right = nterminals_middle = 0;
- for (p = minus_anchor_segments; p != NULL && (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS);
- p = List_next(p)) {
- segment = (Segment_T) List_head(p);
+ for (p = &(minus_anchor_segments[0]);
+ p < &(minus_anchor_segments[n_minus_anchors]) && (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS);
+ p++) {
+ segment = *p;
if (0 && segment->usedp == true) {
/* Previously skipped, but looks like a bad idea */
debug4t(printf("segment used\n"));
@@ -10731,8 +12053,8 @@ find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
}
if (nterminals_middle >= MAX_NTERMINALS) {
- for (p = minus_terminals_middle; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
+ for (q = minus_terminals_middle; q != NULL; q = q->rest) {
+ hit = (Stage3end_T) q->first;
Stage3end_free(&hit);
}
List_free(&minus_terminals_middle);
@@ -10740,8 +12062,8 @@ find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
}
if (nterminals_left >= MAX_NTERMINALS) {
- for (p = minus_terminals_left; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
+ for (q = minus_terminals_left; q != NULL; q = q->rest) {
+ hit = (Stage3end_T) q->first;
Stage3end_free(&hit);
}
List_free(&minus_terminals_left);
@@ -10749,8 +12071,8 @@ find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
}
if (nterminals_right >= MAX_NTERMINALS) {
- for (p = minus_terminals_right; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
+ for (q = minus_terminals_right; q != NULL; q = q->rest) {
+ hit = (Stage3end_T) q->first;
Stage3end_free(&hit);
}
List_free(&minus_terminals_right);
@@ -10879,8 +12201,14 @@ find_terminals_by_width_only (struct Segment_T *plus_segments, int plus_nsegment
static void
-fetch_positions_for_all_12mers (T this, Indexdb_T plus_indexdb, Indexdb_T minus_indexdb, int query_lastpos) {
+fetch_positions_for_all_12mers (T this, Indexdb_T plus_indexdb, Indexdb_T minus_indexdb,
+ char *queryuc_ptr, int querylength, int query_lastpos) {
int querypos;
+ bool allvalidp;
+
+ if (this->read_oligos_p == false) {
+ read_oligos(&allvalidp,this,queryuc_ptr,querylength,query_lastpos,/*genestrand*/0);
+ }
/* querypos -2, -1, query_lastpos+1, and query_lastpos+2 are special cases */
/* if allvalidp is true, then 0 and query_lastpos should have been done already */
@@ -12761,16 +14089,19 @@ find_12mer_bounds (int *firstbound, int *lastbound, bool *omitted, int query_las
static Floors_T
compute_floors (bool *any_omitted_p, bool *alloc_floors_p, Floors_T *floors_array,
- T this, int querylength, int query_lastpos, Indexdb_T plus_indexdb, Indexdb_T minus_indexdb,
+ T this, char *queryuc_ptr, int querylength, int query_lastpos,
+ Indexdb_T plus_indexdb, Indexdb_T minus_indexdb,
int indexdb_size_threshold, int max_end_insertions,
bool omit_frequent_p, bool omit_repetitive_p, bool keep_floors_p) {
Floors_T floors;
bool all_omitted_p;
+
if (this->all_positions_fetched_p == true) {
omit_oligos_clear(this,query_lastpos);
} else {
- fetch_positions_for_all_12mers(this,plus_indexdb,minus_indexdb,query_lastpos);
+ fetch_positions_for_all_12mers(this,plus_indexdb,minus_indexdb,
+ queryuc_ptr,querylength,query_lastpos);
}
debug(printf("Omitting frequent/repetitive oligos\n"));
@@ -12804,7 +14135,8 @@ compute_floors (bool *any_omitted_p, bool *alloc_floors_p, Floors_T *floors_arra
static void
complete_set_mm_indels (int *found_score, bool *segments_computed_p,
- List_T *plus_anchor_segments, List_T *minus_anchor_segments,
+ Segment_T **plus_anchor_segments, Segment_T **minus_anchor_segments,
+ int *n_plus_anchors, int *n_minus_anchors,
int *opt_level, int *done_level, int user_maxlevel,
bool revise_levels_p, int *nhits, List_T *subs, List_T *indels, T this,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
@@ -12853,31 +14185,31 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
max_mismatches_allowed,*done_level,fast_level));
if (1 || max_mismatches_allowed >= 0) {
- this->plus_segments = identify_all_segments(&this->plus_nsegments,&(*plus_anchor_segments),
+ this->plus_segments = identify_all_segments(&this->plus_nsegments,&(*plus_anchor_segments),&(*n_plus_anchors),
&this->plus_spliceable,&this->plus_nspliceable,
#ifdef LARGE_GENOMES
this->plus_positions_high,this->plus_positions_low,
#else
this->plus_positions,
#endif
- this->plus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/true);
- this->minus_segments = identify_all_segments(&this->minus_nsegments,&(*minus_anchor_segments),
+ this->plus_npositions,this->omitted,querylength,query_lastpos,
+ floors,/*plusp*/true);
+ this->minus_segments = identify_all_segments(&this->minus_nsegments,&(*minus_anchor_segments),&(*n_minus_anchors),
&this->minus_spliceable,&this->minus_nspliceable,
#ifdef LARGE_GENOMES
this->minus_positions_high,this->minus_positions_low,
#else
this->minus_positions,
#endif
- this->minus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/false);
+ this->minus_npositions,this->omitted,querylength,query_lastpos,
+ floors,/*plusp*/false);
- *subs = find_complete_mm(&(*found_score),&(*nhits),*subs,*plus_anchor_segments,
+ *subs = find_complete_mm(&(*found_score),&(*nhits),*subs,*plus_anchor_segments,*n_plus_anchors,
querylength,/*queryptr:queryuc_ptr,*/
/*query_compress*/query_compress_fwd,
/*max_mismatches_allowed*/*done_level,/*plusp*/true,genestrand);
- *subs = find_complete_mm(&(*found_score),&(*nhits),*subs,*minus_anchor_segments,
+ *subs = find_complete_mm(&(*found_score),&(*nhits),*subs,*minus_anchor_segments,*n_minus_anchors,
querylength,/*queryptr:queryrc,*/
/*query_compress*/query_compress_rev,
/*max_mismatches_allowed*/*done_level,/*plusp*/false,genestrand);
@@ -12908,24 +14240,24 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
/* Need to reverse, because middle indelsplicing procedure depends on ascending diagonal order */
if (*segments_computed_p == false) {
- this->plus_segments = identify_all_segments(&this->plus_nsegments,&(*plus_anchor_segments),
+ this->plus_segments = identify_all_segments(&this->plus_nsegments,&(*plus_anchor_segments),&(*n_plus_anchors),
&this->plus_spliceable,&this->plus_nspliceable,
#ifdef LARGE_GENOMES
this->plus_positions_high,this->plus_positions_low,
#else
this->plus_positions,
#endif
- this->plus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/true);
- this->minus_segments = identify_all_segments(&this->minus_nsegments,&(*minus_anchor_segments),
+ this->plus_npositions,this->omitted,querylength,query_lastpos,
+ floors,/*plusp*/true);
+ this->minus_segments = identify_all_segments(&this->minus_nsegments,&(*minus_anchor_segments),&(*n_minus_anchors),
&this->minus_spliceable,&this->minus_nspliceable,
#ifdef LARGE_GENOMES
this->minus_positions_high,this->minus_positions_low,
#else
this->minus_positions,
#endif
- this->minus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/false);
+ this->minus_npositions,this->omitted,querylength,query_lastpos,
+ floors,/*plusp*/false);
*segments_computed_p = true;
}
@@ -12946,6 +14278,7 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
if (allow_end_indels_p == true) {
debug(printf("*** Stage 6. End indels with %d-%d mismatches allowed\n",indel_level,indel_penalty));
*indels = find_end_indels(&(*found_score),&(*nhits),*indels,*plus_anchor_segments,*minus_anchor_segments,
+ *n_plus_anchors,*n_minus_anchors,
#ifdef DEBUG2E
queryuc_ptr,queryrc,
#endif
@@ -12986,6 +14319,7 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
debug(printf("*** Stage 6 (end). End indels with %d-%d mismatches allowed, found_score = %d\n",
*done_level,indel_penalty_end,*found_score));
*indels = find_end_indels(&(*found_score),&(*nhits),*indels,*plus_anchor_segments,*minus_anchor_segments,
+ *n_plus_anchors,*n_minus_anchors,
#ifdef DEBUG2E
queryuc_ptr,queryrc,
#endif
@@ -13362,7 +14696,6 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
Stage2_middle(stage2),Stage2_all_starts(stage2),Stage2_all_ends(stage2),
#ifdef END_KNOWNSPLICING_SHORTCUT
cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
- watsonp ? query_compress_fwd : query_compress_rev,
#endif
/*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
#ifdef EXTRACT_GENOMICSEG
@@ -14277,7 +15610,8 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
#ifdef END_KNOWNSPLICING_SHORTCUT
char *queryrc, bool invertedp,
#endif
- List_T anchor_segments, struct Segment_T *plus_segments, int plus_nsegments,
+ Segment_T *anchor_segments, int nanchors,
+ struct Segment_T *plus_segments, int plus_nsegments,
Oligoindex_array_T oligoindices_minor,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
@@ -14295,8 +15629,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
bool novelp; /* Want any of the segments in startk..(endk-1) to not be used */
bool pairablep; /* Want any of the segments in startk..(endk-1) to be pairable */
- List_T p;
- Segment_T anchor_segment, segment;
+ Segment_T anchor_segment, segment, *p;
int anchork, startk, endk, n, i, j, firstj, lastj, k, best_starti, best_endi;
Stage3end_T hit;
@@ -14353,8 +15686,8 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
}
anchork = 0;
- for (p = anchor_segments; p != NULL; p = List_next(p)) {
- anchor_segment = (Segment_T) List_head(p);
+ for (p = &(anchor_segments[0]); p < &(anchor_segments[nanchors]); p++) {
+ anchor_segment = *p;
assert(anchor_segment->diagonal != (Univcoord_T) -1);
while (plus_segments[anchork].diagonal != anchor_segment->diagonal) {
anchork++;
@@ -14375,6 +15708,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/* Dynamic programming on left (low) side (querypos5) */
if ((n = (anchork - 1) - (startk + 1) + 1) == 0) {
+ debug13(printf("On querypos5 side, n == 0\n"));
best_starti = -1;
} else {
prev_left = &(prev_allocated[startk+1]);
@@ -14391,10 +15725,14 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
lastj++;
}
- for (j = 0; j < lastj; j++) {
+ /* Go leftward to favor shorter splices */
+ for (j = lastj - 1; j >= 0; --j) {
+ debug13(printf("querypos5 side: %d..%d %u..%u\n",
+ sorted[j]->querypos5,sorted[j]->querypos3,sorted[j]->lowpos,sorted[j]->highpos));
+
best_score = 0;
besti = -1;
- for (i = 0; i < j; i++) {
+ for (i = j - 1; i >= 0; --i) {
if (sorted[i]->lowpos >= sorted[j]->lowpos) {
/* Skip, since doesn't add nucleotides to left */
} else if (sorted[i]->highpos < sorted[j]->lowpos) {
@@ -14417,7 +15755,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/* Anchor segment */
best_score = 0;
best_starti = -1;
- for (i = 0; i < lastj; i++) {
+ for (i = lastj - 1; i >= 0; --i) {
if (sorted[i]->lowpos >= anchor_segment->lowpos) {
/* Skip, since doesn't add nucleotides to left */
} else if (sorted[i]->highpos < anchor_segment->lowpos) {
@@ -14435,6 +15773,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/* Dynamic programming on right (high) side (querypos3) */
if ((n = (endk - 1) - (anchork + 1) + 1) == 0) {
+ debug13(printf("On querypos3 side, n == 0\n"));
best_endi = -1;
} else {
prev_right = &(prev_allocated[anchork+1]);
@@ -14451,10 +15790,14 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
firstj--;
}
- for (j = n - 1; j > firstj; j--) {
+ /* Go rightward to favor shorter splices */
+ for (j = firstj + 1; j < n; j++) {
+ debug13(printf("querypos3 side: %d..%d %u..%u\n",
+ sorted[j]->querypos5,sorted[j]->querypos3,sorted[j]->lowpos,sorted[j]->highpos));
+
best_score = 0;
besti = -1;
- for (i = n - 1; i > j; i--) {
+ for (i = j + 1; i < n; i++) {
/* Compare candidate i against the current segment j.  Comparing
   sorted[i] with itself is always true and would skip every candidate. */
if (sorted[i]->highpos <= sorted[j]->highpos) {
/* Skip, since doesn't add nucleotides to right */
} else if (sorted[i]->lowpos > sorted[j]->highpos) {
@@ -14477,7 +15820,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/* Anchor segment */
best_score = 0;
best_endi = -1;
- for (i = n - 1; i > firstj; i--) {
+ for (i = firstj + 1; i < n; i++) {
if (sorted[i]->highpos <= anchor_segment->highpos) {
/* Skip, since doesn't add nucleotides to right */
} else if (sorted[i]->lowpos > anchor_segment->highpos) {
@@ -14991,7 +16334,8 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
#ifdef END_KNOWNSPLICING_SHORTCUT
char *queryrc, bool invertedp,
#endif
- List_T anchor_segments, struct Segment_T *minus_segments, int minus_nsegments,
+ Segment_T *anchor_segments, int nanchors,
+ struct Segment_T *minus_segments, int minus_nsegments,
Oligoindex_array_T oligoindices_minor,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
@@ -15009,8 +16353,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
bool novelp; /* Want any of the segments in startk..(endk-1) to not be used */
bool pairablep; /* Want any of the segments in startk..(endk-1) to be pairable */
- List_T p;
- Segment_T anchor_segment, segment;
+ Segment_T anchor_segment, segment, *p;
int anchork, startk, endk, n, i, j, firstj, lastj, k, best_starti, best_endi;
Stage3end_T hit;
@@ -15067,8 +16410,8 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
}
anchork = 0;
- for (p = anchor_segments; p != NULL; p = List_next(p)) {
- anchor_segment = (Segment_T) List_head(p);
+ for (p = &(anchor_segments[0]); p < &(anchor_segments[nanchors]); p++) {
+ anchor_segment = *p;
assert(anchor_segment->diagonal != (Univcoord_T) -1);
while (minus_segments[anchork].diagonal != anchor_segment->diagonal) {
anchork++;
@@ -15089,6 +16432,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/* Dynamic programming on left (low) side (querypos3) */
if ((n = (anchork - 1) - (startk + 1) + 1) == 0) {
+ debug13(printf("On querypos3 side, n == 0\n"));
best_starti = -1;
} else {
prev_left = &(prev_allocated[startk+1]);
@@ -15105,10 +16449,14 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
lastj++;
}
- for (j = 0; j < lastj; j++) {
+ /* Go rightward to favor shorter splices */
+ for (j = lastj - 1; j >= 0; --j) {
+ debug13(printf("querypos3 side: %d..%d %u..%u\n",
+ sorted[j]->querypos5,sorted[j]->querypos3,sorted[j]->lowpos,sorted[j]->highpos));
+
best_score = 0;
besti = -1;
- for (i = 0; i < j; i++) {
+ for (i = j - 1; i >= 0; --i) {
if (sorted[i]->lowpos >= sorted[j]->lowpos) {
/* Skip, since doesn't add nucleotides to left */
} else if (sorted[i]->highpos < sorted[j]->lowpos) {
@@ -15131,7 +16479,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/* Anchor segment */
best_score = 0;
best_starti = -1;
- for (i = 0; i < lastj; i++) {
+ for (i = lastj - 1; i >= 0; --i) {
if (sorted[i]->lowpos >= anchor_segment->lowpos) {
/* Skip, since doesn't add nucleotides to left */
} else if (sorted[i]->highpos < anchor_segment->lowpos) {
@@ -15149,6 +16497,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/* Dynamic programming on right (high) side (querypos5) */
if ((n = (endk - 1) - (anchork + 1) + 1) == 0) {
+ debug13(printf("On querypos5 side, n == 0\n"));
best_endi = -1;
} else {
prev_right = &(prev_allocated[anchork+1]);
@@ -15165,10 +16514,14 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
firstj--;
}
- for (j = n - 1; j > firstj; j--) {
+ /* Go leftward to favor shorter splices */
+ for (j = firstj + 1; j < n; j++) {
+ debug13(printf("querypos5 side: %d..%d %u..%u\n",
+ sorted[j]->querypos5,sorted[j]->querypos3,sorted[j]->lowpos,sorted[j]->highpos));
+
best_score = 0;
besti = -1;
- for (i = n - 1; i > j; i--) {
+ for (i = j + 1; i < n; i++) {
/* Compare candidate i against the current segment j.  Comparing
   sorted[i] with itself is always true and would skip every candidate. */
if (sorted[i]->highpos <= sorted[j]->highpos) {
/* Skip, since doesn't add nucleotides to right */
} else if (sorted[i]->lowpos > sorted[j]->highpos) {
@@ -15191,7 +16544,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/* Anchor segment */
best_score = 0;
best_endi = -1;
- for (i = n - 1; i > firstj; i--) {
+ for (i = firstj + 1; i < n; i++) {
if (sorted[i]->highpos <= anchor_segment->highpos) {
/* Skip, since doesn't add nucleotides to right */
} else if (sorted[i]->lowpos > anchor_segment->highpos) {
@@ -15828,9 +17181,9 @@ align_end (int *cutoff_level, T this,
bool keep_floors_p, int genestrand, bool first_read_p) {
List_T hits, greedy = NULL, subs = NULL, terminals = NULL, indels = NULL,
singlesplicing = NULL, doublesplicing = NULL, shortendsplicing = NULL,
- longsinglesplicing = NULL, distantsplicing = NULL, gmap_hits = NULL;
- List_T plus_anchor_segments = NULL, minus_anchor_segments = NULL;
- List_T p;
+ longsinglesplicing = NULL, distantsplicing = NULL, gmap_hits = NULL, q;
+ Segment_T *plus_anchor_segments = NULL, *minus_anchor_segments = NULL;
+ int n_plus_anchors = 0, n_minus_anchors = 0;
Stage3end_T hit, gmap1, gmap2;
int found_score, done_level, opt_level, fast_level, mismatch_level, nmismatches;
int max_splice_mismatches, i;
@@ -15931,7 +17284,6 @@ align_end (int *cutoff_level, T this,
/* Search 1: Suffix array */
- completesetp = true;
#ifdef LARGE_GENOMES
spanningsetp = true;
#else
@@ -15950,6 +17302,13 @@ align_end (int *cutoff_level, T this,
}
debug(printf("SA> found_score %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+ for (q = greedy; q != NULL; q = List_next(q)) {
+ hit = (Stage3end_T) List_head(q);
+ if (Stage3end_total_trim(hit) > 15) {
+ spanningsetp = true;
+ }
+ }
+ debug(printf("spanningsetp %d\n",spanningsetp));
}
#endif
@@ -16018,10 +17377,22 @@ align_end (int *cutoff_level, T this,
/* Search 3: Subs/indels via complete set */
/* 4, 5. Complete set mismatches and indels, omitting frequent oligos */
+ completesetp = false;
+ for (q = subs; q != NULL; q = List_next(q)) {
+ hit = (Stage3end_T) List_head(q);
+ debug(printf("Hit has total score of %d\n",Stage3end_score(hit)));
+ if (Stage3end_score(hit) > done_level) {
+ completesetp = true;
+ }
+ }
+ debug(printf("completesetp %d\n",completesetp));
+
+#if 0
if (found_score <= done_level) {
debug(printf("Test for completeset: false because found_score %d >done_level %d\n",found_score,done_level));
completesetp = false;
}
+#endif
if (querylength < min_kmer_readlength) {
completesetp = false;
@@ -16032,12 +17403,13 @@ align_end (int *cutoff_level, T this,
read_oligos(&allvalidp,this,queryuc_ptr,querylength,query_lastpos,/*genestrand*/0);
}
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,queryuc_ptr,querylength,query_lastpos,
plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
/*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
floors_computed_p = true;
complete_set_mm_indels(&found_score,&segments_computed_p,
&plus_anchor_segments,&minus_anchor_segments,
+ &n_plus_anchors,&n_minus_anchors,
&opt_level,&done_level,user_maxlevel,/*revise_levels_p*/true,
&nhits,&subs,&indels,this,query_compress_fwd,query_compress_rev,
#if defined(DEBUG2) || defined(DEBUG2E)
@@ -16068,31 +17440,31 @@ align_end (int *cutoff_level, T this,
debug(printf("*** Stage 6. Single splicing masking frequent oligos with done_level %d ***\n",done_level));
/* Always mask frequent oligos for splicing, which must be transcriptional */
if (floors_computed_p == false) {
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,queryuc_ptr,querylength,query_lastpos,
plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
/*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
floors_computed_p = true;
}
if (segments_computed_p == false) {
- this->plus_segments = identify_all_segments(&this->plus_nsegments,&plus_anchor_segments,
+ this->plus_segments = identify_all_segments(&this->plus_nsegments,&plus_anchor_segments,&n_plus_anchors,
&this->plus_spliceable,&this->plus_nspliceable,
#ifdef LARGE_GENOMES
this->plus_positions_high,this->plus_positions_low,
#else
this->plus_positions,
#endif
- this->plus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/true);
- this->minus_segments = identify_all_segments(&this->minus_nsegments,&minus_anchor_segments,
+ this->plus_npositions,this->omitted,querylength,query_lastpos,
+ floors,/*plusp*/true);
+ this->minus_segments = identify_all_segments(&this->minus_nsegments,&minus_anchor_segments,&n_minus_anchors,
&this->minus_spliceable,&this->minus_nspliceable,
#ifdef LARGE_GENOMES
this->minus_positions_high,this->minus_positions_low,
#else
this->minus_positions,
#endif
- this->minus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/false);
+ this->minus_npositions,this->omitted,querylength,query_lastpos,
+ floors,/*plusp*/false);
segments_computed_p = true;
}
@@ -16128,7 +17500,7 @@ align_end (int *cutoff_level, T this,
if (done_level >= localsplicing_penalty) {
debug(printf("*** Stage 7. Double splicing masking frequent oligos with done_level %d ***\n",done_level));
if (floors_computed_p == false) {
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,queryuc_ptr,querylength,query_lastpos,
plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
/*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
floors_computed_p = true;
@@ -16174,15 +17546,15 @@ align_end (int *cutoff_level, T this,
antiacceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
if (floors_computed_p == false) {
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,queryuc_ptr,querylength,query_lastpos,
plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
/*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
floors_computed_p = true;
}
- debug(printf("Starting find_spliceends (plus) with %d anchor segments\n",List_length(plus_anchor_segments)));
+ debug(printf("Starting find_spliceends (plus) with %d anchor segments\n",n_plus_anchors));
find_spliceends_shortend(&donors_plus,&antidonors_plus,&acceptors_plus,&antiacceptors_plus,
- plus_anchor_segments,
+ plus_anchor_segments,n_plus_anchors,
#ifdef DEBUG4E
/*queryptr*/queryuc_ptr,
#endif
@@ -16190,9 +17562,9 @@ align_end (int *cutoff_level, T this,
max_splice_mismatches,/*plusp*/true,genestrand);
debug(printf("Finished find_spliceends (plus)\n"));
- debug(printf("Starting find_spliceends (minus) with %d anchor segments\n",List_length(minus_anchor_segments)));
+ debug(printf("Starting find_spliceends (minus) with %d anchor segments\n",n_minus_anchors));
find_spliceends_shortend(&antidonors_minus,&donors_minus,&antiacceptors_minus,&acceptors_minus,
- minus_anchor_segments,
+ minus_anchor_segments,n_minus_anchors,
#ifdef DEBUG4E
/*queryptr*/queryrc,
#endif
@@ -16244,7 +17616,6 @@ align_end (int *cutoff_level, T this,
debug(printf(" singlesplicing %d\n",List_length(singlesplicing)));
debug(printf(" doublesplicing %d\n",List_length(doublesplicing)));
debug(printf(" shortendsplicing: %d\n",List_length(shortendsplicing)));
- debug(printf(" longsinglesplicing %d\n",List_length(longsinglesplicing)));
debug(printf(" done_level: %d\n",done_level));
hits = List_append(greedy,
@@ -16256,8 +17627,8 @@ align_end (int *cutoff_level, T this,
if (knownsplicingp || novelsplicingp || find_dna_chimeras_p) {
/* Search 7: Distant splicing */
min_trim = querylength;
- for (p = hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
+ for (q = hits; q != NULL; q = q->rest) {
+ hit = (Stage3end_T) q->first;
if ((trim = Stage3end_total_trim(hit)) < min_trim) {
min_trim = trim;
}
@@ -16281,14 +17652,14 @@ align_end (int *cutoff_level, T this,
endfrags_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
if (floors_computed_p == false) {
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,queryuc_ptr,querylength,query_lastpos,
plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
/*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
floors_computed_p = true;
}
debug(printf("Starting find_spliceends_distant_dna_plus\n"));
- find_spliceends_distant_dna_plus(&startfrags_plus,&endfrags_plus,plus_anchor_segments,
+ find_spliceends_distant_dna_plus(&startfrags_plus,&endfrags_plus,plus_anchor_segments,n_plus_anchors,
#ifdef DEBUG4E
/*queryptr*/queryuc_ptr,
#endif
@@ -16297,7 +17668,7 @@ align_end (int *cutoff_level, T this,
debug(printf("Finished find_spliceends_distant_dna_plus\n"));
debug(printf("Starting find_spliceends_distant_dna_minus\n"));
- find_spliceends_distant_dna_minus(&startfrags_minus,&endfrags_minus,minus_anchor_segments,
+ find_spliceends_distant_dna_minus(&startfrags_minus,&endfrags_minus,minus_anchor_segments,n_minus_anchors,
#ifdef DEBUG4E
/*queryptr*/queryrc,
#endif
@@ -16393,7 +17764,7 @@ align_end (int *cutoff_level, T this,
antiacceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
if (floors_computed_p == false) {
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,queryuc_ptr,querylength,query_lastpos,
plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
/*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
floors_computed_p = true;
@@ -16401,7 +17772,7 @@ align_end (int *cutoff_level, T this,
debug(printf("Starting find_spliceends_distant_rna (plus)\n"));
find_spliceends_distant_rna(&donors_plus,&antidonors_plus,&acceptors_plus,&antiacceptors_plus,
- plus_anchor_segments,
+ plus_anchor_segments,n_plus_anchors,
#ifdef DEBUG4E
/*queryptr*/queryuc_ptr,
#endif
@@ -16412,7 +17783,7 @@ align_end (int *cutoff_level, T this,
debug(printf("Starting find_spliceends_distant_rna (minus)\n"));
find_spliceends_distant_rna(&antidonors_minus,&donors_minus,&antiacceptors_minus,&acceptors_minus,
- minus_anchor_segments,
+ minus_anchor_segments,n_minus_anchors,
#ifdef DEBUG4E
/*queryptr*/queryrc,
#endif
@@ -16513,17 +17884,14 @@ align_end (int *cutoff_level, T this,
FREEA(antiacceptors_minus);
}
- debug(printf("%d single splices, %d double splices, %d short-end splices, %d long single splices, %d distant splices\n",
+ debug(printf("%d single splices, %d double splices, %d long single splices, %d distant splices\n",
List_length(singlesplicing),List_length(doublesplicing),
- List_length(shortendsplicing),List_length(longsinglesplicing),
- List_length(distantsplicing)));
+ List_length(longsinglesplicing),List_length(distantsplicing)));
}
hits = List_append(hits,
- List_append(longsinglesplicing,
- List_append(shortendsplicing,distantsplicing)));
-
+ List_append(longsinglesplicing,distantsplicing));
/* Search 8: Terminals */
@@ -16532,7 +17900,7 @@ align_end (int *cutoff_level, T this,
shortendsplicing || longsinglesplicing || distantsplicing) */
if (0 && found_score > opt_level) {
terminals = find_terminals(plus_anchor_segments,minus_anchor_segments,
- querylength,query_lastpos,
+ n_plus_anchors,n_minus_anchors,querylength,query_lastpos,
query_compress_fwd,query_compress_rev,
/*max_mismatches_allowed*/done_level,genestrand);
@@ -16543,7 +17911,7 @@ align_end (int *cutoff_level, T this,
if (gmap_segments_p == false) {
gmapp = false;
} else if (found_score < trigger_score_for_gmap) {
- debug(printf("Test for stage 9: true because found_score %d >= trigger_score_for_gmap %d\n",found_score,trigger_score_for_gmap));
+ debug(printf("Test for stage 9: false because found_score %d < trigger_score_for_gmap %d\n",found_score,trigger_score_for_gmap));
gmapp = false;
} else if (min_trim < min_distantsplicing_end_matches) {
gmapp = false;
@@ -16558,7 +17926,7 @@ align_end (int *cutoff_level, T this,
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc,Shortread_invertedp(queryseq),
#endif
- plus_anchor_segments,this->plus_segments,this->plus_nsegments,
+ plus_anchor_segments,n_plus_anchors,this->plus_segments,this->plus_nsegments,
oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
genestrand,/*require_pairing_p*/false);
@@ -16566,7 +17934,7 @@ align_end (int *cutoff_level, T this,
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc,Shortread_invertedp(queryseq),
#endif
- minus_anchor_segments,this->minus_segments,this->minus_nsegments,
+ minus_anchor_segments,n_minus_anchors,this->minus_segments,this->minus_nsegments,
oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
genestrand,/*require_pairing_p*/false);
@@ -16593,8 +17961,8 @@ align_end (int *cutoff_level, T this,
debug13(printf("%d hits\n",List_length(hits)));
debug13(printf("For each hit, running GMAP on single end to match with hit\n"));
- for (p = hits; p != NULL && i < max_gmap_improvement; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
+ for (q = hits; q != NULL && i < max_gmap_improvement; q = q->rest) {
+ hit = (Stage3end_T) q->first;
align_single_hit_with_gmap(&gmap1,&gmap2,hit,queryuc_ptr,querylength,
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc,Shortread_invertedp(queryseq),
@@ -16662,8 +18030,8 @@ align_end (int *cutoff_level, T this,
hits = Stage3end_remove_circular_alias(hits);
hits = Stage3end_remove_duplicates(hits); /* Aliases can cause duplicates */
- List_free(&plus_anchor_segments);
- List_free(&minus_anchor_segments);
+ FREE(plus_anchor_segments);
+ FREE(minus_anchor_segments);
return hits;
}
@@ -16677,7 +18045,8 @@ single_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
- Oligoindex_array_T oligoindices_minor, Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
bool keep_floors_p) {
Stage3end_T *stage3array;
@@ -17854,8 +19223,8 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
i,Stage3end_hittype_string(hit5),Stage3end_hittype_string(hit3)));
/* Was querylength5 - Stage3end_matches(hit5) > 5 */
- debug13(printf("Looking at hit5 with nmismatches %d - %d ?<= cutoff_level %d\n",
- querylength5,Stage3end_nmatches_posttrim(hit5),cutoff_level_5));
+ debug13(printf("Looking at hit5 %p with nmismatches %d - %d ?<= cutoff_level %d\n",
+ hit5,querylength5,Stage3end_nmatches_posttrim(hit5),cutoff_level_5));
#if 0
if (Stage3end_sarrayp(hit5) == true && redo_for_sense_p == false) {
@@ -17874,7 +19243,11 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
} /* else */
#endif
- if (querylength5 - Stage3end_nmatches_posttrim(hit5) <= cutoff_level_5) {
+ if (Stage3end_hittype(hit5) == GMAP) {
+ /* Skip */
+ debug13(printf("Skipping hit5 already of type GMAP\n"));
+
+ } else if (querylength5 - Stage3end_nmatches_posttrim(hit5) <= cutoff_level_5) {
/* Skip, because already good enough */
debug13(printf("Skipping hit5 with nmismatches %d - %d <= cutoff_level %d\n",
querylength5,Stage3end_nmatches_posttrim(hit5),cutoff_level_5));
@@ -17887,7 +19260,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
#endif
oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- genestrand,/*extend_ends_p*/true);
+ genestrand,/*extend_ends_p*/true); /* if false, then causes major slowdown */
if (gmap5_1 != NULL) {
debug13(missing_hit = querylength5 - Stage3end_nmatches_posttrim(hit5));
debug13(missing_gmap = querylength5 - Stage3end_nmatches_posttrim(gmap5_1));
@@ -17901,10 +19274,14 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
}
}
- debug13(printf("Looking at hit3 with nmismatches %d - %d ?<= cutoff_level %d\n",
- querylength3,Stage3end_nmatches_posttrim(hit3),cutoff_level_3));
+ debug13(printf("Looking at hit3 %p with nmismatches %d - %d ?<= cutoff_level %d\n",
+ hit3,querylength3,Stage3end_nmatches_posttrim(hit3),cutoff_level_3));
- if (querylength3 - Stage3end_nmatches_posttrim(hit3) <= cutoff_level_3) {
+ if (Stage3end_hittype(hit3) == GMAP) {
+ /* Skip */
+ debug13(printf("Skipping hit3 already of type GMAP\n"));
+
+ } else if (querylength3 - Stage3end_nmatches_posttrim(hit3) <= cutoff_level_3) {
/* Skip, because already good enough */
debug13(printf("Skipping hit3 with nmismatches %d - %d <= cutoff_level %d\n",
querylength3,Stage3end_nmatches_posttrim(hit3),cutoff_level_3));
@@ -17917,7 +19294,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
#endif
oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- genestrand,/*extend_ends_p*/true);
+ genestrand,/*extend_ends_p*/true); /* if false, then causes major slowdown */
if (gmap3_1 != NULL) {
debug13(missing_hit = querylength3 - Stage3end_nmatches_posttrim(hit3));
debug13(missing_gmap = querylength3 - Stage3end_nmatches_posttrim(gmap3_1));
@@ -17946,20 +19323,19 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
/* Stage3end_free(&gmap5); -- done by Stage3pair_new */
debug13(printf(" => NULL, so eliminating\n"));
+ } else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
+ debug13(printf(" => not concordant, so eliminating\n"));
+
} else if (replacedp == false) {
/* Convert to gmap-gmap */
debug13(printf(" => replacement\n"));
- if (Stage3pair_pairtype(newpair) == CONCORDANT) {
- *final_pairtype = CONCORDANT;
- }
+ *final_pairtype = CONCORDANT;
List_head_set(p,(void *) newpair);
replacedp = true;
} else {
debug13(printf(" => addition\n"));
- if (Stage3pair_pairtype(newpair) == CONCORDANT) {
- *final_pairtype = CONCORDANT;
- }
+ *final_pairtype = CONCORDANT;
rest = List_push(List_next(p),(void *) newpair);
List_tail_set(p,rest);
p = rest;
@@ -17997,20 +19373,19 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
/* Stage3end_free(&gmap5); -- done by Stage3pair_new */
debug13(printf(" => NULL\n"));
+ } else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
+ debug13(printf(" => not concordant, so eliminating\n"));
+
} else if (replacedp == false) {
/* Convert to gmap-xx */
debug13(printf(" => replacement\n"));
- if (Stage3pair_pairtype(newpair) == CONCORDANT) {
- *final_pairtype = CONCORDANT;
- }
+ *final_pairtype = CONCORDANT;
List_head_set(p,(void *) newpair);
replacedp = true;
} else {
debug13(printf(" => addition\n"));
- if (Stage3pair_pairtype(newpair) == CONCORDANT) {
- *final_pairtype = CONCORDANT;
- }
+ *final_pairtype = CONCORDANT;
rest = List_push(List_next(p),(void *) newpair);
List_tail_set(p,rest);
p = rest;
@@ -18035,20 +19410,19 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
/* Stage3end_free(&gmap3); -- done by Stage3pair_new */
debug13(printf(" => NULL\n"));
+ } else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
+ debug13(printf(" => not concordant, so eliminating\n"));
+
} else if (replacedp == false) {
/* Convert to xx-gmap */
debug13(printf(" => replacement\n"));
- if (Stage3pair_pairtype(newpair) == CONCORDANT) {
- *final_pairtype = CONCORDANT;
- }
+ *final_pairtype = CONCORDANT;
List_head_set(p,(void *) newpair);
replacedp = true;
} else {
debug13(printf(" => addition\n"));
- if (Stage3pair_pairtype(newpair) == CONCORDANT) {
- *final_pairtype = CONCORDANT;
- }
+ *final_pairtype = CONCORDANT;
rest = List_push(List_next(p),(void *) newpair);
List_tail_set(p,rest);
p = rest;
@@ -18064,7 +19438,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
}
}
- debug13(printf("End of align_pair_with_gmap\n"));
+ debug13(printf("End of align_pair_with_gmap with %d results\n",List_length(result)));
return result;
}
@@ -18188,7 +19562,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
List_T halfmapping5, halfmapping3, a;
Stage3end_T hit5, hit3, gmap5, gmap3;
List_T hitarray5[HITARRAY_N], hitarray3[HITARRAY_N];
- List_T plus_anchor_segments_5 = NULL, minus_anchor_segments_5 = NULL, plus_anchor_segments_3 = NULL, minus_anchor_segments_3 = NULL;
+ Segment_T *plus_anchor_segments_5 = NULL, *minus_anchor_segments_5 = NULL,
+ *plus_anchor_segments_3 = NULL, *minus_anchor_segments_3 = NULL;
+ int n_plus_anchors_5 = 0, n_minus_anchors_5 = 0, n_plus_anchors_3 = 0, n_minus_anchors_3 = 0;
List_T greedy5 = NULL, subs5 = NULL, terminals5 = NULL,
indels5 = NULL, singlesplicing5 = NULL, doublesplicing5 = NULL,
distantsplicing5 = NULL, gmap5_hits = NULL;
@@ -18354,8 +19730,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3,/*narray3*/HITARRAY_GREEDY+1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing sarray, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing sarray, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
debug(printf("SA> found_score = %d, done_level %d,%d\n",*found_score,done_level_5,done_level_3));
return Stage3pair_remove_circular_alias(hitpairs);
@@ -18364,7 +19740,6 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
#endif
/* Search 1: Suffix array */
- completeset5p = completeset3p = true;
#ifdef LARGE_GENOMES
spanningset5p = spanningset3p = true;
#else
@@ -18395,8 +19770,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing sarray, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing sarray, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == true) {
*hits5 = greedy5;
*hits3 = greedy3;
@@ -18421,13 +19796,33 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("nconcordant %d\n",nconcordant));
if (nconcordant == 0) {
- /* Need to have this to compensate for greediness of suffix array algorithm */
- debug(printf("nconcordant is 0, so we are doing spanningset\n"));
- spanningset5p = spanningset3p = true;
+ if (*terminals == NULL) {
+ /* Need to have this to compensate for greediness of suffix array algorithm */
+ debug(printf("nconcordant is 0, so we are doing spanningset on both sides\n"));
+ spanningset5p = spanningset3p = true;
+ } else {
+ /* Have concordant terminals, so may need to refine just one end */
+ for (p = *terminals; p != NULL; p = List_next(p)) {
+ newpair = (Stage3pair_T) List_head(p);
+ debug(printf("Terminal has total trims of %d and %d\n",
+ Stage3end_total_trim(Stage3pair_hit5(newpair)),
+ Stage3end_total_trim(Stage3pair_hit3(newpair))));
+ if (Stage3end_total_trim(Stage3pair_hit5(newpair)) > 15) {
+ spanningset5p = true;
+ }
+ if (Stage3end_total_trim(Stage3pair_hit3(newpair)) > 15) {
+ spanningset3p = true;
+ }
+ debug(printf("spanningset5p %d, spanningset3p %d\n",spanningset5p,spanningset3p));
+ }
+ }
+#if 0
} else if (*found_score >= done_level_5 + done_level_3) {
+ /* Can eliminate this, because nconcordant no longer includes terminals, which are our main concern */
debug(printf("found_score %d >= done_level_5 %d + done_level_3 %d,, so we are doing spanningset\n",
*found_score,done_level_5,done_level_3));
spanningset5p = spanningset3p = true;
+#endif
}
}
#endif
@@ -18495,8 +19890,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3,/*narray3*/HITARRAY_SUBS+1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing exact, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing exact, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == true) {
*hits5 = List_append(greedy5,subs5);
*hits3 = List_append(greedy3,subs3);
@@ -18544,8 +19939,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3,/*narray3*/HITARRAY_SUBS+1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing one mismatch, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing one mismatch, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == true) {
*hits5 = subs5;
*hits3 = subs3;
@@ -18607,8 +20002,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3,/*narray3*/HITARRAY_SUBS+1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing spanning set, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing spanning set, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == true) {
*hits5 = List_append(greedy5,subs5);
*hits3 = List_append(greedy3,subs3);
@@ -18634,10 +20029,38 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/* Search 3: Subs/indels via complete set algorithm */
/* 4/5A. Complete set mismatches and indels, omitting frequent oligos */
- if (*found_score <= done_level_5 + done_level_3) {
+ completeset5p = completeset3p = false;
+ if (nconcordant == 0) {
+ if (*terminals == NULL) {
+ debug(printf("nconcordant is 0, so we are doing completeset on both sides\n"));
+ completeset5p = completeset3p = true;
+ } else {
+ /* Have concordant terminals, so may need to refine just one
+ end. A single bad alignment is enough to refine that end;
+ otherwise, the algorithm misses alignments. */
+ for (p = *terminals; p != NULL; p = List_next(p)) {
+ newpair = (Stage3pair_T) List_head(p);
+ debug(printf("Terminal has total scores of %d and %d\n",
+ Stage3end_score(Stage3pair_hit5(newpair)),
+ Stage3end_score(Stage3pair_hit3(newpair))));
+ if (Stage3end_score(Stage3pair_hit5(newpair)) > done_level_5) {
+ completeset5p = true;
+ }
+ if (Stage3end_score(Stage3pair_hit3(newpair)) > done_level_3) {
+ completeset3p = true;
+ }
+ debug(printf("completeset5p %d, completeset3p %d\n",completeset5p,completeset3p));
+ }
+ }
+
+#if 0
+ } else if (*found_score <= done_level_5 + done_level_3) {
debug(printf("Test for completeset: false because *found_score %d < done_level_5 %d + done_level_3 %d\n",
*found_score,done_level_5,done_level_3));
completeset5p = completeset3p = false;
+#endif
+
+#if 0
} else {
if (better_free_end_exists_p(greedy5,subs5,terminals5,indels5,singlesplicing5,doublesplicing5,querylength5) == true) {
completeset3p = true; /* Do search on other end using complete set algorithm */
@@ -18646,6 +20069,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
completeset5p = true; /* Do search on other end using complete set algorithm */
}
debug(printf("Test for completeset using better_free_end_exists_p: completeset5p %d, completeset3p %d\n",completeset5p,completeset3p));
+#endif
}
if (querylength5 < min_kmer_readlength) {
@@ -18661,7 +20085,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
read_oligos(&allvalidp5,this5,queryuc_ptr_5,querylength5,query5_lastpos,genestrand);
}
- floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
+ floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,queryuc_ptr_5,
querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -18669,6 +20093,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
ignore_found_score = *found_score;
complete_set_mm_indels(&ignore_found_score,&segments5_computed_p,
&plus_anchor_segments_5,&minus_anchor_segments_5,
+ &n_plus_anchors_5,&n_minus_anchors_5,
&opt_level,&done_level_5,user_maxlevel_5,/*revise_levels_p*/false,
&nhits5,&subs5,&indels5,this5,query5_compress_fwd,query5_compress_rev,
#if defined(DEBUG2) || defined(DEBUG2E)
@@ -18687,7 +20112,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
read_oligos(&allvalidp3,this3,queryuc_ptr_3,querylength3,query3_lastpos,genestrand);
}
- floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
+ floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,queryuc_ptr_3,
querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -18695,6 +20120,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
ignore_found_score = *found_score;
complete_set_mm_indels(&ignore_found_score,&segments3_computed_p,
&plus_anchor_segments_3,&minus_anchor_segments_3,
+ &n_plus_anchors_3,&n_minus_anchors_3,
&opt_level,&done_level_3,user_maxlevel_3,/*revise_levels_p*/false,
&nhits3,&subs3,&indels3,this3,query3_compress_fwd,query3_compress_rev,
#if defined(DEBUG2) || defined(DEBUG2E)
@@ -18723,8 +20149,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3,/*narray3*/HITARRAY_INDELS+1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing complete set mismatches and indels, found %d concordant, %d nsamechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing complete set mismatches and indels, found %d concordant, %d nsamechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == true) {
*hits5 = List_append(greedy5,List_append(subs5,indels5));
*hits3 = List_append(greedy3,List_append(subs3,indels3));
@@ -18770,7 +20196,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("*** Stage 6A. Single splicing masking frequent oligos with done_level %d ***\n",done_level_5));
/* Always mask frequent oligos for splicing, which must be transcriptional */
if (floors5_computed_p == false) {
- floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
+ floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,queryuc_ptr_5,
querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -18778,24 +20204,24 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
if (segments5_computed_p == false) {
- this5->plus_segments = identify_all_segments(&this5->plus_nsegments,&plus_anchor_segments_5,
+ this5->plus_segments = identify_all_segments(&this5->plus_nsegments,&plus_anchor_segments_5,&n_plus_anchors_5,
&this5->plus_spliceable,&this5->plus_nspliceable,
#ifdef LARGE_GENOMES
this5->plus_positions_high,this5->plus_positions_low,
#else
this5->plus_positions,
#endif
- this5->plus_npositions,this5->omitted,querylength5,query5_lastpos,floors5,
- /*plusp*/true);
- this5->minus_segments = identify_all_segments(&this5->minus_nsegments,&minus_anchor_segments_5,
+ this5->plus_npositions,this5->omitted,querylength5,query5_lastpos,
+ floors5,/*plusp*/true);
+ this5->minus_segments = identify_all_segments(&this5->minus_nsegments,&minus_anchor_segments_5,&n_minus_anchors_5,
&this5->minus_spliceable,&this5->minus_nspliceable,
#ifdef LARGE_GENOMES
this5->minus_positions_high,this5->minus_positions_low,
#else
this5->minus_positions,
#endif
- this5->minus_npositions,this5->omitted,querylength5,query5_lastpos,floors5,
- /*plusp*/false);
+ this5->minus_npositions,this5->omitted,querylength5,query5_lastpos,
+ floors5,/*plusp*/false);
segments5_computed_p = true;
}
@@ -18824,7 +20250,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("*** Stage 6B. Single splicing masking frequent oligos with done_level %d ***\n",done_level_3));
/* Always mask frequent oligos for splicing, which must be transcriptional */
if (floors3_computed_p == false) {
- floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
+ floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,queryuc_ptr_3,
querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -18832,24 +20258,24 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
if (segments3_computed_p == false) {
- this3->plus_segments = identify_all_segments(&this3->plus_nsegments,&plus_anchor_segments_3,
+ this3->plus_segments = identify_all_segments(&this3->plus_nsegments,&plus_anchor_segments_3,&n_plus_anchors_3,
&this3->plus_spliceable,&this3->plus_nspliceable,
#ifdef LARGE_GENOMES
this3->plus_positions_high,this3->plus_positions_low,
#else
this3->plus_positions,
#endif
- this3->plus_npositions,this3->omitted,querylength3,query3_lastpos,floors3,
- /*plusp*/true);
- this3->minus_segments = identify_all_segments(&this3->minus_nsegments,&minus_anchor_segments_3,
+ this3->plus_npositions,this3->omitted,querylength3,query3_lastpos,
+ floors3,/*plusp*/true);
+ this3->minus_segments = identify_all_segments(&this3->minus_nsegments,&minus_anchor_segments_3,&n_minus_anchors_3,
&this3->minus_spliceable,&this3->minus_nspliceable,
#ifdef LARGE_GENOMES
this3->minus_positions_high,this3->minus_positions_low,
#else
this3->minus_positions,
#endif
- this3->minus_npositions,this3->omitted,querylength3,query3_lastpos,floors3,
- /*plusp*/false);
+ this3->minus_npositions,this3->omitted,querylength3,query3_lastpos,
+ floors3,/*plusp*/false);
segments3_computed_p = true;
}
@@ -18877,8 +20303,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3,/*narray3*/HITARRAY_SINGLESPLICING+1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing single splicing, found %d concordant, %d nsamechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing single splicing, found %d concordant, %d nsamechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == true) {
if (alloc_floors_p_5 == true) {
Floors_free(&floors5);
@@ -18914,7 +20340,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
if (done_level_5 >= localsplicing_penalty) {
debug(printf("*** Stage 7A. Double splicing masking frequent oligos with done_level %d ***\n",done_level_5));
if (floors5_computed_p == false) {
- floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
+ floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,queryuc_ptr_5,
querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -18933,7 +20359,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
if (done_level_3 >= localsplicing_penalty) {
debug(printf("*** Stage 7B. Double splicing masking frequent oligos with done_level %d ***\n",done_level_3));
if (floors3_computed_p == false) {
- floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
+ floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,queryuc_ptr_3,
querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -18961,8 +20387,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing double splicing, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing double splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == true) {
if (alloc_floors_p_5 == true) {
Floors_free(&floors5);
@@ -19027,14 +20453,15 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("***Trying to pair up segments***\n"));
pair_up_anchor_segments(plus_anchor_segments_5,minus_anchor_segments_5,
plus_anchor_segments_3,minus_anchor_segments_3,
- pairmax);
+ n_plus_anchors_5,n_minus_anchors_5,
+ n_plus_anchors_3,n_minus_anchors_3,pairmax);
if (gmap5p == true) {
gmap5_hits = convert_plus_segments_to_gmap(/*hits*/NULL,queryuc_ptr_5,querylength5,query5_lastpos,
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc5,Shortread_invertedp(queryseq5),
#endif
- plus_anchor_segments_5,this5->plus_segments,this5->plus_nsegments,
+ plus_anchor_segments_5,n_plus_anchors_5,this5->plus_segments,this5->plus_nsegments,
oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
genestrand,/*require_pairing_p*/true);
@@ -19042,7 +20469,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc5,Shortread_invertedp(queryseq5),
#endif
- minus_anchor_segments_5,this5->minus_segments,this5->minus_nsegments,
+ minus_anchor_segments_5,n_minus_anchors_5,this5->minus_segments,this5->minus_nsegments,
oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
genestrand,/*require_pairing_p*/true);
@@ -19059,7 +20486,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc3,Shortread_invertedp(queryseq3),
#endif
- plus_anchor_segments_3,this3->plus_segments,this3->plus_nsegments,
+ plus_anchor_segments_3,n_plus_anchors_3,this3->plus_segments,this3->plus_nsegments,
oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
genestrand,/*require_pairing_p*/true);
@@ -19067,7 +20494,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc3,Shortread_invertedp(queryseq3),
#endif
- minus_anchor_segments_3,this3->minus_segments,this3->minus_nsegments,
+ minus_anchor_segments_3,n_minus_anchors_3,this3->minus_segments,this3->minus_nsegments,
oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
genestrand,/*require_pairing_p*/true);
@@ -19090,8 +20517,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/*hitarray3*/&(*hits3),/*narray3*/1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("11> After pairing GMAP, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("11> After pairing GMAP, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == false) {
opt_level = (*found_score < opt_level) ? *found_score : opt_level;
if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
@@ -19143,7 +20570,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
antiacceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
if (floors5_computed_p == false) {
- floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
+ floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,queryuc_ptr_5,
querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -19151,9 +20578,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
/* 11A. Distant splicing */
- debug(printf("Starting find_spliceends (plus) on 5' end with %d anchor segments\n",List_length(plus_anchor_segments_5)));
+ debug(printf("Starting find_spliceends (plus) on 5' end with %d anchor segments\n",n_plus_anchors_5));
find_spliceends_distant_rna(&donors_plus_5,&antidonors_plus_5,&acceptors_plus_5,&antiacceptors_plus_5,
- plus_anchor_segments_5,
+ plus_anchor_segments_5,n_plus_anchors_5,
#ifdef DEBUG4E
/*queryptr*/queryuc_ptr_5,
#endif
@@ -19161,9 +20588,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
max_splice_mismatches_5,/*plusp*/true,genestrand);
debug(printf("Finished find_spliceends (plus)\n"));
- debug(printf("Starting find_spliceends (minus) on 5' end with %d anchor segments\n",List_length(minus_anchor_segments_5)));
+ debug(printf("Starting find_spliceends (minus) on 5' end with %d anchor segments\n",n_minus_anchors_5));
find_spliceends_distant_rna(&antidonors_minus_5,&donors_minus_5,&antiacceptors_minus_5,&acceptors_minus_5,
- minus_anchor_segments_5,
+ minus_anchor_segments_5,n_minus_anchors_5,
#ifdef DEBUG4E
/*queryptr*/queryrc5,
#endif
@@ -19245,7 +20672,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
antiacceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
if (floors3_computed_p == false) {
- floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
+ floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,queryuc_ptr_3,
querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -19253,9 +20680,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
/* 11B. Distant splicing */
- debug(printf("Starting find_spliceends (plus) on 3' end with %d anchor segments\n",List_length(plus_anchor_segments_3)));
+ debug(printf("Starting find_spliceends (plus) on 3' end with %d anchor segments\n",n_plus_anchors_3));
find_spliceends_distant_rna(&donors_plus_3,&antidonors_plus_3,&acceptors_plus_3,&antiacceptors_plus_3,
- plus_anchor_segments_3,
+ plus_anchor_segments_3,n_plus_anchors_3,
#ifdef DEBUG4E
/*queryptr*/queryuc_ptr_3,
#endif
@@ -19263,9 +20690,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
max_splice_mismatches_3,/*plusp*/true,genestrand);
debug(printf("Finished find_spliceends (plus)\n"));
- debug(printf("Starting find_spliceends (minus) on 3' end with %d anchor segments\n",List_length(minus_anchor_segments_3)));
+ debug(printf("Starting find_spliceends (minus) on 3' end with %d anchor segments\n",n_minus_anchors_3));
find_spliceends_distant_rna(&antidonors_minus_3,&donors_minus_3,&antiacceptors_minus_3,&acceptors_minus_3,
- minus_anchor_segments_3,
+ minus_anchor_segments_3,n_minus_anchors_3,
#ifdef DEBUG4E
/*queryptr*/queryrc3,
#endif
@@ -19353,8 +20780,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/*hitarray3*/&(*hits3),/*narray3*/1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("10> After pairing long single splicing, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("10> After pairing long single splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == false) {
opt_level = (*found_score < opt_level) ? *found_score : opt_level;
@@ -19391,8 +20818,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/*hitarray3*/&(*hits3),/*narray3*/1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("11> After pairing distant splicing, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("11> After pairing distant splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == false) {
opt_level = (*found_score < opt_level) ? *found_score : opt_level;
@@ -19411,12 +20838,14 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
if (0) {
if (nconcordant == 0 || *found_score > opt_level) {
terminals5 = find_terminals(plus_anchor_segments_5,minus_anchor_segments_5,
+ n_plus_anchors_5,n_minus_anchors_5,
querylength5,query5_lastpos,
query5_compress_fwd,query5_compress_rev,
/*max_mismatches_allowed*/done_level_5,genestrand);
*hits5 = List_append(*hits5,terminals5);
terminals3 = find_terminals(plus_anchor_segments_3,minus_anchor_segments_3,
+ n_plus_anchors_3,n_minus_anchors_3,
querylength3,query3_lastpos,
query3_compress_fwd,query3_compress_rev,
/*max_mismatches_allowed*/done_level_3,genestrand);
@@ -19428,8 +20857,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/*hitarray3*/&(*hits3),/*narray3*/1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing terminals, found %d concordant, %d nsamechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing terminals, found %d concordant, %d nsamechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
}
}
@@ -19534,8 +20963,12 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
#else
if ((newpair = Stage3pair_new(Stage3end_copy(hit5),gmap3,genestrand,/*pairtype*/CONCORDANT,
/*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
- debug13(printf( "newpair is NULL\n"));
+ debug13(printf("newpair is NULL\n"));
/* Stage3end_free(&gmap3); -- done by Stage3pair_new */
+
+ } else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
+ debug13(printf(" => not concordant, so eliminating\n"));
+
} else {
nconcordant += 1;
debug13(printf("New pair => nconcordant %d\n",nconcordant));
@@ -19616,6 +21049,10 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
debug13(printf( "newpair is NULL\n"));
/* Stage3end_free(&gmap5); -- done by Stage3pair_new */
+
+ } else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
+ debug13(printf(" => not concordant, so eliminating\n"));
+
} else {
nconcordant += 1;
debug13(printf("new pair => nconcordant %d\n",nconcordant));
@@ -19652,7 +21089,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
antiacceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
if (floors5_computed_p == false) {
- floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
+ floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,queryuc_ptr_5,
querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -19660,7 +21097,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
find_spliceends_shortend(&donors_plus_5,&antidonors_plus_5,&acceptors_plus_5,&antiacceptors_plus_5,
- plus_anchor_segments_5,
+ plus_anchor_segments_5,n_plus_anchors_5,
#ifdef DEBUG4E
queryuc_ptr_5,
#endif
@@ -19668,7 +21105,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/*max_mismatches_allowed*/max_splice_mismatches_5,/*plusp*/true,genestrand);
find_spliceends_shortend(&antidonors_minus_5,&donors_minus_5,&antiacceptors_minus_5,&acceptors_minus_5,
- minus_anchor_segments_5,
+ minus_anchor_segments_5,n_minus_anchors_5,
#ifdef DEBUG4E
/*queryptr*/queryrc5,
#endif
@@ -19703,7 +21140,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
antiacceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
if (floors3_computed_p == false) {
- floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
+ floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,queryuc_ptr_3,
querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
keep_floors_p);
@@ -19711,7 +21148,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
find_spliceends_shortend(&donors_plus_3,&antidonors_plus_3,&acceptors_plus_3,&antiacceptors_plus_3,
- plus_anchor_segments_3,
+ plus_anchor_segments_3,n_plus_anchors_3,
#ifdef DEBUG4E
queryuc_ptr_3,
#endif
@@ -19719,7 +21156,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/*max_mismatches_allowed*/max_splice_mismatches_3,/*plusp*/true,genestrand);
find_spliceends_shortend(&antidonors_minus_3,&donors_minus_3,&antiacceptors_minus_3,&acceptors_minus_3,
- minus_anchor_segments_3,
+ minus_anchor_segments_3,n_minus_anchors_3,
#ifdef DEBUG4E
/*queryptr*/queryrc3,
#endif
@@ -19746,8 +21183,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
*cutoff_level_5,*cutoff_level_3,
querylength5,querylength3,maxpairedpaths,genestrand);
- debug(printf("After pairing short-overlap splicing, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ debug(printf("After pairing short-overlap splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+ nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == false) {
opt_level = (*found_score < opt_level) ? *found_score : opt_level;
if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
@@ -19935,10 +21372,10 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
#endif
- List_free(&plus_anchor_segments_5);
- List_free(&minus_anchor_segments_5);
- List_free(&plus_anchor_segments_3);
- List_free(&minus_anchor_segments_3);
+ FREE(plus_anchor_segments_5);
+ FREE(minus_anchor_segments_5);
+ FREE(plus_anchor_segments_3);
+ FREE(minus_anchor_segments_3);
return hitpairs;
}
@@ -19946,7 +21383,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
static Pairtype_T
choose_among_paired (int *best_nmatches_paired, int *best_nmatches_5, int *best_nmatches_3,
- List_T hitpairs, List_T samechr, List_T conc_transloc) {
+ List_T hitpairs, List_T samechr, List_T conc_transloc, List_T terminals) {
Pairtype_T final_pairtype = UNPAIRED;
List_T p;
Stage3pair_T hitpair;
@@ -19966,30 +21403,43 @@ choose_among_paired (int *best_nmatches_paired, int *best_nmatches_5, int *best_
}
}
- *best_nmatches_paired += 1; /* penalty for choosing translocation over others */
+ if (hitpairs == NULL) {
+ for (p = terminals; p != NULL; p = p->rest) {
+ hitpair = (Stage3pair_T) p->first;
+ if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
+ final_pairtype = PAIRED_TERMINALS;
+ *best_nmatches_paired = nmatches;
+ *best_nmatches_5 = nmatches5;
+ *best_nmatches_3 = nmatches3;
+ }
+ }
- for (p = conc_transloc; p != NULL; p = p->rest) {
- hitpair = (Stage3pair_T) p->first;
- if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
- final_pairtype = CONCORDANT_TRANSLOCATIONS;
- *best_nmatches_paired = nmatches;
- *best_nmatches_5 = nmatches5;
- *best_nmatches_3 = nmatches3;
+ *best_nmatches_paired += 1; /* penalty for choosing translocation over others */
+
+ for (p = conc_transloc; p != NULL; p = p->rest) {
+ hitpair = (Stage3pair_T) p->first;
+ if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
+ final_pairtype = CONCORDANT_TRANSLOCATIONS;
+ *best_nmatches_paired = nmatches;
+ *best_nmatches_5 = nmatches5;
+ *best_nmatches_3 = nmatches3;
+ }
}
- }
- for (p = samechr; p != NULL; p = p->rest) {
- hitpair = (Stage3pair_T) p->first;
- if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
- final_pairtype = PAIRED_UNSPECIFIED;
- *best_nmatches_paired = nmatches;
- *best_nmatches_5 = nmatches5;
- *best_nmatches_3 = nmatches3;
+ for (p = samechr; p != NULL; p = p->rest) {
+ hitpair = (Stage3pair_T) p->first;
+ if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
+ final_pairtype = PAIRED_UNSPECIFIED;
+ *best_nmatches_paired = nmatches;
+ *best_nmatches_5 = nmatches5;
+ *best_nmatches_3 = nmatches3;
+ }
}
}
debug16(printf("best_nmatches_paired among paired = %d = %d + %d\n",
*best_nmatches_paired,*best_nmatches_5,*best_nmatches_3));
+ debug16(printf("final pairtype: %s\n",Pairtype_string(final_pairtype)));
return final_pairtype;
}
@@ -20163,7 +21613,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
debug16(printf("Entered consolidate_paired_results. Passing pointer %p\n",&best_nmatches_paired));
*final_pairtype = choose_among_paired(&best_nmatches_paired,&best_nmatches_paired_5,&best_nmatches_paired_3,
- hitpairs,samechr,conc_transloc);
+ hitpairs,samechr,conc_transloc,terminals);
if (*final_pairtype == CONCORDANT) {
/* Have concordant results */
@@ -20220,6 +21670,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
cutoff_level_5,cutoff_level_3,/*expect_concordant_p*/true);
if (Stage3pair_sense_consistent_p(result) == false) {
+ debug16(printf("sense is not consistent\n"));
result = align_pair_with_gmap(&(*final_pairtype),result,
queryuc_ptr_5,queryuc_ptr_3,querylength5,querylength3,
oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
@@ -20243,7 +21694,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
} else if (*final_pairtype == PAIRED_UNSPECIFIED) {
/* Have paired results */
- debug16(printf("Have paired results\n"));
+ debug16(printf("Have paired unspecified\n"));
for (p = hitpairs; p != NULL; p = List_next(p)) {
stage3pair = (Stage3pair_T) List_head(p);
Stage3pair_free(&stage3pair);
@@ -20306,11 +21757,94 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
debug16(printf("Found remaining concordant solution, so removing non-concordant ones\n"));
*final_pairtype = CONCORDANT;
result = Stage3pair_filter_nonconcordant(result);
+ debug16(printf("Concordant results: %d\n",List_length(result)));
} else {
*final_pairtype = PAIRED_UNSPECIFIED;
}
}
+ } else if (*final_pairtype == PAIRED_TERMINALS) {
+ /* Have paired results */
+ debug16(printf("Have paired terminals: %d\n",List_length(terminals)));
+ for (p = hitpairs; p != NULL; p = List_next(p)) {
+ stage3pair = (Stage3pair_T) List_head(p);
+ Stage3pair_free(&stage3pair);
+ }
+ List_free(&hitpairs);
+
+ for (p = samechr; p != NULL; p = List_next(p)) {
+ stage3pair = (Stage3pair_T) List_head(p);
+ Stage3pair_free(&stage3pair);
+ }
+ List_free(&samechr);
+
+ for (p = conc_transloc; p != NULL; p = List_next(p)) {
+ stage3pair = (Stage3pair_T) List_head(p);
+ Stage3pair_free(&stage3pair);
+ }
+ List_free(&conc_transloc);
+
+ if (gmap_improvement_p == false) {
+ debug16(printf("No GMAP improvement: Before removing overlaps, %d results\n",List_length(terminals)));
+ result = Stage3pair_optimal_score(terminals,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ querylength5,querylength3,/*keep_gmap_p*/true,/*finalp*/true);
+ result = Stage3pair_remove_overlaps(result,/*translocp*/false,/*finalp*/true);
+ result = Stage3pair_optimal_score(result,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true);
+ result = Stage3pair_resolve_multimapping(result);
+
+ } else {
+ debug16(printf("GMAP improvement: Before removing overlaps, %d results\n",List_length(terminals)));
+ result = Stage3pair_optimal_score(terminals,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ querylength5,querylength3,/*keep_gmap_p*/true,/*finalp*/false);
+ result = Stage3pair_remove_overlaps(result,/*translocp*/false,/*finalp*/false);
+ result = Stage3pair_optimal_score(result,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/false);
+ result = Stage3pair_resolve_multimapping(result);
+
+ result = align_pair_with_gmap(&(*final_pairtype),result,
+ queryuc_ptr_5,queryuc_ptr_3,querylength5,querylength3,
+ oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ cutoff_level_5,cutoff_level_3,/*expect_concordant_p*/false);
+ result = Stage3pair_optimal_score(result,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ querylength5,querylength3,/*keep_gmap_p*/true,/*finalp*/true);
+ result = Stage3pair_remove_overlaps(result,/*translocp*/false,/*finalp*/true);
+ result = Stage3pair_optimal_score(result,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true);
+ result = Stage3pair_resolve_multimapping(result);
+
+#if 0
+ debug16(printf("Assigning pairtypes\n"));
+ *final_pairtype = PAIRED_TERMINALS;
+ for (p = result; p != NULL; p = List_next(p)) {
+ stage3pair = (Stage3pair_T) List_head(p);
+ if (Stage3pair_assign_pairtype(stage3pair) == CONCORDANT) {
+ *final_pairtype = CONCORDANT;
+ }
+ }
+ if (*final_pairtype == CONCORDANT) {
+ debug16(printf("Found remaining concordant solution, so removing non-concordant ones\n"));
+ result = Stage3pair_filter_nonconcordant(result);
+ debug16(printf("Concordant results: %d\n",List_length(result)));
+ }
+#endif
+ }
+
+ *final_pairtype = CONCORDANT; /* Because paired terminals are each concordant */
+
+
} else if (*final_pairtype == CONCORDANT_TRANSLOCATIONS) {
debug16(printf("Have %d concordant translocation results\n",List_length(conc_transloc)));
for (p = hitpairs; p != NULL; p = List_next(p)) {
@@ -20447,6 +21981,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
cutoff_level_5,cutoff_level_3,/*expect_concordant_p*/true);
if (Stage3pair_sense_consistent_p(result) == false) {
+ debug16(printf("sense is not consistent\n"));
result = align_pair_with_gmap(&(*final_pairtype),result,
queryuc_ptr_5,queryuc_ptr_3,querylength5,querylength3,
oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
@@ -20539,6 +22074,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
stage3list_gc(&singlehits3);
stage3list_gc(&singlehits5);
+ debug16(printf("1 Exiting consolidate_paired_results\n"));
return stage3pairarray;
}
}
@@ -20610,6 +22146,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
}
debug16(printf("Result is NULL, and we have %d hits on 5' end and %d hits on 3' end\n",
(*nhits5_primary) + (*nhits5_altloc),(*nhits3_primary) + (*nhits3_altloc)));
+ debug16(printf("2 Exiting consolidate_paired_results\n"));
return (Stage3pair_T *) NULL;
} else {
@@ -20624,6 +22161,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
*nhits5_primary = *nhits5_altloc = 0;
*nhits3_primary = *nhits3_altloc = 0;
*stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
+ debug16(printf("3 Exiting consolidate_paired_results\n"));
return (Stage3pair_T *) NULL;
} else {
@@ -20644,6 +22182,8 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
*nhits5_primary = *nhits5_altloc = 0;
*nhits3_primary = *nhits3_altloc = 0;
*stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
+ debug16(printf("4 Exiting consolidate_paired_results\n"));
+
return stage3pairarray;
}
}
diff --git a/src/stage3.c b/src/stage3.c
index 23ec75c..dabb211 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 191630 2016-06-09 22:00:21Z twu $";
+static char rcsid[] = "$Id: stage3.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -319,7 +319,6 @@ static int maxintronlen_ends;
static int minendexon;
static bool maximize_coverage_p = false;
-static bool output_sam_p;
static Stage3debug_T stage3debug;
static bool homopolymerp;
@@ -334,7 +333,7 @@ Stage3_setup (bool splicingp_in, bool novelsplicingp_in, bool require_splicedir_
int extramaterial_end_in, int extramaterial_paired_in,
int extraband_single_in, int extraband_end_in, int extraband_paired_in,
int ngap_in, int maxintronlen_in, int maxintronlen_ends_in, int minendexon_in,
- bool output_sam_p_in, bool homopolymerp_in, Stage3debug_T stage3debug_in) {
+ bool homopolymerp_in, Stage3debug_T stage3debug_in) {
splicingp = splicingp_in;
novelsplicingp = novelsplicingp_in;
require_splicedir_p = require_splicedir_p_in;
@@ -366,7 +365,6 @@ Stage3_setup (bool splicingp_in, bool novelsplicingp_in, bool require_splicedir_
maxintronlen_ends = maxintronlen_ends_in;
minendexon = minendexon_in;
- output_sam_p = output_sam_p_in;
homopolymerp = homopolymerp_in;
stage3debug = stage3debug_in;
@@ -4217,7 +4215,8 @@ fill_in_gaps (List_T path, Pairpool_T pairpool, char *queryseq_ptr,
rightpair = pairs->first;
if (/* (stage3debug > NO_STAGE3DEBUG && stage3debug < POST_INTRONS) || */
- (output_sam_p == false && pair->comp == DUALBREAK_COMP)) {
+ (/*output_sam_p == false && */ pair->comp == DUALBREAK_COMP)) {
+ /* output_sam_p == false clause gives incorrect CIGAR starts */
pairs = add_dualbreak(pairs,queryseq_ptr,
chroffset,chrhigh,cdna_direction,watsonp,
leftpair,rightpair,pairpool,ngap);
@@ -8582,7 +8581,7 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
lastpair = (Pair_T) gappairs->first;
firstpair = (Pair_T) List_last_value(gappairs);
debug14(printf("gappairs goes from %d to %d\n",firstpair->querypos,lastpair->querypos));
- if (1 || (firstpair->querypos == querydp5 && lastpair->querypos == querydp3)) {
+ if (firstpair->querypos == querydp5 && lastpair->querypos == querydp3) {
/* fprintf(stderr,"%d..%d .. %d..%d\n",querydp5,firstpair->querypos,lastpair->querypos,querydp3); */
debug14(printf(" => entire query sequence bridged or not, but taking it regardless\n"));
pairs = Pairpool_transfer(pairs,gappairs);
diff --git a/src/stage3.h b/src/stage3.h
index f9d1bde..ba18dae 100644
--- a/src/stage3.h
+++ b/src/stage3.h
@@ -1,4 +1,4 @@
-/* $Id: stage3.h 190429 2016-05-24 21:28:28Z twu $ */
+/* $Id: stage3.h 193899 2016-07-12 04:41:34Z twu $ */
#ifndef STAGE3_INCLUDED
#define STAGE3_INCLUDED
@@ -62,7 +62,7 @@ Stage3_setup (bool splicingp_in, bool novelsplicingp_in, bool require_splicedir_
int extramaterial_end_in, int extramaterial_paired_in,
int extraband_single_in, int extraband_end_in, int extraband_paired_in,
int ngap_in, int maxintronlen_in, int maxintronlen_ends_in, int minendexon_in,
- bool output_sam_p_in, bool homopolymerp_in, Stage3debug_T stage3debug_in);
+ bool homopolymerp_in, Stage3debug_T stage3debug_in);
extern bool
Stage3_chimera_left_p (T this);
diff --git a/src/stage3hr.c b/src/stage3hr.c
index eecca77..0f617cf 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 191133 2016-06-03 17:26:49Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -64,7 +64,8 @@ static char rcsid[] = "$Id: stage3hr.c 191133 2016-06-03 17:26:49Z twu $";
/* #define TERMINAL_SECOND_CLASS 1 -- enabling this leads to poor results */
#define TERMINAL_COMPUTE_MINLENGTH 40
-/* #define SCORE_INDELS 1 -- Needed to compare genomic positions with and without indels */
+#define SCORE_INDELS_EVENTRIM 1 /* Needed to compare genomic positions with and without indels */
+#define EVENTRIM_BADINTRON_PENALTY 2
#define OUTERLENGTH_SLOP 100
@@ -377,7 +378,7 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
}
-#ifdef DEBUG0
+#if defined(DEBUG0) || defined(DEBUG5)
static char *
print_sense (int sense) {
if (sense == SENSE_NULL) {
@@ -1887,6 +1888,29 @@ Stage3pair_total_trim (Stage3pair_T this) {
}
int
+Stage3pair_max_trim (Stage3pair_T this) {
+ int trim5, trim3;
+ T hit;
+
+#if 0
+ /* Don't want ambiguous ends for purpose of defining concordant terminals */
+ trim5 = Stage3end_total_trim(this->hit5);
+ trim3 = Stage3end_total_trim(this->hit3);
+#else
+ hit = this->hit5;
+ trim5 = hit->trim_left + hit->trim_right;
+ hit = this->hit3;
+ trim3 = hit->trim_left + hit->trim_right;
+#endif
+
+ if (trim5 > trim3) {
+ return trim5;
+ } else {
+ return trim3;
+ }
+}
+
+int
Stage3pair_nmatches_posttrim (int *nmatches5, int *nmatches3, Stage3pair_T this) {
*nmatches5 = this->hit5->nmatches_posttrim;
*nmatches3 = this->hit3->nmatches_posttrim;
@@ -1901,9 +1925,16 @@ Stage3pair_concordantp (List_T hitpairs) {
for (p = hitpairs; p != NULL; p = List_next(p)) {
hitpair = (Stage3pair_T) List_head(p);
+#if 0
+ /* Not necessary, since we are getting the result after GMAP align pair */
+ if (Stage3_determine_pairtype(hitpair->hit5,hitpair->hit3) == CONCORDANT) {
+ return true;
+ }
+#else
if (hitpair->pairtype == CONCORDANT) {
return true;
}
+#endif
}
return false;
}
@@ -2098,9 +2129,10 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
common_genomicpos - hit->chroffset);
for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
- printf("substring %p: %u..%u\n",
+ printf("substring %p: %u..%u, trim %d..%d\n",
substring,Substring_alignstart_trim(substring) - hit->chroffset,
- Substring_alignend_trim(substring) - 1U - hit->chroffset);
+ Substring_alignend_trim(substring) - 1U - hit->chroffset,
+ Substring_trim_left(substring),Substring_trim_right(substring));
}
printf("\n");
#endif
@@ -2108,9 +2140,10 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
*ilength_low = 0;
for (p = hit->substrings_1toN, q = hit->junctions_1toN; p != NULL; p = List_next(p), q = List_next(q)) {
substring = (Substring_T) List_head(p);
- debug15(printf("substring %p: %u..%u\n",substring,
+ debug15(printf("substring %p: %u..%u, trim %d..%d\n",substring,
Substring_alignstart_trim(substring) - hit->chroffset,
- Substring_alignend_trim(substring) - 1U - hit->chroffset));
+ Substring_alignend_trim(substring) - 1U - hit->chroffset,
+ Substring_trim_left(substring),Substring_trim_right(substring)));
if (Substring_overlap_point_trimmed_p(substring,common_genomicpos) == false) {
*ilength_low += Substring_genomic_alignment_length(substring);
if (q != NULL) {
@@ -2146,9 +2179,10 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
common_genomicpos - hit->chroffset);
for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
- printf("substring %p: %u..%u\n",
+ printf("substring %p: %u..%u, trim %d..%d\n",
substring,Substring_alignstart_trim(substring) - hit->chroffset,
- Substring_alignend_trim(substring) - 1U - hit->chroffset);
+ Substring_alignend_trim(substring) - 1U - hit->chroffset,
+ Substring_trim_left(substring),Substring_trim_right(substring));
}
printf("\n");
#endif
@@ -5167,7 +5201,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
printf("\n");
#endif
- new = (T) MALLOC(sizeof(*new));
+ new = (T) MALLOC_OUT(sizeof(*new));
new->hittype = SUBSTRINGS;
new->pairarray = (struct Pair_T *) NULL;
@@ -5215,8 +5249,13 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
printf("NEW SUBSTRINGS\n");
for (p = new->substrings_1toN; p != NULL; p = List_next(p)) {
substring = List_head(p);
- printf("%d..%d\t%u..%u\n",Substring_querystart(substring),Substring_queryend(substring),
- Substring_alignstart_trim_chr(substring),Substring_alignend_trim_chr(substring));
+ if (Substring_ambiguous_p(substring) == true) {
+ printf("%d..%d\tambig\tmismatches:%d\n",Substring_querystart(substring),Substring_queryend(substring),
+ Substring_nmismatches_whole(substring));
+ } else {
+ printf("%d..%d\t%u..%u\tmismatches:%d\n",Substring_querystart(substring),Substring_queryend(substring),
+ Substring_alignstart_trim_chr(substring),Substring_alignend_trim_chr(substring),Substring_nmismatches_whole(substring));
+ }
}
printf("\n");
#endif
@@ -5347,6 +5386,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
Chrpos_T genomepos;
char c, g, g_alt, comp;
+ int max_nmismatches;
int cdna_direction, sensedir;
int npairs1, goodness1, matches1, nmatches_posttrim_1,
max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
@@ -5372,7 +5412,12 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
#ifdef DEBUG13
for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
- printf("%d..%d\n",Substring_querystart(substring),Substring_queryend(substring));
+ printf("%d..%d with %d nmismatches",
+ Substring_querystart(substring),Substring_queryend(substring),Substring_nmismatches_region(substring));
+ if (Substring_ambiguous_p(substring) == true) {
+ printf(" ambiguous");
+ }
+ printf("\n");
}
#endif
@@ -5442,12 +5487,10 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
} else {
substring = (Substring_T) List_head(this->substrings_1toN);
- if (high_resolution_substring_ends_p == false) {
- /* Don't try to solve ends, as requested by user */
+ if ((querypos = Substring_querystart(substring)) < 30 && high_resolution_substring_ends_p == false) {
debug13(printf("User does not want high resolution substring ends\n"));
all_stage2_starts = (List_T) NULL;
- } else if ((querypos = Substring_querystart(substring)) < 15) {
- /* Don't try to solve short ends, for sake of efficiency */
+ } else if (querypos < 15) {
debug13(printf("No starts, since querystart %d < 15\n",querypos));
all_stage2_starts = (List_T) NULL;
} else {
@@ -5517,12 +5560,10 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
} else {
substring = (Substring_T) List_head(this->substrings_Nto1);
- if (high_resolution_substring_ends_p == false) {
- /* Don't try to solve ends, as requested by user */
+ if ((querypos = Substring_queryend(substring)) > querylength - 30 && high_resolution_substring_ends_p == false) {
debug13(printf("User does not want high resolution substring ends\n"));
all_stage2_ends = (List_T) NULL;
- } else if ((querypos = Substring_queryend(substring)) > querylength - 15) {
- /* Don't try to solve short ends, for sake of efficiency */
+ } else if (querypos > querylength - 15) {
debug13(printf("No ends, since queryend %d > querylength %d - 15\n",querypos,querylength));
all_stage2_ends = (List_T) NULL;
} else {
@@ -5550,12 +5591,21 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
}
+ /* No, do run this branch, because we are calling it only when we need to */
/* Get better results if we turn off this branch, because GMAP finds
indels at ends, so do re-run GMAP in all cases */
- if (/* high_resolution_substring_ends_p == false && */
- extend_ends_p == true && all_stage2_starts == NULL && all_stage2_ends == NULL) {
+ max_nmismatches = 0;
+ for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_nmismatches_region(substring) > max_nmismatches) {
+ max_nmismatches = Substring_nmismatches_region(substring);
+ }
+ }
+
+ if (max_nmismatches <= 2 && all_stage2_starts == NULL && all_stage2_ends == NULL) {
/* Don't run re-run GMAP on central portion. Just rely on substrings. */
- debug13(printf("Just relying on substrings. hit1 is NULL\n"));
+ debug13(printf("max_nmismatches = %d, so just relying on substrings. hit1 is NULL\n",
+ max_nmismatches));
hit1 = (T) NULL;
} else {
@@ -5740,6 +5790,7 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
Chrpos_T genomepos;
char c, g, g_alt, comp;
+ int max_nmismatches;
int cdna_direction, sensedir;
int npairs1, goodness1, matches1, nmatches_posttrim_1,
max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
@@ -5765,7 +5816,12 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
#ifdef DEBUG13
for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
- printf("%d..%d\n",Substring_querystart(substring),Substring_queryend(substring));
+ printf("%d..%d with %d nmismatches",
+ Substring_querystart(substring),Substring_queryend(substring),Substring_nmismatches_region(substring));
+ if (Substring_ambiguous_p(substring) == true) {
+ printf(" ambiguous");
+ }
+ printf("\n");
}
#endif
@@ -5835,12 +5891,10 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
} else {
substring = (Substring_T) List_head(this->substrings_1toN);
- if (high_resolution_substring_ends_p == false) {
- /* Don't try to solve ends, as requested by user */
+ if ((querypos = Substring_querystart(substring)) < 30 && high_resolution_substring_ends_p == false) {
debug13(printf("User does not want high resolution substring ends\n"));
all_stage2_starts = (List_T) NULL;
- } else if ((querypos = Substring_querystart(substring)) < 15) {
- /* Don't try to solve short ends, for sake of efficiency */
+ } else if (querypos < 15) {
debug13(printf("No starts, since querystart %d < 15\n",querypos));
all_stage2_starts = (List_T) NULL;
} else {
@@ -5910,12 +5964,10 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
} else {
substring = (Substring_T) List_head(this->substrings_Nto1);
- if (high_resolution_substring_ends_p == false) {
- /* Don't try to solve ends, as requested by user */
+ if ((querypos = Substring_queryend(substring)) > querylength - 30 && high_resolution_substring_ends_p == false) {
debug13(printf("User does not want high resolution substring ends\n"));
all_stage2_ends = (List_T) NULL;
- } else if ((querypos = Substring_queryend(substring)) > querylength - 15) {
- /* Don't try to solve short ends, for sake of efficiency */
+ } else if (querypos > querylength - 15) {
debug13(printf("No ends, since queryend %d > querylength %d - 15\n",querypos,querylength));
all_stage2_ends = (List_T) NULL;
} else {
@@ -5943,12 +5995,21 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
}
+ /* No, do run this branch, because we are calling it only when we need to */
/* Get better results if we turn off this branch, because GMAP finds
indels at ends, so do re-run GMAP in all cases */
- if (/* high_resolution_substring_ends_p == false && */
- extend_ends_p == true && all_stage2_starts == NULL && all_stage2_ends == NULL) {
+ max_nmismatches = 0;
+ for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_nmismatches_region(substring) > max_nmismatches) {
+ max_nmismatches = Substring_nmismatches_region(substring);
+ }
+ }
+
+ if (max_nmismatches <= 2 && all_stage2_starts == NULL && all_stage2_ends == NULL) {
/* Don't run re-run GMAP on central portion. Just rely on substrings. */
- debug13(printf("Just relying on substrings. hit1 is NULL\n"));
+ debug13(printf("max_nmismatches = %d, so just relying on substrings. hit1 is NULL\n",
+ max_nmismatches));
hit1 = (T) NULL;
} else {
@@ -6150,12 +6211,11 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
debug13(printf("Entered Stage3hr_gmap_run_gmap_plus with gmap_cdna_direction %d\n",this->gmap_cdna_direction));
/* D. Make all_stage2_starts (paths) */
- if (high_resolution_gmap_ends_p == false) {
- /* Don't try to solve ends, as requested by user */
+ if ((querypos = Pair_querypos(&(this->pairarray[0]))) < 30 && high_resolution_gmap_ends_p == false) {
debug13(printf("User does not want high resolution gmap ends\n"));
all_stage2_starts = (List_T) NULL;
- } else if ((querypos = Pair_querypos(&(this->pairarray[0]))) < 15) {
- /* Don't try to solve short ends, for sake of efficiency */
+ } else if (querypos < 15) {
+ debug13(printf("No starts, since querystart %d < 15\n",querypos));
all_stage2_starts = (List_T) NULL;
} else {
chrend = Pair_genomepos(&(this->pairarray[0]));
@@ -6182,12 +6242,11 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
/* E. Make all_stage2_ends (pairs) */
- if (high_resolution_gmap_ends_p == false) {
- /* Don't try to solve ends, as requested by user */
+ if ((querypos = Pair_querypos(&(this->pairarray[this->npairs - 1]))) > querylength - 30 && high_resolution_gmap_ends_p == false) {
debug13(printf("User does not want high resolution gmap ends\n"));
all_stage2_ends = (List_T) NULL;
- } else if ((querypos = Pair_querypos(&(this->pairarray[this->npairs - 1]))) > querylength - 15) {
- /* Don't try to solve short ends, for sake of efficiency */
+ } else if (querypos > querylength - 15) {
+ debug13(printf("No ends, since queryend %d > querylength %d - 15\n",querypos,querylength));
all_stage2_ends = (List_T) NULL;
} else {
chrstart = Pair_genomepos(&(this->pairarray[this->npairs - 1]));
@@ -6220,7 +6279,11 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
/* F. Make stage2pairs */
stage2pairs = (List_T) NULL;
for (i = 0; i < this->npairs; i++) {
- stage2pairs = Pairpool_push_copy(stage2pairs,pairpool,&(this->pairarray[i]));
+ if (this->pairarray[i].gapp == true) {
+ /* Skip any gaps pushed into "final" pairarray */
+ } else {
+ stage2pairs = Pairpool_push_copy(stage2pairs,pairpool,&(this->pairarray[i]));
+ }
}
debug13(Pair_dump_list(stage2pairs,true));
debug13(printf("\n"));
@@ -6376,12 +6439,11 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
debug13(printf("Entered Stage3hr_gmap_run_gmap_minus with gmap_cdna_direction %d\n",this->gmap_cdna_direction));
/* D. Make all_stage2_starts (paths) */
- if (high_resolution_gmap_ends_p == false) {
- /* Don't try to solve ends, as requested by user */
+ if ((querypos = Pair_querypos(&(this->pairarray[0]))) < 30 && high_resolution_gmap_ends_p == false) {
debug13(printf("User does not want high resolution gmap ends\n"));
all_stage2_starts = (List_T) NULL;
- } else if ((querypos = Pair_querypos(&(this->pairarray[0]))) < 15) {
- /* Don't try to solve short ends, for sake of efficiency */
+ } else if (querypos < 15) {
+ debug13(printf("No starts, since querystart %d < 15\n",querypos));
all_stage2_starts = (List_T) NULL;
} else {
chrstart = Pair_genomepos(&(this->pairarray[0]));
@@ -6409,12 +6471,11 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
/* E. Make all_stage2_ends (pairs) */
- if (high_resolution_gmap_ends_p == false) {
- /* Don't try to solve ends, as requested by user */
+ if ((querypos = Pair_querypos(&(this->pairarray[this->npairs - 1]))) > querylength - 30 && high_resolution_gmap_ends_p == false) {
debug13(printf("User does not want high resolution gmap ends\n"));
all_stage2_ends = (List_T) NULL;
- } else if ((querypos = Pair_querypos(&(this->pairarray[this->npairs - 1]))) > querylength - 15) {
- /* Don't try to solve short ends, for sake of efficiency */
+ } else if (querypos > querylength - 15) {
+ debug13(printf("No ends, since queryend %d > querylength %d - 15\n",querypos,querylength));
all_stage2_ends = (List_T) NULL;
} else {
chrend = Pair_genomepos(&(this->pairarray[this->npairs - 1]));
@@ -7240,7 +7301,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
new->trim_right_splicep = Substring_trim_right_splicep(substring2);
#if 0
-#ifdef SCORE_INDELS
+#ifdef SCORE_INDELS_EVENTRIM
/* indel_penalty will be counted later */
new->penalties = 0;
#else
@@ -7565,7 +7626,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
new->trim_right_splicep = Substring_trim_right_splicep(substring2);
#if 0
-#ifdef SCORE_INDELS
+#ifdef SCORE_INDELS_EVENTRIM
/* indel_penalty will be counted later */
new->penalties = 0;
#else
@@ -9088,13 +9149,13 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
/* new->nmatches_posttrim -= indel_penalty_middle * nindelbreaks; -- for use in goodness_cmp procedures */
if (new->nmatches_posttrim < querylength/2) {
- debug0(printf(" nmatches %d < querylength %d/2, so returning NULL\n",
+ debug0(printf(" Bad GMAP: nmatches %d < querylength %d/2, so returning NULL\n",
new->nmatches_posttrim,querylength));
Pair_tokens_free(&cigar_tokens);
FREE_OUT(new);
return (T) NULL;
} else if (max_match_length < gmap_min_nconsecutive) {
- debug0(printf(" max_match_length %d < %d, so returning NULL\n",max_match_length,gmap_min_nconsecutive));
+ debug0(printf(" Bad GMAP: max_match_length %d < %d, so returning NULL\n",max_match_length,gmap_min_nconsecutive));
Pair_tokens_free(&cigar_tokens);
FREE_OUT(new);
return (T) NULL;
@@ -9152,7 +9213,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
#if 0
/* new->penalties not used anyway for GMAP alignments */
-#ifdef SCORE_INDELS
+#ifdef SCORE_INDELS_EVENTRIM
/* indel_penalty will be counted later */
new->penalties = localsplicing_penalty * nintrons;
#else
@@ -9212,16 +9273,20 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
Pair_tokens_free(&new->cigar_tokens);
/* No substrings or junctions */
FREE(new);
+ debug0(printf("Returning NULL\n"));
return (T) NULL;
} else if (new->circularalias >= 0) {
+ debug0(printf("Returning GMAP %p\n",new));
new->altlocp = false;
return new;
} else if ((new->altlocp = altlocp[chrnum]) == false) {
+ debug0(printf("Returning GMAP %p\n",new));
return new;
} else {
+ debug0(printf("Returning GMAP %p\n",new));
return new;
}
}
@@ -10040,7 +10105,7 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
int minscore = querylength;
int max_nmatches = 0, max_nmatches_posttrim = 0;
int trim_left = querylength, trim_right = querylength, trim_left_0, trim_right_0;
- int nindelbreaks;
+ int nindelbreaks, nbadintrons;
int best_nsegments;
#ifdef DISTANT_SPLICE_SPECIAL
@@ -10122,7 +10187,7 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
if (hit->hittype == GMAP) {
debug4(printf("score GMAP:"));
- hit->score_eventrim = Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
+ hit->score_eventrim = Pair_nmismatches_region(&nindelbreaks,&nbadintrons,hit->pairarray,hit->npairs,
trim_left,trim_right,start_amb_length(hit),end_amb_length(hit),
hit->querylength);
debug4(printf(" add mismatches %d.",hit->score_eventrim));
@@ -10135,9 +10200,14 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
hit->score_eventrim += amb_penalty;
}
-#ifdef SCORE_INDELS
- hit->score_eventrim += indel_penalty_middle * nindelbreaks;
+#ifdef SCORE_INDELS_EVENTRIM
+ if (nindelbreaks > 1) {
+ hit->score_eventrim += indel_penalty_middle * (nindelbreaks - 1);
+ debug4(printf(" add penalty for %d indelbreaks",nindelbreaks));
+ }
#endif
+ hit->score_eventrim += EVENTRIM_BADINTRON_PENALTY * nbadintrons;
+ debug4(printf(" add penalty for %d bad introns.",nbadintrons));
debug4(printf(" RESULT: %d\n",hit->score_eventrim));
} else {
@@ -10160,7 +10230,9 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
query_compress_fwd,query_compress_rev)));
}
-#ifdef SCORE_INDELS
+#if 0
+ /* Accept a single indel */
+#ifdef SCORE_INDELS_EVENTRIM
/* Needs to match GMAP scoring */
if (hit->hittype == INSERTION || hit->hittype == DELETION) {
debug4(printf(" indel at %d",hit->indel_pos));
@@ -10170,6 +10242,7 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
}
}
#endif
+#endif
debug4(printf(" RESULT: %d\n",hit->score_eventrim));
}
}
@@ -12022,10 +12095,12 @@ Stage3end_resolve_multimapping (List_T hits) {
static void
print_alignment_info (Filestring_T fp, int nblocks, int score, int mapq_score, bool sarrayp) {
- FPRINTF(fp,"segs:%d,align_score:%d,mapq:%d",nblocks,score,mapq_score);
+#ifndef NO_COMPARE
+ FPRINTF(fp,"\tsegs:%d,align_score:%d,mapq:%d",nblocks,score,mapq_score);
if (sarrayp == true) {
FPRINTF(fp,",method:sa");
}
+#endif
return;
}
@@ -12086,6 +12161,11 @@ Stage3pair_pairtype (Stage3pair_T this) {
return this->pairtype;
}
+Pairtype_T
+Stage3pair_determine_pairtype (Stage3pair_T this) {
+ return Stage3_determine_pairtype(this->hit5,this->hit3);
+}
+
bool
Stage3pair_circularp (Stage3pair_T this) {
return this->circularp;
@@ -12139,8 +12219,10 @@ print_pair_info (Filestring_T fp, T hit5, T hit3, int insertlength, int pairscor
assert(hit5->plusp == hit3->plusp); /* Same direction */
#endif
+#ifndef NO_COMPARE
FPRINTF(fp,"pair_score:%d",pairscore);
FPRINTF(fp,",insert_length:%d",insertlength);
+#endif
switch (pairtype) {
case CONCORDANT: break;
@@ -12150,6 +12232,7 @@ print_pair_info (Filestring_T fp, T hit5, T hit3, int insertlength, int pairscor
case CONCORDANT_TRANSLOCATIONS: break;
case CONCORDANT_TERMINAL: break;
case PAIRED_UNSPECIFIED: abort();
+ case PAIRED_TERMINALS: abort();
case UNPAIRED: abort();
case UNSPECIFIED: abort();
}
@@ -12230,7 +12313,6 @@ print_substrings (Filestring_T fp, T this, int score, Univ_IIT_T chromosome_iit,
}
/* Alignment info */
- FPRINTF(fp,"\t");
print_alignment_info(fp,nblocks,score,mapq_score,this->sarrayp);
/* Pairing info */
@@ -12270,7 +12352,6 @@ print_substrings (Filestring_T fp, T this, int score, Univ_IIT_T chromosome_iit,
if (pairinfo_printed_p == false) {
/* Alignment info */
- FPRINTF(fp,"\t");
print_alignment_info(fp,nblocks,score,mapq_score,this->sarrayp);
/* Pairing info */
@@ -12306,7 +12387,6 @@ print_substrings (Filestring_T fp, T this, int score, Univ_IIT_T chromosome_iit,
if (pairinfo_printed_p == false) {
/* Alignment info */
- FPRINTF(fp,"\t");
print_alignment_info(fp,nblocks,score,mapq_score,this->sarrayp);
/* Pairing info */
@@ -12841,6 +12921,7 @@ strip_gaps_at_tail (List_T pairs) {
}
+/* Used only for --merge-overlap features, so obey hardclip and not querystart/queryend */
/* If use querylength_adj, ss.bug.4 fails. If use querylength, ss.bug.3 fails */
static List_T
Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
@@ -13716,7 +13797,7 @@ compute_insertlength (Stage3pair_T this) {
debug10(printf("Got hit5 of type GMAP\n"));
if (hit5->plusp == true && hit3->plusp == true) {
/* Have 5-start..end and 3-start..end */
- debug10(printf("plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+ debug10(printf("1 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
if (hit5->genomicend <= hit3->genomicstart) {
@@ -13731,7 +13812,7 @@ compute_insertlength (Stage3pair_T this) {
} else if (hit5->plusp == false && hit3->plusp == false) {
/* Have 3-end..start and 5-end..start */
- debug10(printf("minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+ debug10(printf("2 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
if (hit3->genomicstart <= hit5->genomicend) {
@@ -13750,7 +13831,7 @@ compute_insertlength (Stage3pair_T this) {
debug10(printf("Got hit3 of type GMAP\n"));
if (hit5->plusp == true && hit3->plusp == true) {
/* Have 5-start..end and 3-start..end */
- debug10(printf("plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+ debug10(printf("3 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
if (hit5->genomicend <= hit3->genomicstart) {
@@ -13765,7 +13846,7 @@ compute_insertlength (Stage3pair_T this) {
} else if (hit5->plusp == false && hit3->plusp == false) {
/* Have 3-end..start and 5-end..start */
- debug10(printf("minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+ debug10(printf("4 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
if (hit3->genomicstart <= hit5->genomicend) {
/* No overlap */
@@ -13838,6 +13919,11 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
/* Stage3end_T copy; */
Substring_T substring1, substringN;
Chrpos_T chrstart, chrend, chrpos;
+
+ Univcoord_T common_genomicpos;
+ int ilength5_low, ilength5_high, ilength3_low, ilength3_high;
+ int insertlength1, insertlength2;
+
int querypos;
int unresolved_amb_length = 0;
/* int found_score = 0; */
@@ -13847,8 +13933,8 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
int querylength5 = hit5->querylength;
int querylength3 = hit3->querylength;
- debug10(printf("\nStage3pair_new called with pairtype %s and chrnum %d, %d (effective %d, %d)\n",
- Pairtype_string(pairtype),hit5->chrnum,hit3->chrnum,hit5->effective_chrnum,hit3->effective_chrnum));
+ debug0(printf("\nStage3pair_new called with pairtype %s and chrnum %d, %d (effective %d, %d), expect_concordant_p %d\n",
+ Pairtype_string(pairtype),hit5->chrnum,hit3->chrnum,hit5->effective_chrnum,hit3->effective_chrnum,expect_concordant_p));
#if 0
if (hit5->hittype == TERMINAL && hit5->trim_left + hit5->trim_right >= reject_trimlength) {
@@ -13859,7 +13945,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
if (private3p == true) {
Stage3end_free(&hit3);
}
- debug5(printf("Rejecting terminal as NULL because hit5 trim %d+%d > reject_trimlength %d\n",hit5->trim_left,hit5->trim_right,reject_trimlength));
+ debug10(printf("Rejecting terminal as NULL because hit5 trim %d+%d > reject_trimlength %d\n",hit5->trim_left,hit5->trim_right,reject_trimlength));
return (Stage3pair_T) NULL;
} else if (hit3->hittype == TERMINAL && hit3->trim_left + hit3->trim_right >= reject_trimlength) {
@@ -13870,7 +13956,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
if (private3p == true) {
Stage3end_free(&hit3);
}
- debug5(printf("Rejecting terminal as NULL because hit3 trim %d+%d > reject_trimlength %d\n",hit3->trim_left,hit3->trim_right,reject_trimlength));
+ debug10(printf("Rejecting terminal as NULL because hit3 trim %d+%d > reject_trimlength %d\n",hit3->trim_left,hit3->trim_right,reject_trimlength));
return (Stage3pair_T) NULL;
} else {
#endif
@@ -13901,27 +13987,71 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
#endif
if (hit5->hittype == GMAP && hit3->hittype == GMAP) {
- debug10(printf("Got hit5 and hit3 both of type GMAP\n"));
+ if ((common_genomicpos = pair_common_genomicpos(hit5,hit3)) > 0) {
+ debug10(printf("Got hit5 and hit3 both of type GMAP with common genomicpos %u\n",common_genomicpos));
+ find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset);
+ find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset);
+
+ debug10(printf("ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug10(printf("ilength53 is %d, ilength 35 is %d\n",ilength5_low + ilength3_high - 1,ilength3_low + ilength5_high - 1));
+ debug10(printf("trims for hit5 %d..%d, trims for hit3 %d..%d\n",hit5->trim_left,hit5->trim_right,hit3->trim_left,hit3->trim_right));
+
+ /* Do not try to resolve ambiguity on inside of concordant ends */
+ if (hit5->plusp == true && hit3->plusp == true) {
+ new->dir = +1;
+ insertlength1 = (ilength5_low + ilength3_high - 1) + hit5->trim_left + hit3->trim_right + hit5->gmap_start_amb_length + hit3->gmap_end_amb_length;
+ insertlength2 = (ilength3_low + ilength5_high - 1) + hit3->trim_left + hit5->trim_right + hit3->gmap_start_amb_length + hit5->gmap_end_amb_length;
+ debug10(printf("insertlength1 is %d, insertlength2 is %d\n",insertlength1,insertlength2));
+ if (insertlength1 < insertlength2) {
+ new->insertlength = insertlength1;
+ } else {
+ new->insertlength = insertlength2;
+ }
+ new->insertlength_expected_sign = insertlength_expected(new->insertlength);
+ debug10(printf("plus, overlap: insert length %d\n",new->insertlength));
+
+ } else if (hit5->plusp == false && hit3->plusp == false) {
+ new->dir = -1;
+ insertlength1 = (ilength5_low + ilength3_high - 1) + hit3->trim_left + hit5->trim_right + hit3->gmap_start_amb_length + hit5->gmap_end_amb_length;
+ insertlength2 = (ilength3_low + ilength5_high - 1) + hit5->trim_left + hit3->trim_right + hit5->gmap_start_amb_length + hit3->gmap_end_amb_length;
+ debug10(printf("insertlength1 is %d, insertlength2 is %d\n",insertlength1,insertlength2));
+ if (insertlength1 < insertlength2) {
+ new->insertlength = insertlength1;
+ } else {
+ new->insertlength = insertlength2;
+ }
+ new->insertlength_expected_sign = insertlength_expected(new->insertlength);
+ debug10(printf("minus, overlap: insert length %d\n",new->insertlength));
+
+ } else {
+ new->dir = 0;
+ new->insertlength = 0;
+ new->insertlength_expected_sign = false;
+ }
- /* Do not try to resolve ambiguity on inside of concordant ends */
- if (hit5->plusp == true && hit3->plusp == true) {
- new->dir = +1;
- new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
- new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, no overlap: insert length %d = start3 %u - end5 %u + %d + %d\n",
- new->insertlength,hit3->genomicstart - hit3->chroffset,
- hit5->genomicend - hit5->chroffset,querylength5,querylength3));
- } else if (hit5->plusp == false && hit3->plusp == false) {
- new->dir = -1;
- new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
- new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, no overlap: insert length %d = end5 %u - start3 %u + %d + %d\n",
- new->insertlength,hit5->genomicend - hit5->chroffset,
- hit3->genomicstart - hit3->chroffset,querylength5,querylength3));
} else {
- new->dir = 0;
- new->insertlength = pair_insert_length_unpaired(hit5,hit3);
- new->insertlength_expected_sign = false;
+ debug10(printf("Got hit5 and hit3 both of type GMAP without overlap\n"));
+
+ /* Do not try to resolve ambiguity on inside of concordant ends */
+ if (hit5->plusp == true && hit3->plusp == true) {
+ new->dir = +1;
+ new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+ new->insertlength_expected_sign = insertlength_expected(new->insertlength);
+ debug10(printf("plus, no overlap: insert length %d = start3 %u - end5 %u + %d + %d\n",
+ new->insertlength,hit3->genomicstart - hit3->chroffset,
+ hit5->genomicend - hit5->chroffset,querylength5,querylength3));
+ } else if (hit5->plusp == false && hit3->plusp == false) {
+ new->dir = -1;
+ new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+ new->insertlength_expected_sign = insertlength_expected(new->insertlength);
+ debug10(printf("minus, no overlap: insert length %d = end5 %u - start3 %u + %d + %d\n",
+ new->insertlength,hit5->genomicend - hit5->chroffset,
+ hit3->genomicstart - hit3->chroffset,querylength5,querylength3));
+ } else {
+ new->dir = 0;
+ new->insertlength = pair_insert_length_unpaired(hit5,hit3);
+ new->insertlength_expected_sign = false;
+ }
}
} else if (hit5->hittype == GMAP) {
@@ -13936,7 +14066,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
}
/* Have 5-start..end and 3-start..end */
- debug10(printf("plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+ debug10(printf("5 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
if (hit5->genomicend <= hit3->genomicstart) {
@@ -13958,7 +14088,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
if (new->insertlength <= 0) {
/* Overreach */
- debug5(printf(" Returning NULL because of overreach\n"));
+ debug10(printf(" Returning NULL because of overreach\n"));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -13980,7 +14110,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
}
/* Have 3-end..start and 5-end..start */
- debug10(printf("minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+ debug10(printf("6 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
if (hit3->genomicstart <= hit5->genomicend) {
@@ -14002,7 +14132,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
if (new->insertlength <= 0) {
/* Overreach */
- debug5(printf(" Returning NULL because of overreach\n"));
+ debug10(printf(" Returning NULL because of overreach\n"));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -14032,7 +14162,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
}
/* Have 5-start..end and 3-start..end */
- debug10(printf("plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+ debug10(printf("7 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
if (hit5->genomicend <= hit3->genomicstart) {
@@ -14054,7 +14184,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
if (new->insertlength <= 0) {
/* Overreach */
- debug5(printf(" Returning NULL because of overreach\n"));
+ debug10(printf(" Returning NULL because of overreach\n"));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -14076,7 +14206,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
}
/* Have 3-end..start and 5-end..start */
- debug10(printf("minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+ debug10(printf("8 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
if (hit3->genomicstart <= hit5->genomicend) {
/* No overlap */
@@ -14097,7 +14227,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
if (new->insertlength <= 0) {
/* Overreach */
- debug5(printf(" Returning NULL because of overreach\n"));
+ debug10(printf(" Returning NULL because of overreach\n"));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -14177,7 +14307,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
if (overreach5p == true || overreach3p == true) {
/* Either overreach */
- debug5(printf(" Returning NULL because of dual overreach\n"));
+ debug10(printf(" Returning NULL because of dual overreach\n"));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -14309,7 +14439,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
if (overreach5p == true || overreach3p == true) {
/* Either overreach */
- debug5(printf(" Returning NULL because of dual overreach\n"));
+ debug10(printf(" Returning NULL because of dual overreach\n"));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -14413,7 +14543,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
}
}
- debug5(printf("\nGot insertlength of %d\n",new->insertlength));
+ debug10(printf("\nGot insertlength of %d\n",new->insertlength));
/* Was new->insertlength <= 0, but this eliminates legitimate overlaps */
/* Was new->insertlength < -pairmax, but this allows overreach */
if (new->insertlength <= 0) {
@@ -14426,7 +14556,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
#endif
if (expect_concordant_p == true) {
- debug5(printf(" Returning NULL, because not concordant\n"));
+ debug10(printf(" Returning NULL, because not concordant\n"));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -14444,7 +14574,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
pairmax = pairmax_linear;
}
if (new->insertlength > pairmax && expect_concordant_p == true) {
- debug5(printf(" Returning NULL because insertlength %d > pairmax %d\n",new->insertlength,pairmax));
+ debug10(printf(" Returning NULL because insertlength %d > pairmax %d\n",new->insertlength,pairmax));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -14477,7 +14607,7 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
new->sense_consistent_p = true;
} else if (expect_concordant_p == true) {
- debug5(printf(" Returning NULL, because senses are not consistent\n"));
+ debug10(printf(" Returning NULL, because senses are not consistent\n"));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -16358,13 +16488,13 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
int n;
int minscore5 = querylength5, minscore3 = querylength3, minscore = querylength5 + querylength3;
int best_nsegments, best_nsegments_5, best_nsegments_3;
- int max_nmatches = 0;
+ /* int max_nmatches = 0; */
#ifdef USE_OPTIMAL_SCORE_BINGO
int minscore_bingo = querylength5 + querylength3;
#endif
int trim_left_5 = querylength5, trim_right_5 = querylength5,
trim_left_3 = querylength3, trim_right_3 = querylength3, trim_left, trim_right;
- int nindelbreaks;
+ int nindelbreaks, nbadintrons;
#if 0 /* DISTANT_SPLICE_SPECIAL */
bool shortdistance_p = false;
@@ -16490,10 +16620,10 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
}
#endif
- hit5->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
+ hit5->score_eventrim += Pair_nmismatches_region(&nindelbreaks,&nbadintrons,hit5->pairarray,hit5->npairs,
trim_left_5,trim_right_5,start_amb_length(hit5),end_amb_length(hit5),
hit5->querylength);
- debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
+ debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,&nbadintrons,hit5->pairarray,hit5->npairs,
trim_left_5,trim_right_5,start_amb_length(hit5),end_amb_length(hit5),
hit5->querylength)));
if (start_amb_length(hit5) > 0) {
@@ -16505,10 +16635,14 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
hit5->score_eventrim += amb_penalty;
}
-#ifdef SCORE_INDELS
- hit5->score_eventrim += indel_penalty_middle * nindelbreaks;
- debug6(printf(" add indelbreaks %d.",indel_penalty_middle * nindelbreaks));
+#ifdef SCORE_INDELS_EVENTRIM
+ if (nindelbreaks > 1) {
+ hit5->score_eventrim += indel_penalty_middle * (nindelbreaks - 1);
+ debug6(printf(" add penalty for %d indelbreaks",nindelbreaks));
+ }
#endif
+ hit5->score_eventrim += EVENTRIM_BADINTRON_PENALTY * nbadintrons;
+ debug6(printf(" add penalty for %d bad introns.",nbadintrons));
debug6(printf(" RESULT: %d\n",hit5->score_eventrim));
} else {
@@ -16532,7 +16666,9 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
query5_compress_fwd,query5_compress_rev)));
}
-#ifdef SCORE_INDELS
+#if 0
+ /* Accept a single indel */
+#ifdef SCORE_INDELS_EVENTRIM
/* Needs to match GMAP scoring */
if (hit5->hittype == INSERTION || hit5->hittype == DELETION) {
debug6(printf(" indel at %d",hit5->indel_pos));
@@ -16542,6 +16678,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
}
}
#endif
+#endif
debug6(printf(" RESULT: %d\n",hit5->score_eventrim));
}
@@ -16570,10 +16707,10 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
}
#endif
- hit3->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
+ hit3->score_eventrim += Pair_nmismatches_region(&nindelbreaks,&nbadintrons,hit3->pairarray,hit3->npairs,
trim_left_3,trim_right_3,start_amb_length(hit3),end_amb_length(hit3),
hit3->querylength);
- debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
+ debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,&nbadintrons,hit3->pairarray,hit3->npairs,
trim_left_3,trim_right_3,start_amb_length(hit3),end_amb_length(hit3),
hit3->querylength)));
@@ -16586,10 +16723,15 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
hit3->score_eventrim += amb_penalty;
}
-#ifdef SCORE_INDELS
- hit3->score_eventrim += indel_penalty_middle * nindelbreaks;
- debug6(printf(" add indelbreaks %d.",indel_penalty_middle * nindelbreaks));
+#ifdef SCORE_INDELS_EVENTRIM
+ if (nindelbreaks > 1) {
+ hit3->score_eventrim += indel_penalty_middle * (nindelbreaks - 1);
+ debug6(printf(" add penalty for %d bad introns.",nbadintrons));
+ }
#endif
+ hit3->score_eventrim += EVENTRIM_BADINTRON_PENALTY * nbadintrons;
+ debug6(printf(" add penalty for %d bad introns.",nbadintrons));
+
#if 0
if (hit3->start_amb_prob < 0.9) {
hit3->score_eventrim += hit3->start_amb_length / ambig_end_interval;
@@ -16623,7 +16765,9 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
query3_compress_fwd,query3_compress_rev)));
}
-#ifdef SCORE_INDELS
+#if 0
+ /* Accept a single indel */
+#ifdef SCORE_INDELS_EVENTRIM
/* Needs to match GMAP scoring */
if (hit3->hittype == INSERTION || hit3->hittype == DELETION) {
debug6(printf(" indel at %d",hit3->indel_pos));
@@ -16633,9 +16777,13 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
}
}
#endif
+#endif
debug6(printf(" RESULT: %d\n",hit3->score_eventrim));
}
+ debug6(printf("hitpair score_eventrim %d = %d + %d\n",
+ hit5->score_eventrim + hit3->score_eventrim,
+ hit5->score_eventrim,hit3->score_eventrim));
hitpair->score_eventrim = hit5->score_eventrim + hit3->score_eventrim;
if (hitpair->score_eventrim < minscore) {
minscore = hitpair->score_eventrim;
@@ -16694,7 +16842,8 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
}
} else {
- /* Final */
+ /* Final: Previously based purely on nmatches. However, this leads to indelbreaks and bad introns. */
+#if 0
max_nmatches = 0;
for (p = hitpairlist; p != NULL; p = p->rest) {
hitpair = (Stage3pair_T) p->first;
@@ -16709,10 +16858,32 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
max_nmatches = hitpair->nmatches;
}
}
+#else
+ for (p = hitpairlist; p != NULL; p = p->rest) {
+ hitpair = (Stage3pair_T) p->first;
+ debug6(printf("%u..%u|%u..%u types %s and %s, score_eventrim %d+%d, pairlength %d, outerlength %u\n",
+ hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
+ hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
+ hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
+ hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,
+ hitpair->insertlength,hitpair->outerlength));
+
+ if (hitpair->hit5->score_eventrim + hitpair->hit3->score_eventrim < minscore) {
+ minscore = hitpair->hit5->score_eventrim + hitpair->hit3->score_eventrim;
+ }
+ }
+ debug6(printf("Stage3pair_optimal_score over %d pairs: minscore = %d and %d + subopt:%d\n",
+ n,minscore5,minscore3,subopt_levels));
+
+ /* finalp == false. Add suboptimal_mismatches to each end. */
+ minscore += subopt_levels;
+ cutoff_level = minscore;
/* finalp == true. Add suboptimal_mismatches to overall score. */
- cutoff_level = max_nmatches - subopt_levels;
- debug6(printf("cutoff level %d = max_nmatches %d - subopt %d\n",cutoff_level,max_nmatches,subopt_levels));
+ /* cutoff_level = cutoff_level_5 + cutoff_level_3 - subopt_levels; */
+ debug6(printf("cutoff level %d = cutoff_level_5 %d + cutoff_level_3 %d - subopt %d\n",
+ cutoff_level,cutoff_level_5,cutoff_level_3,subopt_levels));
+#endif
/* For comparison within loci (as in hit_goodness_cmp), use hitpair->nmatches_posttrim */
/* For comparison across loci, use hitpair->nmatches, not hitpair->nmatches_posttrim, hitpair->score_eventrim or hitpair->score */
@@ -16730,17 +16901,17 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
hitpair,hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),hitpair->score));
optimal = List_push(optimal,hitpair);
- } else if (hitpair->nmatches < cutoff_level) {
- debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with nmatches %d < cutoff_level %d (finalp %d)\n",
+ } else if (hitpair->hit5->score_eventrim + hitpair->hit3->score_eventrim > cutoff_level) {
+ debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with scores %d+%d > cutoff_level %d (finalp %d)\n",
hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
- hitpair->nmatches,cutoff_level,finalp));
+ hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,cutoff_level,finalp));
*eliminatedp = true;
Stage3pair_free(&hitpair);
} else {
- debug6(printf("Final: Keeping hit pair %p with nmatches %d (vs cutoff_level %d)\n",
- hitpair,hitpair->nmatches,cutoff_level));
+ debug6(printf("Final: Keeping hit pair %p with scores %d+%d (vs cutoff_level %d)\n",
+ hitpair,hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,cutoff_level));
optimal = List_push(optimal,hitpair);
}
}
@@ -17150,8 +17321,9 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
- if (Stage3pair_total_trim(stage3pair) > 15) {
+ if (Stage3pair_max_trim(stage3pair) > 8) {
/* Don't use terminals to set new_found_score */
+ debug5(printf("Max trim is %d > 8, so treating as terminals\n",Stage3pair_max_trim(stage3pair)));
*terminals = List_push(*terminals,(void *) stage3pair);
} else if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
@@ -17269,8 +17441,9 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
- if (Stage3pair_total_trim(stage3pair) > 15) {
+ if (Stage3pair_max_trim(stage3pair) > 8) {
/* Don't use terminals to set new_found_score */
+ debug5(printf("Max trim is %d > 8, so treating as terminals\n",Stage3pair_max_trim(stage3pair)));
*terminals = List_push(*terminals,(void *) stage3pair);
} else if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
diff --git a/src/stage3hr.h b/src/stage3hr.h
index 16e57f6..5278244 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 191133 2016-06-03 17:26:49Z twu $ */
+/* $Id: stage3hr.h 193887 2016-07-12 03:23:17Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -457,6 +457,8 @@ Stage3end_print (Filestring_T fp, T this, int score, Univ_IIT_T chromosome_iit,
extern Pairtype_T
Stage3pair_pairtype (Stage3pair_T this);
+extern Pairtype_T
+Stage3pair_determine_pairtype (Stage3pair_T this);
extern bool
Stage3pair_circularp (Stage3pair_T this);
extern bool
diff --git a/src/substring.c b/src/substring.c
index 45ab519..aaa657b 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 184582 2016-02-19 14:40:24Z twu $";
+static char rcsid[] = "$Id: substring.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1091,10 +1091,10 @@ Substring_free (T *old) {
if (*old) {
debug2(printf("Freeing substring %p\n",*old));
if ((*old)->nambcoords > 0) {
- FREE((*old)->ambcoords);
- FREE((*old)->amb_knowni);
- FREE((*old)->amb_nmismatches);
- FREE((*old)->amb_probs);
+ FREE_OUT((*old)->ambcoords);
+ FREE_OUT((*old)->amb_knowni);
+ FREE_OUT((*old)->amb_nmismatches);
+ FREE_OUT((*old)->amb_probs);
}
if ((*old)->genomic_bothdiff != NULL) {
if ((*old)->genomic_refdiff != (*old)->genomic_bothdiff) {
@@ -2985,7 +2985,7 @@ Substring_new_ambig (int querystart, int queryend, int splice_pos, int queryleng
Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs,
double amb_common_prob, bool amb_donor_common_p, bool substring1p) {
int ignore;
- T new = (T) MALLOC(sizeof(*new));
+ T new = (T) MALLOC_OUT(sizeof(*new));
debug2(printf("Entered Substring_new_ambig with chrnum %d (chroffset %u, chrhigh %u), %d..%d, querylength %d, plusp %d\n",
chrnum,chroffset,chrhigh,querystart,queryend,querylength,plusp));
@@ -3052,9 +3052,11 @@ Substring_new_ambig (int querystart, int queryend, int splice_pos, int queryleng
new->genomic_bothdiff = (char *) NULL;
new->genomic_refdiff = (char *) NULL;
if (substring1p == true) {
+ debug2(printf("substring1p is true, so setting trims to be %d and %d\n",querystart,0));
new->trim_left = querystart;
new->trim_right = 0;
} else {
+ debug2(printf("substring1p is false, so setting trims to be %d and %d\n",0,querylength - queryend));
new->trim_left = 0;
new->trim_right = querylength - queryend;
}
@@ -3462,6 +3464,11 @@ Substring_nmismatches_refdiff (T this) {
}
int
+Substring_nmismatches_region (T this) {
+ return this->queryend - this->querystart - this->nmatches;
+}
+
+int
Substring_nmatches (T this) {
return this->nmatches;
}
@@ -3645,22 +3652,38 @@ Substring_alignend (T this) {
Chrpos_T
Substring_alignstart_chr (T this) {
- return this->alignstart - this->chroffset;
+ if (this->ambiguous_p == true) {
+ return 0;
+ } else {
+ return this->alignstart - this->chroffset;
+ }
}
Chrpos_T
Substring_alignend_chr (T this) {
- return this->alignend - this->chroffset;
+ if (this->ambiguous_p == true) {
+ return 0;
+ } else {
+ return this->alignend - this->chroffset;
+ }
}
Chrpos_T
Substring_alignstart_trim_chr (T this) {
- return this->alignstart_trim - this->chroffset;
+ if (this->ambiguous_p == true) {
+ return 0;
+ } else {
+ return this->alignstart_trim - this->chroffset;
+ }
}
Chrpos_T
Substring_alignend_trim_chr (T this) {
- return this->alignend_trim - this->chroffset;
+ if (this->ambiguous_p == true) {
+ return 0;
+ } else {
+ return this->alignend_trim - this->chroffset;
+ }
}
Univcoord_T
@@ -3979,10 +4002,10 @@ Substring_copy (T old) {
new->amb_donor_common_p = false;
} else {
new->nambcoords = old->nambcoords;
- new->ambcoords = (Univcoord_T *) MALLOC(old->nambcoords * sizeof(Univcoord_T));
- new->amb_knowni = (int *) MALLOC(old->nambcoords * sizeof(int));
- new->amb_nmismatches = (int *) MALLOC(old->nambcoords * sizeof(int));
- new->amb_probs = (double *) MALLOC(old->nambcoords * sizeof(double));
+ new->ambcoords = (Univcoord_T *) MALLOC_OUT(old->nambcoords * sizeof(Univcoord_T));
+ new->amb_knowni = (int *) MALLOC_OUT(old->nambcoords * sizeof(int));
+ new->amb_nmismatches = (int *) MALLOC_OUT(old->nambcoords * sizeof(int));
+ new->amb_probs = (double *) MALLOC_OUT(old->nambcoords * sizeof(double));
new->amb_common_prob = old->amb_common_prob;
new->amb_donor_common_p = old->amb_donor_common_p;
@@ -5111,9 +5134,7 @@ static void
print_forward (Filestring_T fp, char *string, int n) {
int i;
- for (i = 0; i < n; i++) {
- FPRINTF(fp,"%c",string[i]);
- }
+ FPRINTF(fp,"%.*s",n,string);
return;
}
@@ -5136,9 +5157,7 @@ static void
print_revcomp (Filestring_T fp, char *nt, int len) {
int i;
- for (i = len-1; i >= 0; --i) {
- FPRINTF(fp,"%c",complCode[(int) nt[i]]);
- }
+ FPRINTF(fp,"%.*R",len,nt);
return;
}
@@ -5169,13 +5188,13 @@ print_genomic (Filestring_T fp, T substring, char *deletion, int deletionlength,
if (deletion != NULL) {
print_lc(fp,deletion,deletionlength);
}
- print_forward(fp,&(substring->genomic_refdiff[substring->queryend]),substring->querylength - substring->queryend);
+ print_forward(fp,&(substring->genomic_refdiff[substring->queryend]),substring->querylength - deletionlength - substring->queryend);
} else {
print_forward(fp,substring->genomic_bothdiff,substring->queryend);
if (deletion != NULL) {
print_lc(fp,deletion,deletionlength);
}
- print_forward(fp,&(substring->genomic_bothdiff[substring->queryend]),substring->querylength - substring->queryend);
+ print_forward(fp,&(substring->genomic_bothdiff[substring->queryend]),substring->querylength - deletionlength - substring->queryend);
}
for (i = 0; i < Shortread_choplength(queryseq); i++) {
@@ -5194,14 +5213,14 @@ print_genomic (Filestring_T fp, T substring, char *deletion, int deletionlength,
if (deletion != NULL) {
print_revcomp_lc(fp,deletion,deletionlength);
}
- print_revcomp(fp,substring->genomic_refdiff,substring->querystart);
+ print_revcomp(fp,&(substring->genomic_refdiff[deletionlength]),substring->querystart - deletionlength);
} else {
print_revcomp(fp,&(substring->genomic_bothdiff[substring->querystart]),substring->querylength - substring->querystart);
if (deletion != NULL) {
print_revcomp_lc(fp,deletion,deletionlength);
}
- print_revcomp(fp,substring->genomic_bothdiff,substring->querystart);
+ print_revcomp(fp,&(substring->genomic_bothdiff[deletionlength]),substring->querystart - deletionlength);
}
for (i = 0; i < Shortread_choplength(queryseq); i++) {
FPRINTF(fp,"*");
@@ -6310,8 +6329,8 @@ Substring_convert_to_pairs (List_T pairs, T substring, int querylength, Shortrea
return pairs;
}
- debug6(printf("*** Entered Substring_convert_to_pairs with querylength %d, hardclip_low %d, hardclip_high %d\n",
- querylength,hardclip_low,hardclip_high));
+ debug6(printf("*** Entered Substring_convert_to_pairs with querylength %d, hardclip_low %d, hardclip_high %d, queryseq_offset %d\n",
+ querylength,hardclip_low,hardclip_high,queryseq_offset));
seq1 = Shortread_fullpointer_uc(queryseq);
if (substring->plusp == true) {
diff --git a/src/substring.h b/src/substring.h
index 36810d0..3975d6e 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 184484 2016-02-18 03:11:53Z twu $ */
+/* $Id: substring.h 193327 2016-07-01 19:24:12Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -132,6 +132,8 @@ Substring_nmismatches_bothdiff (T this);
extern int
Substring_nmismatches_refdiff (T this);
extern int
+Substring_nmismatches_region (T this);
+extern int
Substring_nmatches (T this);
extern int
Substring_nmatches_posttrim (T this);
diff --git a/src/uintlist.c b/src/uintlist.c
index 4fb0ccc..e35d892 100644
--- a/src/uintlist.c
+++ b/src/uintlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uintlist.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: uintlist.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -71,6 +71,21 @@ Uintlist_free (T *list) {
}
T
+Uintlist_keep_one (T list, int i) {
+ T head;
+
+ while (--i >= 0) {
+ /* Pop */
+ head = list->rest;
+ FREE(list);
+ list = head;
+ }
+
+ Uintlist_free(&list->rest);
+ return list;
+}
+
+T
Uintlist_reverse (T list) {
T head = NULL, next;
diff --git a/src/uintlist.h b/src/uintlist.h
index 773fd56..83b43e5 100644
--- a/src/uintlist.h
+++ b/src/uintlist.h
@@ -1,4 +1,4 @@
-/* $Id: uintlist.h 166641 2015-05-29 21:13:04Z twu $ */
+/* $Id: uintlist.h 193899 2016-07-12 04:41:34Z twu $ */
#ifndef UINTLIST_INCLUDED
#define UINTLIST_INCLUDED
@@ -21,6 +21,8 @@ extern void
Uintlist_head_set (T list, UINT4 x);
extern void
Uintlist_free (T *list);
+extern T
+Uintlist_keep_one (T list, int i);
extern T
Uintlist_reverse (T list);
extern int
diff --git a/src/uniqscan.c b/src/uniqscan.c
index e65e9e3..cc0fc87 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 190433 2016-05-24 21:30:34Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 193899 2016-07-12 04:41:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1329,8 +1329,8 @@ main (int argc, char *argv[]) {
min_intronlength,max_deletionlength,/*min_indel_end_matches*/6,
maxpeelback_distalmedial,nullgap,extramaterial_end,extramaterial_paired,
extraband_single,extraband_end,extraband_paired,
- ngap,maxintronlen,maxintronlen_ends,minendexon,/*output_sam_p*/false,/*homopolymerp*/false,
- /*stage3debug*/NO_STAGE3DEBUG);
+ ngap,maxintronlen,maxintronlen_ends,minendexon,
+ /*homopolymerp*/false,/*stage3debug*/NO_STAGE3DEBUG);
Stage3hr_setup(/*invert_first_p*/false,/*invert_second_p*/false,genome,
chromosome_iit,nchromosomes,circular_typeint,genes_iit,genes_divint_crosstable,
/*tally_iit*/NULL,/*tally_divint_crosstable*/NULL,
diff --git a/src/univdiag.c b/src/univdiag.c
index 869bafd..8d73940 100644
--- a/src/univdiag.c
+++ b/src/univdiag.c
@@ -114,3 +114,25 @@ Univdiag_descending_cmp (const void *a, const void *b) {
}
+
+int
+Univdiag_diagonal_cmp (const void *a, const void *b) {
+ T x = * (T *) a;
+ T y = * (T *) b;
+
+ if (x->univdiagonal < y->univdiagonal) {
+ return -1;
+ } else if (y->univdiagonal < x->univdiagonal) {
+ return +1;
+ } else if (x->querystart < y->querystart) {
+ return -1;
+ } else if (y->querystart < x->querystart) {
+ return +1;
+ } else if (x->queryend < y->queryend) {
+ return -1;
+ } else if (y->queryend < x->queryend) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
diff --git a/src/univdiag.h b/src/univdiag.h
index 20022c5..6fa8ff7 100644
--- a/src/univdiag.h
+++ b/src/univdiag.h
@@ -1,4 +1,4 @@
-/* $Id: univdiag.h 166641 2015-05-29 21:13:04Z twu $ */
+/* $Id: univdiag.h 193897 2016-07-12 04:12:45Z twu $ */
#ifndef UNIVDIAG_INCLUDED
#define UNIVDIAG_INCLUDED
@@ -24,6 +24,8 @@ extern int
Univdiag_ascending_cmp (const void *a, const void *b);
extern int
Univdiag_descending_cmp (const void *a, const void *b);
+extern int
+Univdiag_diagonal_cmp (const void *a, const void *b);
#undef T
#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit mailing list