[med-svn] [gmap] 01/03: New upstream version 2016-09-23
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Sun Sep 25 15:07:28 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 81426f010010d9b948ee5d99c5cb96eb3a89f8a8
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Sun Sep 25 14:09:21 2016 +0200
New upstream version 2016-09-23
---
ChangeLog | 80 +++
VERSION | 2 +-
configure | 24 +-
src/Makefile.am | 2 +-
src/Makefile.in | 106 ++-
src/access.c | 5 +-
src/boyer-moore.c | 3 +-
src/dynprog_genome.c | 794 +++++----------------
src/pair.c | 31 +-
src/samprint.c | 13 +-
src/splice.c | 51 +-
src/splice.h | 6 +-
src/stage1.c | 8 +-
src/stage1hr.c | 87 ++-
src/stage2.c | 134 +++-
src/stage3.c | 1886 +++++++++++++++++++++++++++++++++++++++-----------
src/stage3.h | 4 +-
src/stage3hr.c | 260 +++++--
src/stage3hr.h | 4 +-
src/substring.c | 110 +--
src/substring.h | 6 +-
21 files changed, 2387 insertions(+), 1229 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 07fca4c..9296781 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,83 @@
+2016-09-24 twu
+
+ * stage3.c: In solving dual introns, handling the case where single_gappairs
+ is NULL. Added code for gmapl.
+
+ * stage1.c: Modified debugging statements
+
+ * pair.c: Added a check for monotonicity of query coordinates to the
+ debugging procedure
+
+ * dynprog_genome.c: If procedure is returning NULL instead of the computed
+ gap pairs, then setting finalscore to be negative, so the result is not
+ used by the calling procedure
+
+ * access.c: If shm_attach fails, and using mmap instead, then not trying to
+ copy a file to a read-only memory segment
+
+ * Makefile.gsnaptoo.am: Added uint8list.c and uint8list.h for gmap
+
+ * stage2.c: Added back find_shifted_canonical procedure as unused code
+
+2016-09-21 twu
+
+ * substring.h: Using sensedir as a field, instead of chimera_sensedir
+
+ * substring.c: Substring_new can use trimmed ends to determine the sensedir.
+ Using sensedir as a field, instead of chimera_sensedir
+
+ * stage3hr.h: Stage3end_new_gmap takes sensedir_knownp as an argument
+
+ * stage3hr.c: Stage3end_new_gmap takes sensedir_knownp as an argument, and
+ can use trimmed ends to determine the sensedir. Stage3end_new_substrings
+ can determine sensedir from its component substrings and junctions. For
+ comparing alignments, using nmatches rather than nmatches_posttrim
+
+ * stage3.h: Changed variable name
+
+ * stage3.c: Removing maxpeelback restriction on peeling back for introns.
+ For microexons, just transferring without checking. In comparing single
+ and dual gaps, not using middle exonprob to evaluate middle exon. In
+ solving dual breaks for microexons, allowing for multiple possible outer
+ splice positions. Changed order of operations to smooth first, then find
+ dual breaks, and then single introns.
+
+ * stage1hr.c: Deciding separately whether to run gmap on 5' and 3' ends,
+ depending on max_matches found on each end
+
+ * pair.c: Putting macro around GSNAP-specific output code for using mate
+ sensedir
+
+ * dynprog_genome.c: Not using probabilities to determine if dinucleotide
+ solution is good
+
+ * boyer-moore.c: Added debugging statement
+
+2016-09-20 twu
+
+ * dynprog_genome.c: Removed backup algorithm for best score above a
+ probability threshold. Instead, using best probability among canonical or
+ semicanonical dinucleotides.
+
+2016-09-16 twu
+
+ * stage3.c: Solving for microexons inside of traverse_dual_break. Solving
+ for dual breaks before solving introns.
+
+ * splice.c, splice.h: Splice_trim_novel_spliceends function now returning
+ new splicedir
+
+ * stage3.c: Added intron-specific functions for peelback, to handle long
+ similarity between exon ends and intron segments on the other end.
+ Function for finding novel spliceends now returns new splicedir, although
+ currently not used.
+
+2016-09-15 twu
+
+ * samprint.c: Using new interface for Substring_sensedir
+
+ * pair.c: Printing mate sensedir, to be consistent with samprint code
+
2016-09-14 twu
* stage3hr.h: Removed obsolete functions
diff --git a/VERSION b/VERSION
index f7edb0e..6daa5bd 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-09-14
\ No newline at end of file
+2016-09-23
\ No newline at end of file
diff --git a/configure b/configure
index d08e66d..ed5376d 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-09-14.
+# Generated by GNU Autoconf 2.69 for gmap 2016-09-23.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-09-14'
-PACKAGE_STRING='gmap 2016-09-14'
+PACKAGE_VERSION='2016-09-23'
+PACKAGE_STRING='gmap 2016-09-23'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
PACKAGE_URL=''
@@ -1372,7 +1372,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2016-09-14 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-09-23 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1443,7 +1443,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2016-09-14:";;
+ short | recursive ) echo "Configuration of gmap 2016-09-23:";;
esac
cat <<\_ACEOF
@@ -1582,7 +1582,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2016-09-14
+gmap configure 2016-09-23
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2188,7 +2188,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2016-09-14, which was
+It was created by gmap $as_me 2016-09-23, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2538,8 +2538,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-09-14" >&5
-$as_echo "2016-09-14" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-09-23" >&5
+$as_echo "2016-09-23" >&6; }
### Read defaults
@@ -4404,7 +4404,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2016-09-14'
+ VERSION='2016-09-23'
cat >>confdefs.h <<_ACEOF
@@ -20109,7 +20109,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2016-09-14, which was
+This file was extended by gmap $as_me 2016-09-23, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20175,7 +20175,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-gmap config.status 2016-09-14
+gmap config.status 2016-09-23
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/src/Makefile.am b/src/Makefile.am
index e0630f6..c567e17 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -166,7 +166,7 @@ GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
except.c except.h assert.c assert.h mem.c mem.h \
intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
- univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
+ univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h uint8list.c uint8list.h \
stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
diff --git a/src/Makefile.in b/src/Makefile.in
index e3e7262..a2e957d 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -666,8 +666,9 @@ am__objects_14 = gmapl_avx2-except.$(OBJEXT) \
gmapl_avx2-bigendian.$(OBJEXT) \
gmapl_avx2-univinterval.$(OBJEXT) \
gmapl_avx2-interval.$(OBJEXT) gmapl_avx2-uintlist.$(OBJEXT) \
- gmapl_avx2-stopwatch.$(OBJEXT) gmapl_avx2-semaphore.$(OBJEXT) \
- gmapl_avx2-access.$(OBJEXT) gmapl_avx2-filestring.$(OBJEXT) \
+ gmapl_avx2-uint8list.$(OBJEXT) gmapl_avx2-stopwatch.$(OBJEXT) \
+ gmapl_avx2-semaphore.$(OBJEXT) gmapl_avx2-access.$(OBJEXT) \
+ gmapl_avx2-filestring.$(OBJEXT) \
gmapl_avx2-iit-read-univ.$(OBJEXT) \
gmapl_avx2-iit-read.$(OBJEXT) gmapl_avx2-md5.$(OBJEXT) \
gmapl_avx2-bzip2.$(OBJEXT) gmapl_avx2-sequence.$(OBJEXT) \
@@ -727,6 +728,7 @@ am__objects_15 = gmapl_nosimd-except.$(OBJEXT) \
gmapl_nosimd-univinterval.$(OBJEXT) \
gmapl_nosimd-interval.$(OBJEXT) \
gmapl_nosimd-uintlist.$(OBJEXT) \
+ gmapl_nosimd-uint8list.$(OBJEXT) \
gmapl_nosimd-stopwatch.$(OBJEXT) \
gmapl_nosimd-semaphore.$(OBJEXT) gmapl_nosimd-access.$(OBJEXT) \
gmapl_nosimd-filestring.$(OBJEXT) \
@@ -798,8 +800,9 @@ am__objects_16 = gmapl_sse2-except.$(OBJEXT) \
gmapl_sse2-bigendian.$(OBJEXT) \
gmapl_sse2-univinterval.$(OBJEXT) \
gmapl_sse2-interval.$(OBJEXT) gmapl_sse2-uintlist.$(OBJEXT) \
- gmapl_sse2-stopwatch.$(OBJEXT) gmapl_sse2-semaphore.$(OBJEXT) \
- gmapl_sse2-access.$(OBJEXT) gmapl_sse2-filestring.$(OBJEXT) \
+ gmapl_sse2-uint8list.$(OBJEXT) gmapl_sse2-stopwatch.$(OBJEXT) \
+ gmapl_sse2-semaphore.$(OBJEXT) gmapl_sse2-access.$(OBJEXT) \
+ gmapl_sse2-filestring.$(OBJEXT) \
gmapl_sse2-iit-read-univ.$(OBJEXT) \
gmapl_sse2-iit-read.$(OBJEXT) gmapl_sse2-md5.$(OBJEXT) \
gmapl_sse2-bzip2.$(OBJEXT) gmapl_sse2-sequence.$(OBJEXT) \
@@ -858,6 +861,7 @@ am__objects_17 = gmapl_sse41-except.$(OBJEXT) \
gmapl_sse41-bigendian.$(OBJEXT) \
gmapl_sse41-univinterval.$(OBJEXT) \
gmapl_sse41-interval.$(OBJEXT) gmapl_sse41-uintlist.$(OBJEXT) \
+ gmapl_sse41-uint8list.$(OBJEXT) \
gmapl_sse41-stopwatch.$(OBJEXT) \
gmapl_sse41-semaphore.$(OBJEXT) gmapl_sse41-access.$(OBJEXT) \
gmapl_sse41-filestring.$(OBJEXT) \
@@ -923,6 +927,7 @@ am__objects_18 = gmapl_sse42-except.$(OBJEXT) \
gmapl_sse42-bigendian.$(OBJEXT) \
gmapl_sse42-univinterval.$(OBJEXT) \
gmapl_sse42-interval.$(OBJEXT) gmapl_sse42-uintlist.$(OBJEXT) \
+ gmapl_sse42-uint8list.$(OBJEXT) \
gmapl_sse42-stopwatch.$(OBJEXT) \
gmapl_sse42-semaphore.$(OBJEXT) gmapl_sse42-access.$(OBJEXT) \
gmapl_sse42-filestring.$(OBJEXT) \
@@ -988,6 +993,7 @@ am__objects_19 = gmapl_ssse3-except.$(OBJEXT) \
gmapl_ssse3-bigendian.$(OBJEXT) \
gmapl_ssse3-univinterval.$(OBJEXT) \
gmapl_ssse3-interval.$(OBJEXT) gmapl_ssse3-uintlist.$(OBJEXT) \
+ gmapl_ssse3-uint8list.$(OBJEXT) \
gmapl_ssse3-stopwatch.$(OBJEXT) \
gmapl_ssse3-semaphore.$(OBJEXT) gmapl_ssse3-access.$(OBJEXT) \
gmapl_ssse3-filestring.$(OBJEXT) \
@@ -2441,7 +2447,7 @@ GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
except.c except.h assert.c assert.h mem.c mem.h \
intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
- univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
+ univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h uint8list.c uint8list.h \
stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
@@ -3944,6 +3950,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-stage3.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-translation.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-uinttable.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-univinterval.Po@am__quote@
@@ -4029,6 +4036,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-stage3.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-translation.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-uinttable.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-univinterval.Po@am__quote@
@@ -4114,6 +4122,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-stage3.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-translation.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-uinttable.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-univinterval.Po@am__quote@
@@ -4199,6 +4208,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-stage3.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-translation.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-uinttable.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-univinterval.Po@am__quote@
@@ -4284,6 +4294,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-stage3.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-translation.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-uinttable.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-univinterval.Po@am__quote@
@@ -4369,6 +4380,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-stage3.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-translation.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-uinttable.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-univinterval.Po@am__quote@
@@ -15310,6 +15322,20 @@ gmapl_avx2-uintlist.obj: uintlist.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+gmapl_avx2-uint8list.o: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -MT gmapl_avx2-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_avx2-uint8list.Tpo -c -o gmapl_avx2-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx2-uint8list.Tpo $(DEPDIR)/gmapl_avx2-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_avx2-uint8list.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+
+gmapl_avx2-uint8list.obj: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -MT gmapl_avx2-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_avx2-uint8list.Tpo -c -o gmapl_avx2-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx2-uint8list.Tpo $(DEPDIR)/gmapl_avx2-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_avx2-uint8list.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+
gmapl_avx2-stopwatch.o: stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -MT gmapl_avx2-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_avx2-stopwatch.Tpo -c -o gmapl_avx2-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx2-stopwatch.Tpo $(DEPDIR)/gmapl_avx2-stopwatch.Po
@@ -16500,6 +16526,20 @@ gmapl_nosimd-uintlist.obj: uintlist.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+gmapl_nosimd-uint8list.o: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uint8list.Tpo -c -o gmapl_nosimd-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uint8list.Tpo $(DEPDIR)/gmapl_nosimd-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_nosimd-uint8list.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+
+gmapl_nosimd-uint8list.obj: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uint8list.Tpo -c -o gmapl_nosimd-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uint8list.Tpo $(DEPDIR)/gmapl_nosimd-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_nosimd-uint8list.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+
gmapl_nosimd-stopwatch.o: stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo -c -o gmapl_nosimd-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo $(DEPDIR)/gmapl_nosimd-stopwatch.Po
@@ -17690,6 +17730,20 @@ gmapl_sse2-uintlist.obj: uintlist.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -c -o gmapl_sse2-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+gmapl_sse2-uint8list.o: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -MT gmapl_sse2-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_sse2-uint8list.Tpo -c -o gmapl_sse2-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse2-uint8list.Tpo $(DEPDIR)/gmapl_sse2-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_sse2-uint8list.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -c -o gmapl_sse2-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+
+gmapl_sse2-uint8list.obj: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -MT gmapl_sse2-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_sse2-uint8list.Tpo -c -o gmapl_sse2-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse2-uint8list.Tpo $(DEPDIR)/gmapl_sse2-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_sse2-uint8list.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -c -o gmapl_sse2-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+
gmapl_sse2-stopwatch.o: stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -MT gmapl_sse2-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_sse2-stopwatch.Tpo -c -o gmapl_sse2-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse2-stopwatch.Tpo $(DEPDIR)/gmapl_sse2-stopwatch.Po
@@ -18880,6 +18934,20 @@ gmapl_sse41-uintlist.obj: uintlist.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -c -o gmapl_sse41-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+gmapl_sse41-uint8list.o: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -MT gmapl_sse41-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_sse41-uint8list.Tpo -c -o gmapl_sse41-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse41-uint8list.Tpo $(DEPDIR)/gmapl_sse41-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_sse41-uint8list.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -c -o gmapl_sse41-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+
+gmapl_sse41-uint8list.obj: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -MT gmapl_sse41-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_sse41-uint8list.Tpo -c -o gmapl_sse41-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse41-uint8list.Tpo $(DEPDIR)/gmapl_sse41-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_sse41-uint8list.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -c -o gmapl_sse41-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+
gmapl_sse41-stopwatch.o: stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -MT gmapl_sse41-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_sse41-stopwatch.Tpo -c -o gmapl_sse41-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse41-stopwatch.Tpo $(DEPDIR)/gmapl_sse41-stopwatch.Po
@@ -20070,6 +20138,20 @@ gmapl_sse42-uintlist.obj: uintlist.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -c -o gmapl_sse42-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+gmapl_sse42-uint8list.o: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -MT gmapl_sse42-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_sse42-uint8list.Tpo -c -o gmapl_sse42-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse42-uint8list.Tpo $(DEPDIR)/gmapl_sse42-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_sse42-uint8list.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -c -o gmapl_sse42-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+
+gmapl_sse42-uint8list.obj: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -MT gmapl_sse42-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_sse42-uint8list.Tpo -c -o gmapl_sse42-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse42-uint8list.Tpo $(DEPDIR)/gmapl_sse42-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_sse42-uint8list.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -c -o gmapl_sse42-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+
gmapl_sse42-stopwatch.o: stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -MT gmapl_sse42-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_sse42-stopwatch.Tpo -c -o gmapl_sse42-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse42-stopwatch.Tpo $(DEPDIR)/gmapl_sse42-stopwatch.Po
@@ -21260,6 +21342,20 @@ gmapl_ssse3-uintlist.obj: uintlist.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -c -o gmapl_ssse3-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+gmapl_ssse3-uint8list.o: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -MT gmapl_ssse3-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_ssse3-uint8list.Tpo -c -o gmapl_ssse3-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_ssse3-uint8list.Tpo $(DEPDIR)/gmapl_ssse3-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_ssse3-uint8list.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -c -o gmapl_ssse3-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+
+gmapl_ssse3-uint8list.obj: uint8list.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -MT gmapl_ssse3-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_ssse3-uint8list.Tpo -c -o gmapl_ssse3-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_ssse3-uint8list.Tpo $(DEPDIR)/gmapl_ssse3-uint8list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_ssse3-uint8list.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -c -o gmapl_ssse3-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+
gmapl_ssse3-stopwatch.o: stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -MT gmapl_ssse3-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_ssse3-stopwatch.Tpo -c -o gmapl_ssse3-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_ssse3-stopwatch.Tpo $(DEPDIR)/gmapl_ssse3-stopwatch.Po
diff --git a/src/access.c b/src/access.c
index 2d379e6..7ed4c87 100644
--- a/src/access.c
+++ b/src/access.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: access.c 184162 2016-02-12 18:54:56Z twu $";
+static char rcsid[] = "$Id: access.c 198277 2016-09-24 00:46:18Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -802,7 +802,10 @@ Access_allocate_shared (Access_T *access, int *shmid, key_t *key, int *fd, size_
filename,*len);
*shmid = 0;
memory = Access_mmap(&(*fd),&(*len),filename,/*randomp*/true);
+#if 0
+ /* Crashes because memory is read-only */
copy_memory_from_file(memory,filename,/*filesize*/*len,eltsize);
+#endif
*access = MMAPPED;
}
#else
diff --git a/src/boyer-moore.c b/src/boyer-moore.c
index 6e752fa..fc44770 100644
--- a/src/boyer-moore.c
+++ b/src/boyer-moore.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: boyer-moore.c 145990 2014-08-25 21:47:32Z twu $";
+static char rcsid[] = "$Id: boyer-moore.c 198071 2016-09-21 00:19:07Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -396,6 +396,7 @@ BoyerMoore_nt (char *query, int querylen, int textoffset, int textlen,
int bad_char_shift[ASIZE];
char *text, *text_alt;
+ debug(printf("Entered BoyerMoore_nt\n"));
if (query_okay(query,querylen)) {
good_suffix_shift = (int *) MALLOCA(querylen * sizeof(int));
text = (char *) MALLOC((textlen+querylen+1) * sizeof(char)); /* alloca could cause stack overflow */
diff --git a/src/dynprog_genome.c b/src/dynprog_genome.c
index b385c33..8ce3097 100644
--- a/src/dynprog_genome.c
+++ b/src/dynprog_genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_genome.c 197738 2016-09-13 17:53:52Z twu $";
+static char rcsid[] = "$Id: dynprog_genome.c 198278 2016-09-24 00:47:16Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -80,8 +80,8 @@ static char rcsid[] = "$Id: dynprog_genome.c 197738 2016-09-13 17:53:52Z twu $";
#define USE_WEAK_SCOREI 1
#define PROB_CEILING 0.85
-#define PROB_FLOOR 0.75
-#define PROB_BAD 0.50
+#define PROB_FLOOR 0.50
+/* #define PROB_BAD 0.50 */
/* Prefer alternate intron to other non-canonicals, but don't
introduce mismatches or gaps to identify */
@@ -811,17 +811,17 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
Univcoord_T chroffset, Univcoord_T chrhigh,
bool halfp, bool finalp) {
int rL, rR, cL, cR;
- int bestrL_with_prob, bestrR_with_prob, bestcL_with_prob, bestcR_with_prob;
+ int bestrL_with_dinucl, bestrR_with_dinucl, bestcL_with_dinucl, bestcR_with_dinucl;
int cloL, chighL;
int cloR, chighR;
int introntype;
int bestscore = NEG_INFINITY_8, score, scoreL, scoreR, scoreI;
- int bestscore_with_suffprob = NEG_INFINITY_8;
- double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob_with_score, bestprob_trunc;
+ int bestscore_with_dinucl = NEG_INFINITY_8;
+ double *left_probabilities, *right_probabilities, probL, probR, bestprob_with_score, bestprob_with_dinucl;
Univcoord_T splicesitepos;
char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
int *leftdi, *rightdi;
- bool use_prob_p;
+ bool use_dinucl_p;
/* Read dinucleotides */
@@ -972,7 +972,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
/* Search using probs and without simultaneously */
bestscore = NEG_INFINITY_8;
- bestprob_with_score = bestprob_trunc = 0.0;
+ bestprob_with_score = bestprob_with_dinucl = 0.0;
for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
if ((cloL = rL - lbandL) < 1) {
@@ -992,34 +992,19 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3(printf("A. Test no indels\n"));
cL = rL;
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL_upper[cL][rL];
cR = rR;
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR_upper[cR][rR];
-
#ifdef USE_SCOREI
scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
#else
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1043,36 +1028,18 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore_with_sufficient_prob %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
+ /* Perform only without indels */
+ if (scoreI > 0) {
+ debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
+ cL,cR,scoreI,probL,probR,probL+probR));
+ if (probL + probR > bestprob_with_dinucl) {
+ bestscore_with_dinucl = scoreL + scoreR;
+ bestcL_with_dinucl = cL;
+ bestcR_with_dinucl = cR;
+ bestrL_with_dinucl = rL;
+ bestrR_with_dinucl = rR;
+ bestprob_with_dinucl = probL + probR;
}
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore_with_sufficient_prob %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
@@ -1080,13 +1047,6 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
/* Test indel on right */
cL = rL;
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL_upper[cL][rL];
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -1096,13 +1056,6 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR_lower[rR][cR];
if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -1115,7 +1068,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1138,50 +1091,11 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
debug3(printf("Skip main diagonal\n"));
for (/*skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR_upper[cR][rR];
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -1194,7 +1108,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1217,51 +1131,12 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore_with_sufficient_prob %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore_with_sufficient_prob %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
debug3(printf("C. Test indel on left\n"));
/* Test indel on left */
cR = rR;
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR_upper[cR][rR];
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -1271,13 +1146,6 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL_lower[rL][cL];
if (directionsL_lower_nogap[rL][cL] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -1290,7 +1158,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1313,50 +1181,11 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
debug3(printf("Skip main diagonal\n"));
for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL_upper[cL][rL];
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -1369,7 +1198,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1392,67 +1221,39 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
}
if (bestprob_with_score > 2*PROB_CEILING) {
/* Probability is good with best alignment, so take that */
- debug3(printf("Best alignment based on score alone has good probability\n"));
- use_prob_p = false; /* was previously true (bug) */
- } else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
- /* Probability-based solution is bad, so use alignment */
- debug3(printf("Probability-based solution is bad on both sites\n"));
- use_prob_p = false;
- } else if (bestscore_with_suffprob < 0 || bestscore_with_suffprob < bestscore - 9) {
- debug3(printf("Probability-based solution requires very bad alignment, because bestscore_with_suffprob %d < bestscore %d - 9\n",
- bestscore_with_suffprob,bestscore));
- use_prob_p = false;
+ debug(printf("Best alignment based on score alone has good probability\n"));
+ use_dinucl_p = false; /* was previously true (bug) */
+ } else if (bestprob_with_dinucl == 0.0) {
+ debug(printf("No dinucleotides found\n"));
+ use_dinucl_p = false;
+ } else if (0 && left_probabilities[bestcL_with_dinucl] < PROB_CEILING && right_probabilities[bestcR_with_dinucl] < PROB_CEILING) {
+ /* Dinucleotide-based solution is bad, so use alignment */
+ debug(printf("Dinucleotide-based solution is bad on both sites\n"));
+ use_dinucl_p = false;
+ } else if (bestscore_with_dinucl < 0 || bestscore_with_dinucl < bestscore - 9) {
+ debug(printf("Dinucleotide-based solution requires very bad alignment, because bestscore_with_dinucl %d < bestscore %d - 9\n",
+ bestscore_with_dinucl,bestscore));
+ use_dinucl_p = false;
} else {
- use_prob_p = true;
+ use_dinucl_p = true;
}
- if (use_prob_p == true) {
- /* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
- debug3(printf("Switch to probability-based solution\n"));
- debug3(printf("SIMD 8. bestscore %d (bestprob_with_score %f) vs bestscore_with_suffprob %d (bestprob_trunc %f, actually %f and %f)\n",
- bestscore,bestprob_with_score,bestscore_with_suffprob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
- *bestcL = bestcL_with_prob;
- *bestcR = bestcR_with_prob;
- *bestrL = bestrL_with_prob;
- *bestrR = bestrR_with_prob;
- bestscore = bestscore_with_suffprob;
+ debug(printf("SIMD 8. bestscore %d (bestprob_with_score %f)\n",bestscore,bestprob_with_score));
+ if (use_dinucl_p == true) {
+ debug(printf("SIMD 8. bestscore %d (bestprob_with_score %f) vs bestscore_with_dinucl %d (bestprob_with_dinucl %f and %f)\n",
+ bestscore,bestprob_with_score,bestscore_with_dinucl,left_probabilities[bestcL_with_dinucl],right_probabilities[bestcR_with_dinucl]));
+ /* Best alignment yields bad probability, and dinucleotide-based alignment yields good probability, so switch */
+ debug(printf("Switch to dinucleotide-based solution\n"));
+ *bestcL = bestcL_with_dinucl;
+ *bestcR = bestcR_with_dinucl;
+ *bestrL = bestrL_with_dinucl;
+ *bestrR = bestrR_with_dinucl;
+ bestscore = bestscore_with_dinucl;
}
FREEA(rightdi);
@@ -1834,17 +1635,17 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
Univcoord_T chroffset, Univcoord_T chrhigh,
bool halfp, bool finalp) {
int rL, rR, cL, cR;
- int bestrL_with_prob, bestrR_with_prob, bestcL_with_prob, bestcR_with_prob;
+ int bestrL_with_dinucl, bestrR_with_dinucl, bestcL_with_dinucl, bestcR_with_dinucl;
int cloL, chighL;
int cloR, chighR;
int introntype;
int bestscore = NEG_INFINITY_16, score, scoreL, scoreR, scoreI;
- int bestscore_with_suffprob = NEG_INFINITY_16;
- double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob_with_score, bestprob_trunc;
+ int bestscore_with_dinucl = NEG_INFINITY_16;
+ double *left_probabilities, *right_probabilities, probL, probR, bestprob_with_score, bestprob_with_dinucl;
Univcoord_T splicesitepos;
char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
int *leftdi, *rightdi;
- bool use_prob_p;
+ bool use_dinucl_p;
/* Read dinucleotides */
@@ -1995,7 +1796,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
/* Search using probs and without simultaneously */
bestscore = NEG_INFINITY_16;
- bestprob_with_score = bestprob_trunc = 0.0;
+ bestprob_with_score = bestprob_with_dinucl = 0.0;
for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
if ((cloL = rL - lbandL) < 1) {
@@ -2015,24 +1816,10 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3(printf("A. Test no indels\n"));
cL = rL;
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL_upper[cL][rL];
cR = rR;
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR_upper[cR][rR];
#ifdef USE_SCOREI
@@ -2041,7 +1828,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2065,50 +1852,25 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
+ /* Perform only without indels */
+ if (scoreI > 0) {
+ debug3(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
+ cL,cR,scoreI,probL,probR,probL+probR));
+ if (probL + probR > bestprob_with_dinucl) {
+ bestscore_with_dinucl = scoreL + scoreR;
+ bestcL_with_dinucl = cL;
+ bestcR_with_dinucl = cR;
+ bestrL_with_dinucl = rL;
+ bestrR_with_dinucl = rR;
+ bestprob_with_dinucl = probL + probR;
}
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
-
+
debug3(printf("B. Test indel on right\n"));
/* Test indel on right */
cL = rL;
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL_upper[cL][rL];
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -2118,13 +1880,6 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR_lower[rR][cR];
if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -2137,7 +1892,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2160,50 +1915,11 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
debug3(printf("Skip main diagonal\n"));
for (/*Skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR_upper[cR][rR];
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -2216,7 +1932,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2239,38 +1955,6 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
@@ -2278,13 +1962,6 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
/* Test indel on left */
cR = rR;
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR_upper[cR][rR];
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -2294,13 +1971,6 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL_lower[rL][cL];
if (directionsL_lower_nogap[rL][cL] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -2313,7 +1983,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2336,50 +2006,11 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
debug3(printf("Skip main diagonal\n"));
for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL_upper[cL][rL];
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -2392,7 +2023,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2415,66 +2046,40 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
}
if (bestprob_with_score > 2*PROB_CEILING) {
/* Probability is good with best alignment, so take that */
debug(printf("Best alignment based on score alone has good probability\n"));
- use_prob_p = false; /* was previously true (bug) */
- } else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
- /* Probability-based solution is bad, so use alignment */
- debug(printf("Probability-based solution is bad on both sites\n"));
- use_prob_p = false;
- } else if (bestscore_with_suffprob < 0 || bestscore_with_suffprob < bestscore - 9) {
- debug(printf("Probability-based solution requires very bad alignment\n"));
- use_prob_p = false;
+ use_dinucl_p = false; /* was previously true (bug) */
+ } else if (bestprob_with_dinucl == 0.0) {
+ debug(printf("No dinucleotides found\n"));
+ use_dinucl_p = false;
+ } else if (0 && left_probabilities[bestcL_with_dinucl] < PROB_CEILING && right_probabilities[bestcR_with_dinucl] < PROB_CEILING) {
+ /* Dinucleotide-based solution is bad, so use alignment */
+ debug(printf("Dinucleotide-based solution is bad on both sites\n"));
+ use_dinucl_p = false;
+ } else if (bestscore_with_dinucl < 0 || bestscore_with_dinucl < bestscore - 9) {
+ debug(printf("Dinucleotide-based solution requires very bad alignment, because bestscore_with_dinucl %d < bestscore %d - 9\n",
+ bestscore_with_dinucl,bestscore));
+ use_dinucl_p = false;
} else {
- use_prob_p = true;
+ use_dinucl_p = true;
}
- if (use_prob_p == true) {
- /* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
- debug(printf("Switch to probability-based solution\n"));
- debug(printf("SIMD 16. bestscore %d (bestprob_with_score %f) vs bestscore_with_suffprob %d (bestprob_trunc %f, actually %f and %f)\n",
- bestscore,bestprob_with_score,bestscore_with_suffprob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
- *bestcL = bestcL_with_prob;
- *bestcR = bestcR_with_prob;
- *bestrL = bestrL_with_prob;
- *bestrR = bestrR_with_prob;
- bestscore = bestscore_with_suffprob;
+ debug(printf("SIMD 16. bestscore %d (bestprob_with_score %f)\n",bestscore,bestprob_with_score));
+ if (use_dinucl_p == true) {
+ debug(printf("SIMD 16. bestscore %d (bestprob_with_score %f) vs bestscore_with_dinucl %d (bestprob_with_dinucl %f and %f)\n",
+ bestscore,bestprob_with_score,bestscore_with_dinucl,left_probabilities[bestcL_with_dinucl],right_probabilities[bestcR_with_dinucl]));
+
+ /* Best alignment yields bad probability, and dinucleotide-based alignment yields good probability, so switch */
+ debug(printf("Switch to dinucleotide-based solution\n"));
+ *bestcL = bestcL_with_dinucl;
+ *bestcR = bestcR_with_dinucl;
+ *bestrL = bestrL_with_dinucl;
+ *bestrR = bestrR_with_dinucl;
+ bestscore = bestscore_with_dinucl;
}
FREEA(rightdi);
@@ -2707,17 +2312,17 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
Univcoord_T chroffset, Univcoord_T chrhigh,
bool halfp, bool finalp) {
int rL, rR, cL, cR;
- int bestrL_with_prob, bestrR_with_prob, bestcL_with_prob, bestcR_with_prob;
+ int bestrL_with_dinucl, bestrR_with_dinucl, bestcL_with_dinucl, bestcR_with_dinucl;
int cloL, chighL;
int cloR, chighR;
int introntype;
int bestscore = NEG_INFINITY_32, score, scoreL, scoreR, scoreI;
- int bestscore_with_suffprob = NEG_INFINITY_32;
- double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob_with_score, bestprob_trunc;
+ int bestscore_with_dinucl = NEG_INFINITY_32;
+ double *left_probabilities, *right_probabilities, probL, probR, bestprob_with_score, bestprob_with_dinucl;
Univcoord_T splicesitepos;
char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
int *leftdi, *rightdi;
- bool use_prob_p;
+ bool use_dinucl_p;
/* Read dinucleotides */
@@ -2868,7 +2473,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
/* Search using probs and without simultaneously */
bestscore = NEG_INFINITY_32;
- bestprob_with_score = bestprob_trunc = 0.0;
+ bestprob_with_score = bestprob_with_dinucl = 0.0;
for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
if ((cloL = rL - lbandL) < 1) {
@@ -2888,38 +2493,61 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
debug3(printf("A. Test no indels\n"));
cL = rL;
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL[cL][rL];
cR = rR;
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
+ scoreR = (int) matrixR[cR][rR];
+
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
+
+ if ((score = scoreL + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob_with_score = probL + probR;
} else {
- probR_trunc = probR;
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
+
+ /* Perform only without indels */
+ if (scoreI > 0) {
+ debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
+ cL,cR,scoreI,probL,probR,probL+probR));
+ if (probL + probR > bestprob_with_dinucl) {
+ bestscore_with_dinucl = scoreL + scoreR;
+ bestcL_with_dinucl = cL;
+ bestcR_with_dinucl = cR;
+ bestrL_with_dinucl = rL;
+ bestrR_with_dinucl = rR;
+ bestprob_with_dinucl = probL + probR;
+ }
}
- scoreR = (int) matrixR[cR][rR];
debug3(printf("B. Test indel on right\n"));
/* Test indel on right */
cL = rL;
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL[cL][rL];
if (directionsL_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -2929,13 +2557,6 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cR = cloR; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR[cR][rR];
if (directionsR_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -2948,7 +2569,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2971,51 +2592,12 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
debug3(printf("C. Test indel on left\n"));
/* Test indel on left */
cR = rR;
probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
scoreR = (int) matrixR[cR][rR];
if (directionsR_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -3025,13 +2607,6 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cL = cloL; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
scoreL = (int) matrixL[cL][rL];
if (directionsL_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
@@ -3044,7 +2619,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
scoreI = 0;
#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ if ((score = scoreL + scoreR) > bestscore) {
debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -3067,66 +2642,38 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
}
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_suffprob = scoreL + scoreI + scoreR;
- }
}
}
if (bestprob_with_score > 2*PROB_CEILING) {
/* Probability is good with best alignment, so take that */
debug(printf("Best alignment based on score alone has good probability\n"));
- use_prob_p = false; /* was previously true (bug) */
- } else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
- /* Probability-based solution is bad, so use alignment */
- debug(printf("Probability-based solution is bad on both sites\n"));
- use_prob_p = false;
- } else if (bestscore_with_suffprob < 0 || bestscore_with_suffprob < bestscore - 9) {
- debug(printf("Probability-based solution requires very bad alignment\n"));
- use_prob_p = false;
+ use_dinucl_p = false; /* was previously true (bug) */
+ } else if (bestprob_with_dinucl == 0.0) {
+ debug(printf("No dinucleotides found\n"));
+ use_dinucl_p = false; /* no dinucleotide candidate was recorded, so best*_with_dinucl indices are unset */
+ } else if (0 && left_probabilities[bestcL_with_dinucl] < PROB_CEILING && right_probabilities[bestcR_with_dinucl] < PROB_CEILING) {
+ /* Dinucleotide-based solution is bad, so use alignment */
+ debug(printf("Dinucleotide-based solution is bad on both sites\n"));
+ use_dinucl_p = false;
+ } else if (bestscore_with_dinucl < 0 || bestscore_with_dinucl < bestscore - 9) {
+ debug(printf("Dinucleotide-based solution requires very bad alignment\n"));
+ use_dinucl_p = false;
} else {
- use_prob_p = true;
+ use_dinucl_p = true;
}
- if (use_prob_p == true) {
- /* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
- debug(printf("Switch to probability-based solution\n"));
- debug(printf("Non-SIMD. bestscore %d (bestprob_with_score %f) vs bestscore_with_suffprob %d (bestprob_trunc %f, actually %f and %f)\n",
- bestscore,bestprob_with_score,bestscore_with_suffprob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
- *bestcL = bestcL_with_prob;
- *bestcR = bestcR_with_prob;
- *bestrL = bestrL_with_prob;
- *bestrR = bestrR_with_prob;
- bestscore = bestscore_with_suffprob;
+ debug(printf("Non-SIMD. bestscore %d (bestprob_with_score %f)\n",bestscore,bestprob_with_score));
+ if (use_dinucl_p == true) {
+ debug(printf("Non-SIMD. bestscore %d (bestprob_with_score %f) vs bestscore_with_dinucl %d (bestprob_with_dinucl %f and %f)\n",
+ bestscore,bestprob_with_score,bestscore_with_dinucl,left_probabilities[bestcL_with_dinucl],right_probabilities[bestcR_with_dinucl]));
+ /* Best alignment yields bad probability, and dinucleotide-based alignment yields good probability, so switch */
+ debug(printf("Switch to dinucleotide-based solution\n"));
+ *bestcL = bestcL_with_dinucl;
+ *bestcR = bestcR_with_dinucl;
+ *bestrL = bestrL_with_dinucl;
+ *bestrR = bestrR_with_dinucl;
+ bestscore = bestscore_with_dinucl;
}
@@ -3867,6 +3414,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
if (Pair_maxnegscore(pairs) < -10) {
+ *finalscore = -100; /* Otherwise calling procedure will act on finalscore */
return (List_T) NULL;
} else {
return List_reverse(pairs);
@@ -4001,6 +3549,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
debug3(Pair_dump_list(pairs,true));
debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
if (Pair_maxnegscore(pairs) < -10) {
+ *finalscore = -100; /* Otherwise calling procedure will act on finalscore */
return (List_T) NULL;
} else {
return List_reverse(pairs);
@@ -4100,6 +3649,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
debug3(Pair_dump_list(pairs,true));
debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
if (Pair_maxnegscore(pairs) < -10) {
+ *finalscore = -100; /* Otherwise calling procedure will act on finalscore */
return (List_T) NULL;
} else {
return List_reverse(pairs);
diff --git a/src/pair.c b/src/pair.c
index 894d05f..ae3ddda 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 196403 2016-08-16 14:33:56Z twu $";
+static char rcsid[] = "$Id: pair.c 198279 2016-09-24 00:54:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1551,7 +1551,7 @@ Pair_dump_one (T this, bool zerobasedp) {
/* Useful for debugging */
void
Pair_dump_list (List_T pairs, bool zerobasedp) {
- T this;
+ T this, prev = NULL, old = NULL;
List_T p;
printf("***Start of list***\n");
@@ -1559,6 +1559,26 @@ Pair_dump_list (List_T pairs, bool zerobasedp) {
this = List_head(p);
Pair_dump_one(this,zerobasedp);
printf("\n");
+
+ if (this->querypos != -1) {
+ if (old != NULL) {
+ if (old->querypos > prev->querypos) {
+ if (prev->querypos < this->querypos) {
+ fprintf(stderr,"%d %d %d\n",old->querypos,prev->querypos,this->querypos);
+ abort();
+ }
+ } else if (old->querypos < prev->querypos) {
+ if (prev->querypos > this->querypos) {
+ fprintf(stderr,"%d %d %d\n",old->querypos,prev->querypos,this->querypos);
+ abort();
+ }
+ }
+ }
+
+ old = prev;
+ prev = this;
+ }
+
}
printf("***End of list***\n");
return;
@@ -5077,8 +5097,8 @@ print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, ch
FPRINTF(fp,"\tXO:Z:%s",abbrev);
/* 12. TAGS: XS */
-#if 0
- /* Previously used for GSNAP */
+#ifdef GSNAP
+ /* Use mate sensedir, to be consistent with samprint method */
if (sensedir == SENSE_NULL) {
sensedir = mate_sensedir;
}
@@ -5101,12 +5121,13 @@ print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, ch
} else if (intronp == false) {
/* Skip. No intron in this end and mate is not revealing. */
+#if 0
} else if (force_xs_direction_p == true) {
+ /* Don't print XS field for SENSE_NULL */
/* Could not determine sense, so just report arbitrarily as + */
/* This option provided for users of Cufflinks, which cannot handle XS:A:? */
FPRINTF(fp,"\tXS:A:+");
-#if 0
} else {
/* Non-canonical. Don't report. */
FPRINTF(fp,"\tXS:A:?");
diff --git a/src/samprint.c b/src/samprint.c
index 6576b24..1fe4f71 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 196273 2016-08-12 15:15:06Z twu $";
+static char rcsid[] = "$Id: samprint.c 197889 2016-09-15 23:19:09Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1207,9 +1207,15 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
querylength = Shortread_fulllength(queryseq);
plusp = Stage3end_plusp(stage3end);
+
+#if 1
if ((sensedir = Stage3end_sensedir(stage3end)) == SENSE_NULL && mate != NULL) {
sensedir = Stage3end_sensedir(mate);
}
+#else
+ /* If we use this, we need to change code in pair.c also */
+ sensedir = Stage3end_sensedir(stage3end);
+#endif
/* sensep = (sensedir == SENSE_ANTI) ? false : true; */
/* 1. QNAME */
@@ -2054,6 +2060,7 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
#endif
}
+
/* 12. TAGS: XA */
if (prevp == NULL) {
/* substringL = (Substring_T) NULL; */
@@ -3237,7 +3244,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
/* Code taken from that for XS tag for print_halfdonor and print_halfacceptor */
/* For the donor and acceptor strands, use the substring sensedir and not the Stage3end_T sensedir */
- if ((donor_sensedir = Substring_chimera_sensedir(donor)) == SENSE_FORWARD) {
+ if ((donor_sensedir = Substring_sensedir(donor)) == SENSE_FORWARD) {
if (Substring_plusp(donor) == true) {
donor_strand = '+';
} else {
@@ -3253,7 +3260,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
abort();
}
- if ((acceptor_sensedir = Substring_chimera_sensedir(acceptor)) == SENSE_FORWARD) {
+ if ((acceptor_sensedir = Substring_sensedir(acceptor)) == SENSE_FORWARD) {
if (Substring_plusp(acceptor) == true) {
acceptor_strand = '+';
} else {
diff --git a/src/splice.c b/src/splice.c
index 59876c4..1bc5628 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 197773 2016-09-14 00:39:12Z twu $";
+static char rcsid[] = "$Id: splice.c 197917 2016-09-16 13:39:50Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2499,15 +2499,16 @@ Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambigu
/* Note: If substring does not extend to ends of query, then region
beyond querystart and queryend might actually be matching, and not
mismatches. Could fix in the future. */
-void
+int
Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
- double *ambig_prob_5, double *ambig_prob_3, int *sensedir,
+ double *ambig_prob_5, double *ambig_prob_3, int orig_sensedir,
Univcoord_T start5, Univcoord_T middle5, Univcoord_T end5, bool solve5p,
Univcoord_T start3, Univcoord_T middle3, Univcoord_T end3, bool solve3p,
Univcoord_T genomicstart5, Univcoord_T genomicend3,
Univcoord_T chroffset, bool plusp) {
+ int new_sensedir;
Univcoord_T genomicpos, start_genomicpos, middle_genomicpos, end_genomicpos;
Univcoord_T splice_genomepos_5, splice_genomepos_3, splice_genomepos_5_mm, splice_genomepos_3_mm;
double donor_prob, acceptor_prob;
@@ -2521,7 +2522,7 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
int splice_sensedir_5, splice_sensedir_3, splice_sensedir_5_mm, splice_sensedir_3_mm;
- debug13(printf("\nEntered Splice_trim_novel_spliceends with sensedir %d\n",*sensedir));
+ debug13(printf("\nEntered Splice_trim_novel_spliceends with orig_sensedir %d\n",orig_sensedir));
*ambig_end_length_5 = 0;
*ambig_end_length_3 = 0;
*ambig_prob_5 = 0.0;
@@ -2554,9 +2555,11 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
}
#endif
+ new_sensedir = SENSE_NULL;
+
if (solve3p == false) {
/* Skip 3' end */
- } else if (*sensedir == SENSE_FORWARD) {
+ } else if (orig_sensedir == SENSE_FORWARD) {
if (plusp) {
splicetype3 = splicetype3_mm = DONOR;
@@ -2616,7 +2619,7 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
debug13(printf("\n"));
}
- } else if (*sensedir == SENSE_ANTI) {
+ } else if (orig_sensedir == SENSE_ANTI) {
if (plusp) {
splicetype3 = splicetype3_mm = ANTIACCEPTOR;
@@ -2803,7 +2806,7 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
if (solve3p == false) {
/* Skip 3' end */
- } else if (*sensedir != SENSE_NULL) {
+ } else if (orig_sensedir != SENSE_NULL) {
if (max_prob_3 > END_SPLICESITE_PROB_MATCH) {
debug13(printf("Found good splice %s on 3' end at %u with probability %f\n",
Splicetype_string(splicetype3),splice_genomepos_3-chroffset,max_prob_3));
@@ -2862,7 +2865,7 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
if (solve5p == false) {
/* Skip 5' end */
- } else if (*sensedir == SENSE_FORWARD) {
+ } else if (orig_sensedir == SENSE_FORWARD) {
if (plusp) {
splicetype5 = splicetype5_mm = ACCEPTOR;
@@ -2922,7 +2925,7 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
debug13(printf("\n"));
}
- } else if (*sensedir == SENSE_ANTI) {
+ } else if (orig_sensedir == SENSE_ANTI) {
if (plusp) {
splicetype5 = splicetype5_mm = ANTIDONOR;
@@ -3109,7 +3112,7 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
if (solve5p == false) {
/* Skip 5' end */
- } else if (*sensedir != SENSE_NULL) {
+ } else if (orig_sensedir != SENSE_NULL) {
if (max_prob_5 > END_SPLICESITE_PROB_MATCH) {
debug13(printf("Found good splice %s on 5' end at %u with probability %f\n",
Splicetype_string(splicetype5),splice_genomepos_5-chroffset,max_prob_5));
@@ -3137,7 +3140,7 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
}
}
- if (*sensedir == SENSE_NULL) {
+ if (orig_sensedir == SENSE_NULL) {
if (max_prob_3 >= END_SPLICESITE_PROB_MATCH || max_prob_5 >= END_SPLICESITE_PROB_MATCH) {
if (max_prob_3 >= END_SPLICESITE_PROB_MATCH && max_prob_5 >= END_SPLICESITE_PROB_MATCH
&& max_prob_sense_forward_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH
@@ -3156,7 +3159,7 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
*ambig_splicetype_5 = splicetype5;
*ambig_prob_5 = max_prob_5;
debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
- *sensedir = SENSE_FORWARD; /* = splice_sensedir_3 */
+ new_sensedir = SENSE_FORWARD; /* = splice_sensedir_3 */
} else if (max_prob_3 >= END_SPLICESITE_PROB_MATCH && max_prob_5 >= END_SPLICESITE_PROB_MATCH
&& max_prob_sense_anti_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH
@@ -3175,7 +3178,7 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
*ambig_splicetype_5 = splicetype5;
*ambig_prob_5 = max_prob_5;
debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
- *sensedir = SENSE_ANTI; /* = splice_sensedir_3 */
+ new_sensedir = SENSE_ANTI; /* = splice_sensedir_3 */
} else if (max_prob_3 > max_prob_5) {
/* Consider just 3' end */
@@ -3192,10 +3195,10 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
if (max_prob_sense_forward_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_3;
+ new_sensedir = splice_sensedir_3;
} else if (max_prob_sense_anti_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_3;
+ new_sensedir = splice_sensedir_3;
} else {
/* Not enough evidence to set sensedir */
}
@@ -3216,10 +3219,10 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
/* *cdna_direction = splice_cdna_direction_5; */
if (max_prob_sense_forward_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_5;
+ new_sensedir = splice_sensedir_5;
} else if (max_prob_sense_anti_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_5;
+ new_sensedir = splice_sensedir_5;
} else {
/* Not enough evidence to set sensedir */
}
@@ -3241,10 +3244,10 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
/* *cdna_direction = splice_cdna_direction_3_mm; */
if (max_prob_sense_forward_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_3_mm;
+ new_sensedir = splice_sensedir_3_mm;
} else if (max_prob_sense_anti_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_forward_3_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_forward_5_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_3_mm;
+ new_sensedir = splice_sensedir_3_mm;
} else {
/* Not enough evidence to set sensedir */
}
@@ -3263,10 +3266,10 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
/* *cdna_direction = splice_cdna_direction_5_mm; */
if (max_prob_sense_forward_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_5_mm;
+ new_sensedir = splice_sensedir_5_mm;
} else if (max_prob_sense_anti_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_forward_5_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_forward_3_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_5_mm;
+ new_sensedir = splice_sensedir_5_mm;
} else {
/* Not enough evidence to set sensedir */
}
@@ -3274,8 +3277,8 @@ Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
}
}
- debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d, probs %f and %f\n",
- *ambig_end_length_5,*ambig_end_length_3,*ambig_prob_5,*ambig_prob_3));
- return;
+ debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d, probs %f and %f, new_sensedir %d\n",
+ *ambig_end_length_5,*ambig_end_length_3,*ambig_prob_5,*ambig_prob_3,new_sensedir));
+ return new_sensedir;
}
diff --git a/src/splice.h b/src/splice.h
index 8ab3631..9221e38 100644
--- a/src/splice.h
+++ b/src/splice.h
@@ -1,4 +1,4 @@
-/* $Id: splice.h 197773 2016-09-14 00:39:12Z twu $ */
+/* $Id: splice.h 197917 2016-09-16 13:39:50Z twu $ */
#ifndef SPLICE_INCLUDED
#define SPLICE_INCLUDED
#include "bool.h"
@@ -95,10 +95,10 @@ extern List_T
Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambiguous,
int querylength, bool first_read_p, bool sarrayp);
-extern void
+extern int
Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
- double *ambig_prob_5, double *ambig_prob_3, int *sensedir,
+ double *ambig_prob_5, double *ambig_prob_3, int orig_sensedir,
Univcoord_T start5, Univcoord_T middle5, Univcoord_T end5, bool solve5p,
Univcoord_T start3, Univcoord_T middle3, Univcoord_T end3, bool solve3p,
Univcoord_T genomicstart5, Univcoord_T genomicend3,
diff --git a/src/stage1.c b/src/stage1.c
index abd678d..3030ba1 100644
--- a/src/stage1.c
+++ b/src/stage1.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1.c 184461 2016-02-18 00:07:47Z twu $";
+static char rcsid[] = "$Id: stage1.c 198280 2016-09-24 00:54:40Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3602,7 +3602,7 @@ Stage1_compute (bool *lowidentityp, Sequence_T queryuc, Indexdb_T indexdb_fwd, I
#endif
maxentries = MAXENTRIES;
- debug(printf("Finding first pair with matchsize = %d, maxentries = %d\n",matchsize,maxentries));
+ debug(printf("Finding first pair with maxentries = %d\n",maxentries));
this = Stage1_new(queryuc,maxtotallen,maxentries);
read_oligos(this,queryuc);
@@ -3907,7 +3907,7 @@ Stage1_compute_nonstranded (bool *lowidentityp, Sequence_T queryuc,
debug(fprintf(stderr,"trimlength = %d, maxtotallen = %d\n",trimlength,maxtotallen));
/* Scan ends (find first pair and stutter) */
- debug(printf("Finding first pair, fwd, with matchsize = %d, maxentries = %d\n",matchsize,maxentries));
+ debug(printf("Finding first pair, fwd, with maxentries = %d\n",maxentries));
this_fwd = Stage1_new(queryuc,maxtotallen,maxentries);
read_oligos(this_fwd,queryuc);
@@ -3962,7 +3962,7 @@ Stage1_compute_nonstranded (bool *lowidentityp, Sequence_T queryuc,
debug(fprintf(stderr,"trimlength = %d, maxtotallen = %d\n",trimlength,maxtotallen));
/* Scan ends (find first pair and stutter) */
- debug(printf("Finding first pair, rev, with matchsize = %d, maxentries = %d\n",matchsize,maxentries));
+ debug(printf("Finding first pair, rev, with maxentries = %d\n",maxentries));
this_rev = Stage1_new(queryrc,maxtotallen,maxentries);
read_oligos(this_rev,queryrc);
diff --git a/src/stage1hr.c b/src/stage1hr.c
index a9b5350..d5019a3 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 197776 2016-09-14 00:42:27Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 198074 2016-09-21 00:25:02Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -14912,7 +14912,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/watsonp,genestrand,
accession,querylength,chrnum,chroffset,chrhigh,chrlength,
- /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
debug13(printf("Stage3end_new_gmap returns NULL\n"));
stored_hits = List_push(stored_hits,(void *) NULL);
@@ -14944,7 +14944,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/watsonp,genestrand,
accession,querylength,chrnum,chroffset,chrhigh,chrlength,
- /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
debug13(printf("Stage3end_new_gmap returns NULL\n"));
stored_hits = List_push(stored_hits,(void *) NULL);
@@ -14983,7 +14983,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/watsonp,genestrand,
accession,querylength,chrnum,chroffset,chrhigh,chrlength,
- /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
debug13(printf("Stage3end_new_gmap returns NULL\n"));
stored_hits = List_push(stored_hits,(void *) NULL);
@@ -15015,7 +15015,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/watsonp,genestrand,
accession,querylength,chrnum,chroffset,chrhigh,chrlength,
- /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
debug13(printf("Stage3end_new_gmap returns NULL\n"));
stored_hits = List_push(stored_hits,(void *) NULL);
@@ -15056,7 +15056,8 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/watsonp,genestrand,
accession,querylength,chrnum,chroffset,chrhigh,chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ cdna_direction,sensedir,/*sensedir_knownp*/true,
+ /*gmap_source*/GMAP_VIA_REGION)) == NULL) {
debug13(printf("Stage3end_new_gmap returns NULL\n"));
stored_hits = List_push(stored_hits,(void *) NULL);
FREE_OUT(pairarray1);
@@ -15087,7 +15088,8 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/watsonp,genestrand,
accession,querylength,chrnum,chroffset,chrhigh,chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ cdna_direction,sensedir,/*sensedir_knownp*/true,
+ /*gmap_source*/GMAP_VIA_REGION)) == NULL) {
debug13(printf("Stage3end_new_gmap returns NULL\n"));
stored_hits = List_push(stored_hits,(void *) NULL);
FREE_OUT(pairarray1);
@@ -16422,7 +16424,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
- /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
FREE_OUT(pairarray1);
@@ -16445,7 +16447,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
- /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
FREE_OUT(pairarray2);
@@ -16469,7 +16471,8 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
+ cdna_direction,sensedir,/*sensedir_knownp*/true,
+ /*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
FREE_OUT(pairarray1);
} else {
@@ -17144,7 +17147,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
- /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
FREE_OUT(pairarray1);
} else {
@@ -17166,7 +17169,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
- /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
FREE_OUT(pairarray2);
} else {
@@ -17189,7 +17192,8 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
+ cdna_direction,sensedir,/*sensedir_knownp*/true,
+ /*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
FREE_OUT(pairarray1);
} else {
hits = List_push(hits,(void *) hit);
@@ -19748,7 +19752,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
List_T hitpairs = NULL, p;
Stage3pair_T newpair;
List_T halfmapping5, halfmapping3, a;
- Stage3end_T hit5, hit3, gmap5, gmap3;
+ Stage3end_T hit5, hit3, gmap5, gmap3, hit;
+ int max_matches_5, max_matches_3;
List_T hitarray5[HITARRAY_N], hitarray3[HITARRAY_N];
Segment_T *plus_anchor_segments_5 = NULL, *minus_anchor_segments_5 = NULL,
*plus_anchor_segments_3 = NULL, *minus_anchor_segments_3 = NULL;
@@ -20627,27 +20632,65 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/* Search 6: Paired segments -> GMAP via segments */
- gmap5p = gmap3p = true;
if (gmap_segments_p == false) {
- debug(printf("gmap_segments_p is false, so setting gmap5p and gmap3p false\n"));
+ debug13(printf("gmap_segments_p is false, so setting gmap5p and gmap3p false\n"));
gmap5p = gmap3p = false;
} else if (*abort_pairing_p == true) {
- debug(printf("abort_pairing_p is true, so setting gmap5p and gmap3p false\n"));
+ debug13(printf("abort_pairing_p is true, so setting gmap5p and gmap3p false\n"));
gmap5p = gmap3p = false;
} else if (nconcordant > 0) {
/* Rely upon GMAP improvement instead */
- debug(printf("nconcordant > 0, so setting gmap5p and gmap3p false\n"));
+ debug13(printf("nconcordant > 0, so setting gmap5p and gmap3p false\n"));
gmap5p = gmap3p = false;
} else if (*found_score < trigger_score_for_gmap) {
- debug(printf("found_score %d < trigger_score_for_gmap %d, so setting gmap5p and gmap3p false\n",
- *found_score,trigger_score_for_gmap));
+ debug13(printf("found_score %d < trigger_score_for_gmap %d, so setting gmap5p and gmap3p false\n",
+ *found_score,trigger_score_for_gmap));
gmap5p = gmap3p = false;
} else if (*found_score < done_level_5 + done_level_3) {
- debug(printf("found_score %d < done_level_5 %d + done_level_3 %d, so setting gmap5p and gmap3p false\n",
- *found_score,done_level_5,done_level_3));
+ debug13(printf("found_score %d < done_level_5 %d + done_level_3 %d, so setting gmap5p and gmap3p false\n",
+ *found_score,done_level_5,done_level_3));
gmap5p = gmap3p = false;
+
+ } else {
+ max_matches_5 = 0;
+ for (p = *hits5; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmatches(hit) > max_matches_5) {
+ max_matches_5 = Stage3end_nmatches(hit);
+ }
+ }
+ debug13(printf("max_matches_5 %d\n",max_matches_5));
+
+ if (querylength5 - max_matches_5 < done_level_5) {
+ gmap5p = false;
+ } else {
+ gmap5p = true;
+ }
+
+
+ max_matches_3 = 0;
+ for (p = *hits3; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmatches(hit) > max_matches_3) {
+ max_matches_3 = Stage3end_nmatches(hit);
+ }
+ }
+ debug13(printf("max_matches_3 %d\n",max_matches_3));
+
+ if (querylength3 - max_matches_3 < done_level_3) {
+ gmap3p = false;
+ } else {
+ gmap3p = true;
+ }
+
+ if (gmap5p == false && gmap3p == false) {
+ /* If both gmap5p and gmap3p are false, then we have a concordance
+ issue. Might need to run gmap on both sides */
+ gmap5p = gmap3p = true;
+ }
}
+
if (gmap5p == true || gmap3p == true) {
debug(printf("***Trying to pair up segments***\n"));
pair_up_anchor_segments(plus_anchor_segments_5,minus_anchor_segments_5,
diff --git a/src/stage2.c b/src/stage2.c
index d1c9d63..d8f3eb8 100644
--- a/src/stage2.c
+++ b/src/stage2.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage2.c 189205 2016-05-06 23:15:22Z twu $";
+static char rcsid[] = "$Id: stage2.c 198275 2016-09-24 00:44:53Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -678,6 +678,138 @@ print_last_dinucl (int *last_dinucl, int genomiclength) {
#if 0
+/* Need this procedure because we are skipping some oligomers */
+static bool
+find_shifted_canonical (Chrpos_T leftpos, Chrpos_T rightpos, int querydistance,
+ Chrpos_T (*genome_left_position)(Chrpos_T, Chrpos_T, Univcoord_T, Univcoord_T, bool),
+ Chrpos_T (*genome_right_position)(Chrpos_T, Chrpos_T, Univcoord_T, Univcoord_T, bool),
+ Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, bool skip_repetitive_p) {
+ Chrpos_T leftdi, rightdi;
+ Chrpos_T last_leftpos, last_rightpos;
+ int shift, leftmiss, rightmiss;
+ Chrpos_T left_chrbound, right_chrbound;
+
+ /* leftpos = prevposition + querydistance + indexsize_nt - 1; */
+ /* rightpos = position; */
+
+ debug7(printf("Looking for shifted canonical at leftpos %u to rightpos %u, chroffset %u, chrhigh %u\n",
+ leftpos,rightpos,chroffset,chrhigh));
+
+#if 0
+ /* previously checked against genomiclength */
+ if (leftpos > genomiclength || rightpos > genomiclength) {
+ return false;
+ }
+#else
+ /* Checking just before call to genome_right_position */
+#endif
+
+ if (leftpos >= rightpos) {
+ debug7(printf("leftpos %u >= rightpos %u, so returning false\n",leftpos,rightpos));
+ return false;
+ }
+
+ if (leftpos < 103) {
+ left_chrbound = 3; /* Previously 0, but then can find splice site at beginning of segment */
+ } else {
+ left_chrbound = leftpos - 100;
+ }
+
+ if (rightpos < 103) {
+ right_chrbound = 3; /* Previously 0, but then can find splice site at beginning of segment */
+ } else {
+ right_chrbound = rightpos - 100;
+ }
+
+#if 0
+ if (skip_repetitive_p == false) {
+
+ last_leftpos = (*genome_left_position)(leftpos,left_chrbound,chroffset,chrhigh,plusp);
+ last_rightpos = (*genome_right_position)(rightpos,right_chrbound,chroffset,chrhigh,plusp);
+ debug7(printf("last_leftpos %u, last_rightpos %u\n",last_leftpos,last_rightpos));
+
+ debug7(printf("skip_repetitive_p == false, so returning %u == %u && %u == %u\n",
+ leftpos,last_leftpos,rightpos,last_rightpos));
+ return (leftpos == last_leftpos && rightpos == last_rightpos);
+ }
+#endif
+
+ /* Allow canonical to be to right of match */
+ leftpos += SHIFT_EXTRA;
+ if (leftpos > chrhigh - 3) {
+ leftpos = chrhigh - 3;
+ }
+ rightpos += SHIFT_EXTRA;
+ if (rightpos > chrhigh - 3) {
+ rightpos = chrhigh - 3;
+ }
+ debug7(printf("after shift, leftpos = %u, rightpos = %u\n",leftpos,rightpos));
+
+ shift = 0;
+ while (shift <= querydistance + SHIFT_EXTRA + SHIFT_EXTRA) {
+
+#if 0
+ if (leftpos < 0) {
+ return false;
+ } else if (rightpos < 0) {
+ /* Shouldn't need to check if leftpos >= 0 and rightpos >= leftpos, in the other two conditions */
+ return false;
+ } else if (rightpos >= chrlength) {
+ return false;
+ }
+#endif
+ if (leftpos < 3) {
+ return false;
+ } else if (leftpos > rightpos) {
+ return false;
+ }
+
+ last_leftpos = (*genome_left_position)(leftpos,left_chrbound,chroffset,chrhigh,plusp);
+ debug7(printf("last_leftpos %u\n",last_leftpos));
+ assert(last_leftpos != 0U);
+ if ((leftdi = last_leftpos) == -1) {
+ debug7(printf("\n"));
+ return false;
+ } else {
+ leftmiss = (int) (leftpos - leftdi);
+ }
+
+ last_rightpos = (*genome_right_position)(rightpos,right_chrbound,chroffset,chrhigh,plusp);
+ debug7(printf("last_rightpos %u\n",last_rightpos));
+ assert(last_rightpos != 0U);
+ if ((rightdi = last_rightpos) == -1) {
+ debug7(printf("\n"));
+ return false;
+ } else {
+ rightmiss = (int) (rightpos - rightdi);
+ }
+
+ debug7(printf("shift %d/left %d (miss %d)/right %d (miss %d)\n",shift,leftpos,leftmiss,rightpos,rightmiss));
+ if (leftmiss == rightmiss) { /* was leftmiss == 0 && rightmiss == 0, which doesn't allow for a shift */
+ debug7(printf(" => Success at %u..%u (fwd) or %u..%u (rev)\n\n",
+ leftpos-leftmiss+/*onebasedp*/1U,rightpos-rightmiss+/*onebasedp*/1U,
+ chrhigh-chroffset-(leftpos-leftmiss),chrhigh-chroffset-(rightpos-rightmiss)));
+ return true;
+ } else if (leftmiss >= rightmiss) {
+ shift += leftmiss;
+ leftpos -= leftmiss;
+ rightpos -= leftmiss;
+ } else {
+ shift += rightmiss;
+ leftpos -= rightmiss;
+ rightpos -= rightmiss;
+ }
+ }
+
+ debug7(printf("\n"));
+ return false;
+}
+#endif
+
+
+
+
+#if 0
/* General case for ranges in score_querypos */
while (prevhit != -1 && (prevposition = mappings[prev_querypos][prevhit]) + indexsize_nt <= position) {
/* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
diff --git a/src/stage3.c b/src/stage3.c
index 1745fec..e448541 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 197772 2016-09-14 00:38:08Z twu $";
+static char rcsid[] = "$Id: stage3.c 198281 2016-09-24 00:55:49Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -36,7 +36,7 @@ static char rcsid[] = "$Id: stage3.c 197772 2016-09-14 00:38:08Z twu $";
#include "dynprog_genome.h"
#include "dynprog_cdna.h"
#include "dynprog_end.h"
-#include "splice.h"
+#include "boyer-moore.h"
#include "maxent.h"
#include "maxent_hr.h"
#include "fastlog.h"
@@ -72,7 +72,8 @@ static char rcsid[] = "$Id: stage3.c 197772 2016-09-14 00:38:08Z twu $";
#define DYNPROGINDEX_MINOR +1
#define DUAL_BREAK_PROB_THRESHOLD 0.90
-#define MIN_STAGE2_FOR_DUALBREAK 8 /* was 24, but misses small exons */
+#define MIN_STAGE2_FOR_DUALBREAK 3 /* was 24, but misses small exons */
+#define MIN_MICROEXON_LENGTH 3
#define THETA_SLACK 0.10
#define TRIM_END_PVALUE 1e-4
@@ -94,6 +95,9 @@ static char rcsid[] = "$Id: stage3.c 197772 2016-09-14 00:38:08Z twu $";
#define END_SPLICESITE_PROB_MATCH 0.90
#define END_SPLICESITE_PROB_MISMATCH 0.95
+#define MICROEXON_PROB_MATCH 0.50
+#define MICROEXON_PROB_MISMATCH 0.80
+
#define END_MIN_EXONLENGTH 12
#if 0
@@ -6505,6 +6509,172 @@ peel_leftward (int *n_peeled_indels, bool *protectedp, List_T *peeled_path, List
}
+static List_T
+peel_leftward_intron (int *n_peeled_indels, bool *protectedp, List_T *peeled_path, List_T path, int *querydp5, Chrpos_T *genomedp5,
+ Chrpos_T genomedp3, bool stop_at_indels_p, Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
+ int minpeelback, int min_mismatches) {
+ List_T peeled = NULL;
+ Pair_T pair, rightpair;
+ int npeelback = 0, nmismatches = 0, niter;
+ char cdna, intron_nt, intron_nt_alt;
+#if 0
+ int nincursion = 0;
+#endif
+ int maxpeelback = 12;
+
+ *n_peeled_indels = 0;
+ /* *protectedp = false; -- set by calling procedure */
+
+ debug(printf("Peeling leftward with genomedp3 %d and stop_at_indels_p %d:",genomedp3,stop_at_indels_p));
+
+ /* Remove initial gaps */
+ while (path != NULL &&
+ ( ((Pair_T) path->first)->gapp == true ||
+ ((Pair_T) path->first)->comp == INDEL_COMP ||
+ ((Pair_T) path->first)->comp == SHORTGAP_COMP)) {
+ path = Pairpool_pop(path,&pair);
+ }
+
+ if (path == NULL) {
+ debug(printf(" path is empty\n"));
+
+ } else if (stop_at_indels_p == true) {
+ pair = path->first;
+ if (pair->gapp == true) {
+ /* Peel known gap */
+ debug(printf(" Known_gap"));
+ peeled = List_transfer_one(peeled,&path);
+ }
+
+ /* Peel initial indels anyway */
+ while (path != NULL && ( ((Pair_T) path->first)->comp == INDEL_COMP || ((Pair_T) path->first)->comp == SHORTGAP_COMP )) {
+ debug(printf(" Peel [");
+ Pair_dump_one(path->first,/*zerobasedp*/true);
+ printf("]"));
+ peeled = List_transfer_one(peeled,&path);
+ }
+
+ while (/*npeelback < maxpeelback &&*/
+ (npeelback < minpeelback || nmismatches < min_mismatches) && path != NULL &&
+ ((Pair_T) path->first)->gapp == false &&
+ ((Pair_T) path->first)->comp != INDEL_COMP &&
+ ((Pair_T) path->first)->comp != SHORTGAP_COMP) {
+ debug(printf(" Peel [");
+ Pair_dump_one(path->first,/*zerobasedp*/true);
+ printf("]"));
+
+ intron_nt = get_genomic_nt(&intron_nt_alt,genomedp3--,chroffset,chrhigh,watsonp);
+ if ((cdna = ((Pair_T) path->first)->cdna) != intron_nt && cdna != intron_nt_alt) {
+ debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp3+1));
+ nmismatches++;
+ }
+
+ if (((Pair_T) path->first)->protectedp == true) {
+ *protectedp = true;
+ }
+ peeled = List_transfer_one(peeled,&path);
+ npeelback++;
+ }
+
+ } else {
+ /* Don't stop at indels, but do stop at gaps */
+ pair = path->first;
+ if (pair->gapp == true) {
+ /* Peel known gap */
+ debug(printf(" Known_gap"));
+ peeled = List_transfer_one(peeled,&path);
+ }
+
+ niter = 0;
+ while (/*npeelback < maxpeelback &&*/
+ (npeelback < minpeelback || nmismatches < min_mismatches) && niter < MAXITER && path != NULL &&
+ ((Pair_T) path->first)->gapp == false) {
+ debug(printf(" Peel [");
+ Pair_dump_one(path->first,/*zerobasedp*/true);
+ printf("]"));
+
+ intron_nt = get_genomic_nt(&intron_nt_alt,genomedp3--,chroffset,chrhigh,watsonp);
+ if ((cdna = ((Pair_T) path->first)->cdna) != intron_nt && cdna != intron_nt_alt) {
+ debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp3+1));
+ nmismatches++;
+ }
+
+ if (((Pair_T) path->first)->comp == MATCH_COMP || ((Pair_T) path->first)->comp == DYNPROG_MATCH_COMP || ((Pair_T) path->first)->comp == AMBIGUOUS_COMP) {
+ npeelback++;
+ } else if (((Pair_T) path->first)->comp == INDEL_COMP) {
+ *n_peeled_indels += 1;
+ npeelback--;
+ } else if (((Pair_T) path->first)->comp == SHORTGAP_COMP) {
+ *n_peeled_indels += 1;
+ npeelback--;
+ } else {
+ npeelback--;
+ }
+ if (((Pair_T) path->first)->protectedp == true) {
+ *protectedp = true;
+ }
+ niter++;
+ peeled = List_transfer_one(peeled,&path);
+ }
+
+ if (path != NULL && ((Pair_T) path->first)->gapp == true) {
+ debug(printf(" Hit gap [");
+ Pair_dump_one(path->first,/*zerobasedp*/true);
+ printf("]"));
+ }
+ }
+
+ if (path != NULL &&
+ ( ((Pair_T) path->first)->gapp == true ||
+ ((Pair_T) path->first)->comp == INDEL_COMP ||
+ ((Pair_T) path->first)->comp == SHORTGAP_COMP)) {
+ /* Don't leave a gap or indel on the top of the path */
+ while (peeled != NULL &&
+ ( ((Pair_T) peeled->first)->gapp == true ||
+ ((Pair_T) peeled->first)->comp == INDEL_COMP ||
+ ((Pair_T) peeled->first)->comp == SHORTGAP_COMP)) {
+ debug(printf(" Putback [");
+ Pair_dump_one(peeled->first,/*zerobasedp*/true);
+ printf("]"));
+ path = List_transfer_one(path,&peeled);
+ }
+ if (peeled != NULL) {
+ debug(printf(" Putback [");
+ Pair_dump_one(peeled->first,/*zerobasedp*/true);
+ printf("]"));
+ path = List_transfer_one(path,&peeled); /* This should be match or mismatch */
+ }
+ }
+
+ if (path != NULL) {
+ rightpair = path->first;
+ *querydp5 = rightpair->querypos + 1;
+ *genomedp5 = rightpair->genomepos + 1;
+ } else if (peeled != NULL) {
+ rightpair = peeled->first;
+ *querydp5 = rightpair->querypos;
+ *genomedp5 = rightpair->genomepos;
+ } else {
+ /* fprintf(stderr,"In peel_leftward, path and peeled are both NULL\n"); */
+ /* abort(); */
+ }
+
+ debug(
+ if (path == NULL) {
+ printf(" => Top of path is NULL.");
+ } else {
+ pair = path->first;
+ printf(" => Top of path is ");
+ Pair_dump_one(pair,/*zerobasedp*/true);
+ }
+ printf("\n => querydp5 = %d, genomedp5 = %d\n",*querydp5,*genomedp5);
+ );
+
+ *peeled_path = peeled;
+ return path;
+}
+
+
#if 0
static List_T
peel_rightward_old (bool *mismatchp, List_T *peeled_pairs, List_T pairs, int *querydp3, int *genomedp3,
@@ -6967,85 +7137,252 @@ peel_rightward (int *n_peeled_indels, bool *protectedp, List_T *peeled_pairs, Li
}
-/************************************************************************
- * Traversal functions
- ************************************************************************/
-
-/* For peel_rightward and peel_leftward, we set quit_on_gap_p = true,
- because we want to merge gaps in initial smoothing steps */
-
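+/* Instead of stopping at maxpeelback, peel until npeelback reaches minpeelback and at least min_mismatches mismatches are found against the genome read rightward from genomedp5 (the 5' side of the intron), or until a gap or the end of pairs is hit */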
static List_T
-traverse_single_gap (bool *filledp, int *dynprogindex, List_T pairs, List_T *path,
- Pair_T leftpair, Pair_T rightpair,
- Univcoord_T chroffset, Univcoord_T chrhigh,
- char *queryseq_ptr, char *queryuc_ptr, int querylength,
- bool watsonp, bool jump_late_p, Pairpool_T pairpool, Dynprog_T dynprog,
- Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
- int maxpeelback, double defect_rate, bool forcep, bool finalp) {
- List_T gappairs, peeled_pairs, peeled_path;
- int queryjump, genomejump;
- int querydp5, querydp3;
- Chrpos_T genomedp5, genomedp3;
- int nmatches, nmismatches, nopens, nindels;
- int unknowns, qopens, qindels, topens, tindels, ncanonical, nsemicanonical, nnoncanonical;
- int finalscore, origscore;
- bool protectedp;
- int n_peeled_indels;
- double min_splice_prob;
- /* int origqueryjump, origgenomejump; */
+peel_rightward_intron (int *n_peeled_indels, bool *protectedp, List_T *peeled_pairs, List_T pairs, int *querydp3, Chrpos_T *genomedp3,
+ Chrpos_T genomedp5, bool stop_at_indels_p, Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
+ int minpeelback, int min_mismatches) {
+ List_T peeled = NULL;
+ Pair_T pair, leftpair;
+ int npeelback = 0, nmismatches = 0, niter;
+ char cdna, intron_nt, intron_nt_alt;
+#if 0
+ int incursion = 0;
+#endif
+ int maxpeelback = 12;
- debug(printf("\nTRAVERSE_SINGLE_GAP\n"));
- querydp5 = leftpair->querypos + 1;
- genomedp5 = leftpair->genomepos + 1;
- /* if (leftpair->cdna == ' ') querydp5--; -- For old dynamic programming */
- /* if (leftpair->genome == ' ') genomedp5--; -- For old dynamic programming */
- querydp3 = rightpair->querypos - 1;
- genomedp3 = rightpair->genomepos - 1;
+ *n_peeled_indels = 0;
+ /* *protectedp = false; -- set by calling procedure */
- /* origqueryjump = querydp3 - querydp5 + 1; */
- /* origgenomejump = genomedp3 - genomedp5 + 1; */
+ debug(printf("Peeling rightward with genomedp5 %d and stop_at_indels_p %d:",genomedp5,stop_at_indels_p));
- /* Used to peelback only half as much as for a paired gap, to save
- on dynamic programming, but not any more. */
- protectedp = false;
- pairs = peel_rightward(&n_peeled_indels,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
- maxpeelback,/*stop_at_indels_p*/true);
- *path = peel_leftward(&n_peeled_indels,&protectedp,&peeled_path,*path,&querydp5,&genomedp5,
- maxpeelback,/*stop_at_indels_p*/true);
+ /* Remove initial gaps */
+ while (pairs != NULL &&
+ ( ((Pair_T) pairs->first)->gapp == true ||
+ ((Pair_T) pairs->first)->comp == INDEL_COMP ||
+ ((Pair_T) pairs->first)->comp == SHORTGAP_COMP )) {
+ pairs = Pairpool_pop(pairs,&pair);
+ }
- if (last_genomedp5 != NULL) {
- if (querydp5 < 0) {
- querydp5 = 0;
- }
- if (querydp3 >= querylength) {
- querydp3 = querylength - 1;
- }
- if (0 && finalp == false && genomedp5 == last_genomedp5[querydp5] && genomedp3 == last_genomedp3[querydp3]) {
- debug(printf("Already solved for %u..%u at %d..%d\n",genomedp5,genomedp3,querydp5,querydp3));
+ if (pairs == NULL) {
+ debug(printf(" pairs is empty\n"));
- pairs = Pairpool_transfer(pairs,peeled_pairs);
- *path = Pairpool_transfer(*path,peeled_path);
+ } else if (stop_at_indels_p == true) {
+ pair = pairs->first;
+ if (pair->gapp == true) {
+ /* Peel known gap */
+ debug(printf(" Known_gap"));
+ peeled = List_transfer_one(peeled,&pairs);
+ }
- *filledp = false; /* This replaces the gap */
- return pairs;
+ /* Peel initial indels anyway */
+ while (pairs != NULL && ( ((Pair_T) pairs->first)->comp == INDEL_COMP || ((Pair_T) pairs->first)->comp == INDEL_COMP )) {
+ debug(printf(" Peel [");
+ Pair_dump_one(pairs->first,/*zerobasedp*/true);
+ printf("]"));
+ peeled = List_transfer_one(peeled,&pairs);
}
- }
- queryjump = querydp3 - querydp5 + 1;
- genomejump = genomedp3 - genomedp5 + 1;
-
- if (queryjump <= 0 || genomejump <= 0) {
- /* This prevents cases like queryjump 0, genomejump 1 from being solved */
- debug(printf("Unable to perform dynamic programming\n"));
- *filledp = false;
+ while (/*npeelback < maxpeelback &&*/
+ (npeelback < minpeelback || nmismatches < min_mismatches) && pairs != NULL &&
+ ((Pair_T) pairs->first)->gapp == false &&
+ ((Pair_T) pairs->first)->comp != INDEL_COMP &&
+ ((Pair_T) pairs->first)->comp != SHORTGAP_COMP) {
+ debug(printf(" Peel [");
+ Pair_dump_one(pairs->first,/*zerobasedp*/true);
+ printf("]"));
- pairs = Pairpool_transfer(pairs,peeled_pairs);
- *path = Pairpool_transfer(*path,peeled_path);
+ intron_nt = get_genomic_nt(&intron_nt_alt,genomedp5++,chroffset,chrhigh,watsonp);
+ if ((cdna = ((Pair_T) pairs->first)->cdna) != intron_nt && cdna != intron_nt_alt) {
+ debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp5-1));
+ nmismatches++;
+ }
- return pairs;
+ if (((Pair_T) pairs->first)->protectedp == true) {
+ *protectedp = true;
+ }
+ peeled = List_transfer_one(peeled,&pairs);
+ npeelback++;
+ }
} else {
- gappairs = Dynprog_single_gap(&(*dynprogindex),&finalscore,
+ /* Don't stop at indels, but do stop at gaps */
+ pair = pairs->first;
+ if (pair->gapp == true) {
+ /* Peel known gap */
+ debug(printf(" Known_gap"));
+ peeled = List_transfer_one(peeled,&pairs);
+ }
+
+ niter = 0;
+ while (/*npeelback < maxpeelback &&*/
+ (npeelback < minpeelback || nmismatches < min_mismatches) && niter < MAXITER && pairs != NULL &&
+ ((Pair_T) pairs->first)->gapp == false) {
+ debug(printf(" Peel [");
+ Pair_dump_one(pairs->first,/*zerobasedp*/true);
+ printf("]"));
+
+ intron_nt = get_genomic_nt(&intron_nt_alt,genomedp5++,chroffset,chrhigh,watsonp);
+ if ((cdna = ((Pair_T) pairs->first)->cdna) != intron_nt && cdna != intron_nt_alt) {
+ debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp5-1));
+ nmismatches++;
+ }
+
+ if (((Pair_T) pairs->first)->comp == MATCH_COMP || ((Pair_T) pairs->first)->comp == DYNPROG_MATCH_COMP || ((Pair_T) pairs->first)->comp == AMBIGUOUS_COMP) {
+ npeelback++;
+ } else if (((Pair_T) pairs->first)->comp == INDEL_COMP) {
+ *n_peeled_indels += 1;
+ npeelback--;
+ } else if (((Pair_T) pairs->first)->comp == SHORTGAP_COMP) {
+ *n_peeled_indels += 1;
+ npeelback--;
+ } else {
+ npeelback--;
+ }
+ if (((Pair_T) pairs->first)->protectedp == true) {
+ *protectedp = true;
+ }
+ niter++;
+ peeled = List_transfer_one(peeled,&pairs);
+ }
+
+ if (pairs != NULL && ((Pair_T) pairs->first)->gapp == true) {
+ debug(printf(" Hit gap [");
+ Pair_dump_one(pairs->first,/*zerobasedp*/true);
+ printf("]"));
+ }
+ }
+
+ if (pairs != NULL &&
+ ( ((Pair_T) pairs->first)->gapp == true ||
+ ((Pair_T) pairs->first)->comp == INDEL_COMP ||
+ ((Pair_T) pairs->first)->comp == SHORTGAP_COMP )) {
+ /* Don't leave a gap or indel on the top of the pairs */
+ while (peeled != NULL &&
+ ( ((Pair_T) peeled->first)->gapp == true ||
+ ((Pair_T) peeled->first)->comp == INDEL_COMP ||
+ ((Pair_T) peeled->first)->comp == SHORTGAP_COMP)) {
+ debug(printf(" Putback [");
+ Pair_dump_one(peeled->first,/*zerobasedp*/true);
+ printf("]"));
+ pairs = List_transfer_one(pairs,&peeled);
+ }
+ if (peeled != NULL) {
+ debug(printf(" Putback [");
+ Pair_dump_one(peeled->first,/*zerobasedp*/true);
+ printf("]"));
+ pairs = List_transfer_one(pairs,&peeled); /* This should be match or mismatch */
+ }
+ }
+
+ if (pairs != NULL) {
+ leftpair = pairs->first;
+ *querydp3 = leftpair->querypos - 1;
+ *genomedp3 = leftpair->genomepos - 1;
+ } else if (peeled != NULL) {
+ leftpair = peeled->first;
+ *querydp3 = leftpair->querypos;
+ *genomedp3 = leftpair->genomepos;
+ } else {
+ /* fprintf(stderr,"In peel_rightward, pairs and peeled are both NULL\n"); */
+ /* abort(); */
+ }
+
+ debug(
+ if (pairs == NULL) {
+ printf(" => Top of pairs is NULL.");
+ } else {
+ pair = pairs->first;
+ printf(" => Top of pairs is ");
+ Pair_dump_one(pair,/*zerobasedp*/true);
+ }
+ printf("\n => querydp3 = %d, genomedp3 = %d\n",*querydp3,*genomedp3);
+ );
+
+ *peeled_pairs = peeled;
+ return pairs;
+}
+
+
+/************************************************************************
+ * Traversal functions
+ ************************************************************************/
+
+/* For peel_rightward and peel_leftward, we set quit_on_gap_p = true,
+ because we want to merge gaps in initial smoothing steps */
+
+static List_T
+traverse_single_gap (bool *filledp, int *dynprogindex, List_T pairs, List_T *path,
+ Pair_T leftpair, Pair_T rightpair,
+ Univcoord_T chroffset, Univcoord_T chrhigh,
+ char *queryseq_ptr, char *queryuc_ptr, int querylength,
+ bool watsonp, bool jump_late_p, Pairpool_T pairpool, Dynprog_T dynprog,
+ Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
+ int maxpeelback, double defect_rate, bool forcep, bool finalp) {
+ List_T gappairs, peeled_pairs, peeled_path;
+ int queryjump, genomejump;
+ int querydp5, querydp3;
+ Chrpos_T genomedp5, genomedp3;
+ int nmatches, nmismatches, nopens, nindels;
+ int unknowns, qopens, qindels, topens, tindels, ncanonical, nsemicanonical, nnoncanonical;
+ int finalscore, origscore;
+ bool protectedp;
+ int n_peeled_indels;
+ double min_splice_prob;
+ /* int origqueryjump, origgenomejump; */
+
+ debug(printf("\nTRAVERSE_SINGLE_GAP\n"));
+ querydp5 = leftpair->querypos + 1;
+ genomedp5 = leftpair->genomepos + 1;
+ /* if (leftpair->cdna == ' ') querydp5--; -- For old dynamic programming */
+ /* if (leftpair->genome == ' ') genomedp5--; -- For old dynamic programming */
+ querydp3 = rightpair->querypos - 1;
+ genomedp3 = rightpair->genomepos - 1;
+
+ /* origqueryjump = querydp3 - querydp5 + 1; */
+ /* origgenomejump = genomedp3 - genomedp5 + 1; */
+
+ /* Used to peelback only half as much as for a paired gap, to save
+ on dynamic programming, but not any more. */
+ protectedp = false;
+ pairs = peel_rightward(&n_peeled_indels,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
+ maxpeelback,/*stop_at_indels_p*/true);
+ *path = peel_leftward(&n_peeled_indels,&protectedp,&peeled_path,*path,&querydp5,&genomedp5,
+ maxpeelback,/*stop_at_indels_p*/true);
+
+ if (last_genomedp5 != NULL) {
+ if (querydp5 < 0) {
+ querydp5 = 0;
+ }
+ if (querydp3 >= querylength) {
+ querydp3 = querylength - 1;
+ }
+ if (0 && finalp == false && genomedp5 == last_genomedp5[querydp5] && genomedp3 == last_genomedp3[querydp3]) {
+ debug(printf("Already solved for %u..%u at %d..%d\n",genomedp5,genomedp3,querydp5,querydp3));
+
+ pairs = Pairpool_transfer(pairs,peeled_pairs);
+ *path = Pairpool_transfer(*path,peeled_path);
+
+ *filledp = false; /* This replaces the gap */
+ return pairs;
+ }
+ }
+
+ queryjump = querydp3 - querydp5 + 1;
+ genomejump = genomedp3 - genomedp5 + 1;
+
+ if (queryjump <= 0 || genomejump <= 0) {
+ /* This prevents cases like queryjump 0, genomejump 1 from being solved */
+ debug(printf("Unable to perform dynamic programming\n"));
+ *filledp = false;
+
+ pairs = Pairpool_transfer(pairs,peeled_pairs);
+ *path = Pairpool_transfer(*path,peeled_path);
+
+ return pairs;
+
+ } else {
+ gappairs = Dynprog_single_gap(&(*dynprogindex),&finalscore,
&nmatches,&nmismatches,&nopens,&nindels,dynprog,
&(queryseq_ptr[querydp5]),&(queryuc_ptr[querydp5]),
queryjump,genomejump,querydp5,genomedp5,
@@ -7270,7 +7607,8 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
Pair_T pair;
int queryjump, genomejump;
int querydp5, querydp3;
- Chrpos_T genomedp5, genomedp3;
+ Chrpos_T genomedp5, genomedp3, orig_genomedp5, orig_genomedp3;
+ int minpeelback, min_mismatches;
int new_leftgenomepos, new_rightgenomepos;
double left_prob, right_prob;
int finalscore, nmatches, nmismatches, nopens, nindels, exonhead, introntype;
@@ -7361,19 +7699,26 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
}
#else /* SHORTCUT */
+
+ orig_genomedp5 = genomedp5;
+ orig_genomedp3 = genomedp3;
+
if (defect_rate < DEFECT_HIGHQ) {
- maxpeelback = 6;
+ minpeelback = 6;
+ min_mismatches = 2;
} else if (defect_rate < DEFECT_MEDQ) {
- maxpeelback = 8;
+ minpeelback = 8;
+ min_mismatches = 3;
} else {
- maxpeelback = 10;
+ minpeelback = 10;
+ min_mismatches = 4;
}
protectedp = false;
- pairs = peel_rightward(&n_peeled_indels_rightward,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
- maxpeelback,stop_at_indels_p);
- *path = peel_leftward(&n_peeled_indels_leftward,&protectedp,&peeled_path,*path,&querydp5,&genomedp5,
- maxpeelback,stop_at_indels_p);
+ pairs = peel_rightward_intron(&n_peeled_indels_rightward,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
+ orig_genomedp5,stop_at_indels_p,chroffset,chrhigh,watsonp,minpeelback,min_mismatches);
+ *path = peel_leftward_intron(&n_peeled_indels_leftward,&protectedp,&peeled_path,*path,&querydp5,&genomedp5,
+ orig_genomedp3,stop_at_indels_p,chroffset,chrhigh,watsonp,minpeelback,min_mismatches);
if (last_genomedp5 != NULL) {
if (querydp5 < 0) {
@@ -7583,7 +7928,7 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
debug(Pair_dump_list(micropairs,/*zerobasedp*/true));
debug(printf("\n"));
-#if 1
+#if 0
if (1 || (nindels == 0 && nmismatches < 4)) {
/* Have a higher standard */
if (prob2 >= 0.95 && prob3 >= 0.95) {
@@ -7635,7 +7980,8 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
right_gappairs = NULL, left_gappairs = NULL, peeled_pairs, peeled_path;
int queryjump, genomejump;
int querydp5, querydp3;
- Chrpos_T genomedp5, genomedp3;
+ Chrpos_T genomedp5, genomedp3, orig_genomedp5, orig_genomedp3;
+ int minpeelback, min_mismatches;
int new_leftgenomepos, new_rightgenomepos;
double single_left_prob, single_right_prob, dual_left_prob_1, dual_right_prob_1, dual_left_prob_2, dual_right_prob_2;
int querydp5_dual, querydp3_dual, genomedp5_dual, genomedp3_dual;
@@ -7674,19 +8020,26 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
querydp3 = rightpair->querypos - 1;
genomedp3 = rightpair->genomepos - 1;
+
+ orig_genomedp5 = genomedp5;
+ orig_genomedp3 = genomedp3;
+
if (defect_rate < DEFECT_HIGHQ) {
- maxpeelback = 6;
+ minpeelback = 6;
+ min_mismatches = 2;
} else if (defect_rate < DEFECT_MEDQ) {
- maxpeelback = 8;
+ minpeelback = 8;
+ min_mismatches = 3;
} else {
- maxpeelback = 10;
+ minpeelback = 10;
+ min_mismatches = 4;
}
protectedp = false;
- pairs = peel_rightward(&n_peeled_indels,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
- maxpeelback,/*stop_at_indels_p*/false);
- *path = peel_leftward(&n_peeled_indels,&protectedp,&peeled_path,*path,&querydp5,&genomedp5,
- maxpeelback,/*stop_at_indels_p*/false);
+ pairs = peel_rightward_intron(&n_peeled_indels,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
+ orig_genomedp5,/*stop_at_indels_p*/false,chroffset,chrhigh,watsonp,minpeelback,min_mismatches);
+ *path = peel_leftward_intron(&n_peeled_indels,&protectedp,&peeled_path,*path,&querydp5,&genomedp5,
+ orig_genomedp3,/*stop_at_indels_p*/false,chroffset,chrhigh,watsonp,minpeelback,min_mismatches);
if (last_genomedp5 != NULL) {
if (querydp5 < 0) {
@@ -7774,8 +8127,8 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
/* Right of short exon */
querydp5_dual = midquerypos;
genomedp5_dual = midgenomepos;
- querydp3_dual = querydp3; /* From peel_rightward */
- genomedp3_dual = genomedp3; /* From peel_rightward */
+ querydp3_dual = querydp3; /* From peel_rightward_intron */
+ genomedp3_dual = genomedp3; /* From peel_rightward_intron */
queryjump = querydp3_dual - querydp5_dual + 1;
genomejump = queryjump + extramaterial_paired;
@@ -7801,8 +8154,8 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
dual_goodness = dual_nmatches_2 + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
/* Left of short exon */
- querydp5_dual = querydp5; /* From peel_leftward */
- genomedp5_dual = genomedp5; /* From peel_leftward */
+ querydp5_dual = querydp5; /* From peel_leftward_intron */
+ genomedp5_dual = genomedp5; /* From peel_leftward_intron */
querydp3_dual = midquerypos-1;
genomedp3_dual = midgenomepos-1;
@@ -7896,11 +8249,11 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
dual_canonical_p));
debug(printf("Single goodness = %d. Dual goodness = %d. ",
single_goodness,dual_goodness));
- debug(printf("Probability is %g. ",middle_exonprob));
+ debug(printf("Probability is %g (ignoring). ",middle_exonprob));
}
/* Want high threshold for accepting dual intron */
- if (dual_canonical_p == true && middle_exonprob < 0.001 &&
+ if (dual_canonical_p == true && /*middle_exonprob < 0.001 &&*/
single_canonical_p == false && single_goodness <= dual_goodness) {
debug(printf("Dual scores win\n"));
debug(printf("Loser: single_gappairs\n"));
@@ -7926,91 +8279,98 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
}
if (singlep == true) {
- if (right_end_intron_p == true) {
- /* Keep left intron only and extend right from short exon */
- querydp5_right = querydp5;
- genomedp5_right = genomedp5;
- querydp3_right = midquerypos;
- genomedp3_right = midgenomepos;
-
- queryjump = querydp3_right - querydp5_right + 1;
- genomejump = queryjump + extramaterial_paired;
-
- if (genomedp5_right + genomejump - 1 >= genomedp3_right) {
- /* Bounds don't make sense */
- debug(printf("Bounds don't make sense if we omit right intron: %d + %d - 1 >= %d\n\n",
- genomedp5_right,genomejump,genomedp3_right));
- right_gappairs = NULL;
+ if (single_gappairs == NULL) {
+ /* Need to handle the possibility that Dynprog_genome_gap artificially returns NULL for single_gappairs */
+ pairs = Pairpool_transfer(pairs,peeled_pairs);
+ *path = Pairpool_transfer(*path,peeled_path);
- } else {
- right_gappairs = Dynprog_genome_gap(&(*dynprogindex),&right_score,&new_leftgenomepos,&new_rightgenomepos,
- &single_left_prob,&single_right_prob,&right_nmatches,&nmismatches,&nopens,&nindels,
- &right_exonhead,&right_introntype,dynprogL,dynprogR,
- &(queryseq_ptr[querydp5_right]),&(queryuc_ptr[querydp5_right]),
- queryjump,genomejump,genomejump,
- querydp5_right,genomedp5_right,genomedp3_right,
- chrnum,chroffset,chrhigh,
- cdna_direction,watsonp,jump_late_p,pairpool,extraband_paired,
- defect_rate,maxpeelback,/*halfp*/false,/*finalp*/false,splicingp);
+ } else {
+ if (right_end_intron_p == true) {
+ /* Keep left intron only and extend right from short exon */
+ querydp5_right = querydp5;
+ genomedp5_right = genomedp5;
+ querydp3_right = midquerypos;
+ genomedp3_right = midgenomepos;
+
+ queryjump = querydp3_right - querydp5_right + 1;
+ genomejump = queryjump + extramaterial_paired;
- right_goodness = right_nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
- debug(printf("Right goodness (keeping left intron only) = %d\n",right_goodness));
+ if (genomedp5_right + genomejump - 1 >= genomedp3_right) {
+ /* Bounds don't make sense */
+ debug(printf("Bounds don't make sense if we omit right intron: %d + %d - 1 >= %d\n\n",
+ genomedp5_right,genomejump,genomedp3_right));
+ right_gappairs = NULL;
- if (right_goodness > single_goodness) {
- debug(printf("New winner: right gappairs\n"));
- debug(Pair_dump_list(right_gappairs,true));
- single_gappairs = right_gappairs;
- single_goodness = right_goodness;
+ } else {
+ right_gappairs = Dynprog_genome_gap(&(*dynprogindex),&right_score,&new_leftgenomepos,&new_rightgenomepos,
+ &single_left_prob,&single_right_prob,&right_nmatches,&nmismatches,&nopens,&nindels,
+ &right_exonhead,&right_introntype,dynprogL,dynprogR,
+ &(queryseq_ptr[querydp5_right]),&(queryuc_ptr[querydp5_right]),
+ queryjump,genomejump,genomejump,
+ querydp5_right,genomedp5_right,genomedp3_right,
+ chrnum,chroffset,chrhigh,
+ cdna_direction,watsonp,jump_late_p,pairpool,extraband_paired,
+ defect_rate,maxpeelback,/*halfp*/false,/*finalp*/false,splicingp);
+
+ right_goodness = right_nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
+ debug(printf("Right goodness (keeping left intron only) = %d\n",right_goodness));
+
+ if (right_goodness > single_goodness) {
+ debug(printf("New winner: right gappairs\n"));
+ debug(Pair_dump_list(right_gappairs,true));
+ single_gappairs = right_gappairs;
+ single_goodness = right_goodness;
+ }
}
}
- }
- if (left_end_intron_p == true) {
- /* Keep right intron only and extend left from short exon */
- querydp5_left = midquerypos;
- genomedp5_left = midgenomepos;
- querydp3_left = querydp3;
- genomedp3_left = genomedp3;
+ if (left_end_intron_p == true) {
+ /* Keep right intron only and extend left from short exon */
+ querydp5_left = midquerypos;
+ genomedp5_left = midgenomepos;
+ querydp3_left = querydp3;
+ genomedp3_left = genomedp3;
- queryjump = querydp3_left - querydp5_left + 1;
- genomejump = queryjump + extramaterial_paired;
+ queryjump = querydp3_left - querydp5_left + 1;
+ genomejump = queryjump + extramaterial_paired;
- if (genomedp5_left + genomejump - 1 >= genomedp3_left) {
- /* Bounds don't make sense */
- debug(printf("Bounds don't make sense if we omit left intron: %d + %d - 1 >= %d\n\n",
- genomedp5_left,genomejump,genomedp3_left));
- left_gappairs = NULL;
+ if (genomedp5_left + genomejump - 1 >= genomedp3_left) {
+ /* Bounds don't make sense */
+ debug(printf("Bounds don't make sense if we omit left intron: %d + %d - 1 >= %d\n\n",
+ genomedp5_left,genomejump,genomedp3_left));
+ left_gappairs = NULL;
- } else {
- left_gappairs = Dynprog_genome_gap(&(*dynprogindex),&left_score,&new_leftgenomepos,&new_rightgenomepos,
- &single_left_prob,&single_right_prob,&left_nmatches,&nmismatches,&nopens,&nindels,
- &left_exonhead,&left_introntype,dynprogL,dynprogR,
- &(queryseq_ptr[querydp5_left]),&(queryuc_ptr[querydp5_left]),
- queryjump,genomejump,genomejump,
- querydp5_left,genomedp5_left,genomedp3_left,
- chrnum,chroffset,chrhigh,
- cdna_direction,watsonp,jump_late_p,pairpool,extraband_paired,
- defect_rate,maxpeelback,/*halfp*/false,/*finalp*/false,splicingp);
+ } else {
+ left_gappairs = Dynprog_genome_gap(&(*dynprogindex),&left_score,&new_leftgenomepos,&new_rightgenomepos,
+ &single_left_prob,&single_right_prob,&left_nmatches,&nmismatches,&nopens,&nindels,
+ &left_exonhead,&left_introntype,dynprogL,dynprogR,
+ &(queryseq_ptr[querydp5_left]),&(queryuc_ptr[querydp5_left]),
+ queryjump,genomejump,genomejump,
+ querydp5_left,genomedp5_left,genomedp3_left,
+ chrnum,chroffset,chrhigh,
+ cdna_direction,watsonp,jump_late_p,pairpool,extraband_paired,
+ defect_rate,maxpeelback,/*halfp*/false,/*finalp*/false,splicingp);
- left_goodness = left_nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
- debug(printf("Left goodness (keeping right intron only) = %d\n",left_goodness));
-
- if (left_goodness > single_goodness) {
- debug(printf("New winner: left gappairs\n"));
- debug(Pair_dump_list(left_gappairs,true));
- single_gappairs = left_gappairs;
- single_goodness = left_goodness;
+ left_goodness = left_nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
+ debug(printf("Left goodness (keeping right intron only) = %d\n",left_goodness));
+
+ if (left_goodness > single_goodness) {
+ debug(printf("New winner: left gappairs\n"));
+ debug(Pair_dump_list(left_gappairs,true));
+ single_gappairs = left_gappairs;
+ single_goodness = left_goodness;
+ }
}
}
- }
- /* Finally transfer best single result */
- if (single_gappairs == right_gappairs) {
- pairs = Pairpool_transfer(pairs,peeled_pairs);
- }
- pairs = Pairpool_transfer(pairs,single_gappairs);
- if (single_gappairs == left_gappairs) {
- *path = Pairpool_transfer(*path,peeled_path);
+ /* Finally transfer best single result */
+ if (single_gappairs == right_gappairs) {
+ pairs = Pairpool_transfer(pairs,peeled_pairs);
+ }
+ pairs = Pairpool_transfer(pairs,single_gappairs);
+ if (single_gappairs == left_gappairs) {
+ *path = Pairpool_transfer(*path,peeled_path);
+ }
}
}
@@ -8422,72 +8782,605 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
maxpeelback,/*stop_at_indels_p*/true);
}
- queryjump = querydp3 - querydp5_distalgap + 1;
- genomejump = queryjump + extramaterial_end; /* proposed */
- /* Previously, we limited genomejump = min(2*queryjump,queryjump+extramaterial_end) */
+ queryjump = querydp3 - querydp5_distalgap + 1;
+ genomejump = queryjump + extramaterial_end; /* proposed */
+ /* Previously, we limited genomejump = min(2*queryjump,queryjump+extramaterial_end) */
+
+ /* genomedp3 = genomedp5_distalgap + genomejump - 1; */
+#ifdef EXTRACT_GENOMICSEG
+ /* Make sure we don't go past the end */
+ if (genomedp3 > genomiclength - 1) {
+ genomedp3 = genomiclength - 1;
+ genomejump = genomedp3 - genomedp5_distalgap + 1;
+ }
+#endif
+
+ debug(printf("Stage 3 (dir %d), extend_ending3: Dynamic programming at 3' end (distal to gap): querydp5 = %d, querydp3 = %d, genomedp5 = %d\n",
+ cdna_direction,querydp5_distalgap,querydp3,genomedp5_distalgap));
+
+ if (endalign == QUERYEND_GAP && splicesites != NULL) {
+ continuous_gappairs_distalgap = Dynprog_end3_known(&(*knownsplicep),&(*dynprogindex_minor),&(*finalscore),
+ &(*ambig_end_length),&(*ambig_splicetype),
+ &nmatches,&nmismatches,&nopens,&nindels,dynprog,
+ &(queryseq_ptr[querydp5_distalgap]),&(queryuc_ptr[querydp5_distalgap]),
+ queryjump,genomejump,querydp5_distalgap,genomedp5_distalgap,
+ querylength,chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
+ cdna_direction,watsonp,jump_late_p,pairpool,
+ extraband_end,defect_rate);
+ if (*ambig_end_length > 0) {
+ *ambig_prob = 2.0;
+ }
+ } else {
+ continuous_gappairs_distalgap = Dynprog_end3_gap(&(*dynprogindex_minor),&(*finalscore),
+ &nmatches,&nmismatches,&nopens,&nindels,dynprog,
+ &(queryseq_ptr[querydp5_distalgap]),&(queryuc_ptr[querydp5_distalgap]),
+ queryjump,genomejump,querydp5_distalgap,genomedp5_distalgap,
+ chroffset,chrhigh,watsonp,jump_late_p,pairpool,
+ extraband_end,defect_rate,endalign);
+ *ambig_end_length = 0;
+ *ambig_prob = 0.0;
+ *knownsplicep = false;
+ }
+
+ debug(printf(" finalscore: %d\n",*finalscore));
+ if (continuous_gappairs_distalgap == NULL) {
+ return (List_T) NULL;
+ } else {
+ continuous_gappairs_distalgap = List_reverse(continuous_gappairs_distalgap);
+ firstpair = List_head(continuous_gappairs_distalgap);
+ if (0 && firstpair->querypos != querydp5_distalgap) {
+ /* Not a good test anymore, since we are halting peelbacks at gaps */
+ /* Must have an indel between the gappairs and the rest of the read */
+ debug(printf("Detected indel between gappairs %d and the rest of the read %d\n",
+ firstpair->querypos,querydp5_distalgap));
+ return (List_T) NULL;
+
+ } else if (*finalscore < 0) {
+ *knownsplicep = false;
+#if 0
+ return (List_T) NULL;
+#endif
+ return continuous_gappairs_distalgap;
+ } else {
+ return continuous_gappairs_distalgap;
+ }
+ }
+}
+
+
+
+/* Modified from trim_novel_spliceends. Note that code for 5' end
+ here is taken from the 3' end of trim_novel_spliceends, and vice
+ versa */
+static void
+find_dual_break_spliceends (List_T path, List_T pairs,
+ Doublelist_T *spliceprobs5, Doublelist_T *spliceprobs3,
+#ifdef LARGE_GENOMES
+ Uint8list_T *splice_positions_5, Uint8list_T *splice_positions_3,
+#else
+ Uintlist_T *splice_positions_5, Uintlist_T *splice_positions_3,
+#endif
+ int cdna_direction, bool watsonp,
+ Univcoord_T chroffset, Univcoord_T chrhigh) {
+ List_T p;
+ int exondist, i;
+ int querypos;
+
+ Pair_T pair;
+ Univcoord_T genomicpos, start_genomicpos, middle_genomicpos, end_genomicpos;
+ Univcoord_T splice_genomepos_5, splice_genomepos_3, splice_genomepos_5_mm, splice_genomepos_3_mm;
+ Univcoord_T start, middle, end; /* start to middle has mismatches, while middle to end has none */
+ double donor_prob, acceptor_prob;
+ double max_prob_5 = 0.0, max_prob_3 = 0.0, max_prob_5_mm = 0.0, max_prob_3_mm = 0.0;
+
+
+ debug13(printf("\nEntered find_dual_break_spliceends with cdna_direction %d\n",cdna_direction));
+#ifdef LARGE_GENOMES
+ *splice_positions_5 = *splice_positions_3 = (Uint8list_T) NULL;
+#else
+ *splice_positions_5 = *splice_positions_3 = (Uintlist_T) NULL;
+#endif
+ *spliceprobs5 = *spliceprobs3 = (Doublelist_T) NULL;
+
+
+ /* 5' intron end */
+ if (path != NULL) {
+ p = path;
+ while (p != NULL && ((Pair_T) p->first)->gapp == true) {
+ p = List_next(p);
+ }
+
+ if (p != NULL) {
+ pair = (Pair_T) List_head(p);
+ querypos = pair->querypos + 1; /* Because start_genomicpos = start + 1 */
+ start = middle = end = pair->genomepos;
+ debug13(printf("Initializing start and end to be %u\n",start));
+
+ i = 0;
+ while (i < END_SPLICESITE_SEARCH) {
+ if ((p = List_next(p)) == NULL) {
+ break;
+ } else if (pair->gapp == true) {
+ break;
+ } else {
+ end = pair->genomepos;
+ debug13(printf("Resetting end to be %u\n",end));
+ }
+ pair = (Pair_T) List_head(p);
+ i++;
+ }
+
+ start = middle + 5;
+ querypos += 5;
+
+ /* Find distance from end to intron, if any */
+ exondist = 0;
+ while (p != NULL && ((Pair_T) List_head(p))->gapp == false &&
+ exondist < END_MIN_EXONLENGTH) {
+ p = List_next(p);
+ exondist++;
+ }
+ debug13(printf("exondist is %d\n",exondist));
+
+ if (cdna_direction > 0) {
+ if (watsonp) {
+ /* splicetype5 = splicetype5_mm = DONOR; */
+
+ start_genomicpos = start + 1;
+ middle_genomicpos = middle + 1;
+ end_genomicpos = end + 1;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
+ donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */
+ debug13(printf("5', watson, sense anti %d %u %u %f mm",querypos,chroffset+genomicpos,genomicpos,donor_prob));
+ if (donor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_5 = Uint8list_push(*splice_positions_5,genomicpos - 1);
+#else
+ *splice_positions_5 = Uintlist_push(*splice_positions_5,genomicpos - 1);
+#endif
+ *spliceprobs5 = Doublelist_push(*spliceprobs5,donor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos--;
+ querypos--;
+ }
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
+ donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */
+ debug13(printf("5', watson, sense anti %d %u %u %f",querypos,chroffset+genomicpos,genomicpos,donor_prob));
+ if (donor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_5 = Uint8list_push(*splice_positions_5,genomicpos - 1);
+#else
+ *splice_positions_5 = Uintlist_push(*splice_positions_5,genomicpos - 1);
+#endif
+ *spliceprobs5 = Doublelist_push(*spliceprobs5,donor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos--;
+ querypos--;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ /* splicetype5 = splicetype5_mm = ANTIDONOR; */
+
+ start_genomicpos = (chrhigh - chroffset) - start;
+ middle_genomicpos = (chrhigh - chroffset) - middle;
+ end_genomicpos = (chrhigh - chroffset) - end;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
+ donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */
+ debug13(printf("5', crick, sense forward %d %u %u %f mm",querypos,chroffset+genomicpos,genomicpos,donor_prob));
+ if (donor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_5 = Uint8list_push(*splice_positions_5,(chrhigh - chroffset) - genomicpos);
+#else
+ *splice_positions_5 = Uintlist_push(*splice_positions_5,(chrhigh - chroffset) - genomicpos);
+#endif
+ *spliceprobs5 = Doublelist_push(*spliceprobs5,donor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos++;
+ querypos--;
+ }
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
+ donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */
+ debug13(printf("5', crick, sense forward %d %u %u %f",querypos,chroffset+genomicpos,genomicpos,donor_prob));
+ if (donor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_5 = Uint8list_push(*splice_positions_5,(chrhigh - chroffset) - genomicpos);
+#else
+ *splice_positions_5 = Uintlist_push(*splice_positions_5,(chrhigh - chroffset) - genomicpos);
+#endif
+ *spliceprobs5 = Doublelist_push(*spliceprobs5,donor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos++;
+ querypos--;
+ }
+ debug13(printf("\n"));
+ }
+
+ } else if (cdna_direction < 0) {
+ if (watsonp) {
+ /* splicetype5 = splicetype5_mm = ANTIACCEPTOR; */
+
+ start_genomicpos = start + 1;
+ middle_genomicpos = middle + 1;
+ end_genomicpos = end + 1;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */
+ debug13(printf("5', watson, sense forward %d %u %u %f mm",querypos,chroffset+genomicpos,genomicpos,acceptor_prob));
+ if (acceptor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_5 = Uint8list_push(*splice_positions_5,genomicpos - 1);
+#else
+ *splice_positions_5 = Uintlist_push(*splice_positions_5,genomicpos - 1);
+#endif
+ *spliceprobs5 = Doublelist_push(*spliceprobs5,acceptor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos--;
+ querypos--;
+ }
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */
+ debug13(printf("5', watson, sense forward %d %u %u %f",querypos,chroffset+genomicpos,genomicpos,acceptor_prob));
+ if (acceptor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_5 = Uint8list_push(*splice_positions_5,genomicpos - 1);
+#else
+ *splice_positions_5 = Uintlist_push(*splice_positions_5,genomicpos - 1);
+#endif
+ *spliceprobs5 = Doublelist_push(*spliceprobs5,acceptor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos--;
+ querypos--;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ /* splicetype5 = splicetype5_mm = ACCEPTOR; */
+
+ start_genomicpos = (chrhigh - chroffset) - start;
+ middle_genomicpos = (chrhigh - chroffset) - middle;
+ end_genomicpos = (chrhigh - chroffset) - end;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
+ acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */
+ debug13(printf("5', crick, sense anti %d %u %u %f mm",querypos,chroffset+genomicpos,genomicpos,acceptor_prob));
+ if (acceptor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_5 = Uint8list_push(*splice_positions_5,(chrhigh - chroffset) - genomicpos);
+#else
+ *splice_positions_5 = Uintlist_push(*splice_positions_5,(chrhigh - chroffset) - genomicpos);
+#endif
+ *spliceprobs5 = Doublelist_push(*spliceprobs5,acceptor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos++;
+ querypos--;
+ }
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
+ acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */
+ debug13(printf("5', crick, sense anti %d %u %u %f",querypos,chroffset+genomicpos,genomicpos,acceptor_prob));
+ if (acceptor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_5 = Uint8list_push(*splice_positions_5,(chrhigh - chroffset) - genomicpos);
+#else
+ *splice_positions_5 = Uintlist_push(*splice_positions_5,(chrhigh - chroffset) - genomicpos);
+#endif
+ *spliceprobs5 = Doublelist_push(*spliceprobs5,acceptor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos++;
+ querypos--;
+ }
+ debug13(printf("\n"));
+ }
+
+ } else {
+ fprintf(stderr,"Not expecting cdna_direction to be 0\n");
+ abort();
+ }
+ }
+ }
+
+ /* 3' intron end */
+ if (pairs != NULL) {
+ p = pairs;
+ while (p != NULL && ((Pair_T) p->first)->gapp == true) {
+ p = List_next(p);
+ }
+
+ if (p != NULL) {
+ pair = (Pair_T) List_head(p);
+ querypos = pair->querypos;
+ start = middle = end = pair->genomepos;
+ debug13(printf("Initializing start and end to be %u\n",start));
+
+ i = 0;
+ while (i < END_SPLICESITE_SEARCH) {
+ if ((p = List_next(p)) == NULL) {
+ break;
+ } else if (pair->gapp == true) {
+ break;
+ } else {
+ end = pair->genomepos;
+ debug13(printf("Resetting end to be %u\n",end));
+ }
+ pair = (Pair_T) List_head(p);
+ i++;
+ }
+
+ start = middle - 5;
+ querypos -= 5;
+
+ /* Find distance from end to intron, if any */
+ exondist = 0;
+ while (p != NULL && ((Pair_T) List_head(p))->gapp == false &&
+ exondist < END_MIN_EXONLENGTH) {
+ p = List_next(p);
+ exondist++;
+ }
+ debug13(printf("exondist is %d\n",exondist));
+
+ if (cdna_direction > 0) {
+ if (watsonp) {
+ /* splicetype3 = splicetype3_mm = ACCEPTOR; */
+
+ start_genomicpos = start;
+ middle_genomicpos = middle;
+ end_genomicpos = end;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
+ acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */
+ debug13(printf("3', watson, sense forward %d %u %u %f mm",querypos,chroffset+genomicpos,genomicpos,acceptor_prob));
+ if (acceptor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_3 = Uint8list_push(*splice_positions_3,genomicpos);
+#else
+ *splice_positions_3 = Uintlist_push(*splice_positions_3,genomicpos);
+#endif
+ *spliceprobs3 = Doublelist_push(*spliceprobs3,acceptor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos++;
+ querypos++;
+ }
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
+ acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */
+ debug13(printf("3', watson, sense forward %d %u %u %f",querypos,chroffset+genomicpos,genomicpos,acceptor_prob));
+ if (acceptor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_3 = Uint8list_push(*splice_positions_3,genomicpos);
+#else
+ *splice_positions_3 = Uintlist_push(*splice_positions_3,genomicpos);
+#endif
+ *spliceprobs3 = Doublelist_push(*spliceprobs3,acceptor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos++;
+ querypos++;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ /* splicetype3 = splicetype3_mm = ANTIACCEPTOR; */
+
+ start_genomicpos = (chrhigh - chroffset) - start + 1;
+ middle_genomicpos = (chrhigh - chroffset) - middle + 1;
+ end_genomicpos = (chrhigh - chroffset) - end + 1;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */
+ debug13(printf("3', crick, sense anti %d %u %u %f mm",querypos,chroffset+genomicpos,genomicpos,acceptor_prob));
+ if (acceptor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_3 = Uint8list_push(*splice_positions_3,(chrhigh - chroffset) - genomicpos + 1);
+#else
+ *splice_positions_3 = Uintlist_push(*splice_positions_3,(chrhigh - chroffset) - genomicpos + 1);
+#endif
+ *spliceprobs3 = Doublelist_push(*spliceprobs3,acceptor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos--;
+ querypos++;
+ }
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */
+ debug13(printf("3', crick, sense anti %d %u %u %f",querypos,chroffset+genomicpos,genomicpos,acceptor_prob));
+ if (acceptor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_3 = Uint8list_push(*splice_positions_3,(chrhigh - chroffset) - genomicpos + 1);
+#else
+ *splice_positions_3 = Uintlist_push(*splice_positions_3,(chrhigh - chroffset) - genomicpos + 1);
+#endif
+ *spliceprobs3 = Doublelist_push(*spliceprobs3,acceptor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos--;
+ querypos++;
+ }
+ debug13(printf("\n"));
+ }
+
+ } else if (cdna_direction < 0) {
+ if (watsonp) {
+ /* splicetype3 = splicetype3_mm = ANTIDONOR; */
+
+ start_genomicpos = start;
+ middle_genomicpos = middle;
+ end_genomicpos = end;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
+ donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */
+ debug13(printf("3', watson, sense anti %d %u %u %f mm",querypos,chroffset+genomicpos,genomicpos,donor_prob));
+ if (donor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_3 = Uint8list_push(*splice_positions_3,genomicpos);
+#else
+ *splice_positions_3 = Uintlist_push(*splice_positions_3,genomicpos);
+#endif
+ *spliceprobs3 = Doublelist_push(*spliceprobs3,donor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos++;
+ querypos++;
+ }
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
+ donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */
+ debug13(printf("3', watson, sense anti %d %u %u %f",querypos,chroffset+genomicpos,genomicpos,donor_prob));
+ if (donor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_3 = Uint8list_push(*splice_positions_3,genomicpos);
+#else
+ *splice_positions_3 = Uintlist_push(*splice_positions_3,genomicpos);
+#endif
+ *spliceprobs3 = Doublelist_push(*spliceprobs3,donor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos++;
+ querypos++;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ /* splicetype3 = splicetype3_mm = DONOR; */
+
+ start_genomicpos = (chrhigh - chroffset) - start + 1;
+ middle_genomicpos = (chrhigh - chroffset) - middle + 1;
+ end_genomicpos = (chrhigh - chroffset) - end + 1;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
+ donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */
+ debug13(printf("3', crick, sense forward %d %u %u %f mm",querypos,chroffset+genomicpos,genomicpos,donor_prob));
+ if (donor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_3 = Uint8list_push(*splice_positions_3,(chrhigh - chroffset) - genomicpos + 1);
+#else
+ *splice_positions_3 = Uintlist_push(*splice_positions_3,(chrhigh - chroffset) - genomicpos + 1);
+#endif
+ *spliceprobs3 = Doublelist_push(*spliceprobs3,donor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos--;
+ querypos++;
+ }
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
+ donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */
+ debug13(printf("3', crick, sense forward %d %u %u %f",querypos,chroffset+genomicpos,genomicpos,donor_prob));
+ if (donor_prob > 0.5) {
+#ifdef LARGE_GENOMES
+ *splice_positions_3 = Uint8list_push(*splice_positions_3,(chrhigh - chroffset) - genomicpos + 1);
+#else
+ *splice_positions_3 = Uintlist_push(*splice_positions_3,(chrhigh - chroffset) - genomicpos + 1);
+#endif
+ *spliceprobs3 = Doublelist_push(*spliceprobs3,donor_prob);
+ debug13(printf(" **"));
+ }
+ debug13(printf("\n"));
+ genomicpos--;
+ querypos++;
+ }
+ debug13(printf("\n"));
+ }
+
+ } else {
+ fprintf(stderr,"Not expecting cdna_direction to be 0\n");
+ abort();
+ }
+ }
+ }
+
+ return;
+}
- /* genomedp3 = genomedp5_distalgap + genomejump - 1; */
-#ifdef EXTRACT_GENOMICSEG
- /* Make sure we don't go past the end */
- if (genomedp3 > genomiclength - 1) {
- genomedp3 = genomiclength - 1;
- genomejump = genomedp3 - genomedp5_distalgap + 1;
- }
-#endif
- debug(printf("Stage 3 (dir %d), extend_ending3: Dynamic programming at 3' end (distal to gap): querydp5 = %d, querydp3 = %d, genomedp5 = %d\n",
- cdna_direction,querydp5_distalgap,querydp3,genomedp5_distalgap));
-
- if (endalign == QUERYEND_GAP && splicesites != NULL) {
- continuous_gappairs_distalgap = Dynprog_end3_known(&(*knownsplicep),&(*dynprogindex_minor),&(*finalscore),
- &(*ambig_end_length),&(*ambig_splicetype),
- &nmatches,&nmismatches,&nopens,&nindels,dynprog,
- &(queryseq_ptr[querydp5_distalgap]),&(queryuc_ptr[querydp5_distalgap]),
- queryjump,genomejump,querydp5_distalgap,genomedp5_distalgap,
- querylength,chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
- cdna_direction,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate);
- if (*ambig_end_length > 0) {
- *ambig_prob = 2.0;
- }
- } else {
- continuous_gappairs_distalgap = Dynprog_end3_gap(&(*dynprogindex_minor),&(*finalscore),
- &nmatches,&nmismatches,&nopens,&nindels,dynprog,
- &(queryseq_ptr[querydp5_distalgap]),&(queryuc_ptr[querydp5_distalgap]),
- queryjump,genomejump,querydp5_distalgap,genomedp5_distalgap,
- chroffset,chrhigh,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate,endalign);
- *ambig_end_length = 0;
- *ambig_prob = 0.0;
- *knownsplicep = false;
- }
+/* Modified from make_microexon_pairs_double in dynprog_single.c.
+ *
+ * Pushes one pair onto pairs for each of the lengthM positions of a microexon
+ * and returns the resulting list.  Position i pairs query index querydpM+i
+ * (characters read from queryseq_ptr and queryuc_ptr) with the genomic
+ * nucleotide fetched by get_genomic_nt at genomedpM+i.  Every pair goes
+ * through Pairpool_push with the caller's pairpool and dynprogindex; the comp
+ * code is DYNPROG_MATCH_COMP, AMBIGUOUS_COMP or MISMATCH_COMP, as selected in
+ * the loop below. */
+static List_T
+add_microexon_pairs (List_T pairs, int querydpM, Chrpos_T genomedpM, int lengthM,
+ char *queryseq_ptr, char *queryuc_ptr,
+ Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
+ Pairpool_T pairpool, int dynprogindex) {
+ Pair_T gappair; /* NOTE(review): never referenced in the added lines of this function -- likely removable */
+ char c1, c1_uc, c2, c2_alt;
+ int i;
- debug(printf(" finalscore: %d\n",*finalscore));
- if (continuous_gappairs_distalgap == NULL) {
- return (List_T) NULL;
- } else {
- continuous_gappairs_distalgap = List_reverse(continuous_gappairs_distalgap);
- firstpair = List_head(continuous_gappairs_distalgap);
- if (0 && firstpair->querypos != querydp5_distalgap) {
- /* Not a good test anymore, since we are halting peelbacks at gaps */
- /* Must have an indel between the gappairs and the rest of the read */
- debug(printf("Detected indel between gappairs %d and the rest of the read %d\n",
- firstpair->querypos,querydp5_distalgap));
- return (List_T) NULL;
-
- } else if (*finalscore < 0) {
- *knownsplicep = false;
-#if 0
- return (List_T) NULL;
+ /* Microexon: classify each position by comparing the uppercase query
+  * character (c1_uc) with the genomic character c2 and its alternate c2_alt;
+  * the original-case character c1 is what gets stored in the pair */
+ for (i = 0; i < lengthM; i++) {
+ c1 = queryseq_ptr[querydpM+i];
+ c1_uc = queryuc_ptr[querydpM+i];
+
+ c2 = get_genomic_nt(&c2_alt,genomedpM+i,chroffset,chrhigh,watsonp);
+#ifdef EXTRACT_GENOMICSEG
+ assert(c2 == genomicseg[genomedpM+i]);
#endif
- return continuous_gappairs_distalgap;
+
+ if (c1_uc == c2 || c1_uc == c2_alt) { /* exact match to either genomic character */
+ pairs = Pairpool_push(pairs,pairpool,querydpM+i,genomedpM+i,c1,DYNPROG_MATCH_COMP,c2,c2_alt,
+ dynprogindex);
+ } else if (consistent_array[(int) c1_uc][(int) c2] == true || consistent_array[(int) c1_uc][(int) c2_alt] == true) { /* not equal, but flagged consistent by the lookup table */
+ pairs = Pairpool_push(pairs,pairpool,querydpM+i,genomedpM+i,c1,AMBIGUOUS_COMP,c2,c2_alt,
+ dynprogindex);
} else {
- return continuous_gappairs_distalgap;
+ pairs = Pairpool_push(pairs,pairpool,querydpM+i,genomedpM+i,c1,MISMATCH_COMP,c2,c2_alt,
+ dynprogindex);
}
}
-}
-
+ return pairs;
+}
static List_T
@@ -8497,18 +9390,259 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
char *queryaaseq_ptr,
#endif
char *queryseq_ptr, char *queryuc_ptr, int querylength,
- bool watsonp, int genestrand, Pairpool_T pairpool, int maxpeelback,
+ int cdna_direction, bool watsonp, int genestrand, Pairpool_T pairpool, int maxpeelback,
Oligoindex_array_T oligoindices_minor,
- Diagpool_T diagpool, Cellpool_T cellpool) {
- List_T gappairs, peeled_pairs = NULL, peeled_path = NULL;
+ Diagpool_T diagpool, Cellpool_T cellpool, int *dynprogindex) {
+ List_T gappairs, peeled_pairs = NULL, peeled_path = NULL, q, r;
int querydp5, querydp3;
Chrpos_T genomedp5, genomedp3;
+ Univcoord_T best_splicepos5, best_splicepos3, best_splicepos5_with_canonical, best_splicepos3_with_canonical,
+ splicepos5, splicepos3;
bool protectedp;
int n_peeled_indels;
- Pair_T firstpair, lastpair;
+ Pair_T firstpair, lastpair, pair;
Chrpos_T chrstart, chrend;
+#ifdef LARGE_GENOMES
+ Uint8list_T splice_positions_5, splice_positions_3, a, b;
+#else
+ Uintlist_T splice_positions_5, splice_positions_3, a, b;
+#endif
+ Doublelist_T spliceprobs5, spliceprobs3, x, y;
+ Intlist_T hits = NULL, p;
+ int best_middlelength, best_middlelength_with_canonical, middlelength;
+ int best_candidate, best_candidate_with_canonical, candidate;
+ Chrpos_T splicesitepos;
+ double bestprob, bestprob_with_canonical, prob1, prob2, prob3, prob4;
+ char intron1, intron2, intron3, intron4;
+ char c1_alt, c2_alt, c3_alt, c4_alt;
+
+
debug14(printf("\nTRAVERSE_DUAL_BREAK\n"));
+
+ /* First, try to find a microexon.  The four lists passed by address here are
+  * iterated over and then freed later in this function, so
+  * find_dual_break_spliceends is presumably what populates them. */
+ find_dual_break_spliceends(*path,pairs,&spliceprobs5,&spliceprobs3,&splice_positions_5,&splice_positions_3,
+ cdna_direction,watsonp,chroffset,chrhigh);
+ /* Dinucleotides used by the canonical-microexon check further down:
+  * intron3/intron4 are compared with the genome at candidate-2 and candidate-1,
+  * intron1/intron2 at candidate+middlelength and candidate+middlelength+1.
+  * NOTE(review): neither branch assigns these when cdna_direction == 0, and
+  * they have no initializers, yet that check reads them for every candidate
+  * hit -- confirm callers never pass 0 here, or add an explicit else. */
+ if (cdna_direction > 0) {
+ intron1 = 'G'; intron2 = 'T'; intron3 = 'A'; intron4 = 'G';
+ } else if (cdna_direction < 0) {
+ intron1 = 'C'; intron2 = 'T'; intron3 = 'A'; intron4 = 'C';
+ }
+
+
+ bestprob = 0.0;
+ bestprob_with_canonical = 0.0;
+
+ for (a = splice_positions_5, x = spliceprobs5; a != NULL;
+#ifdef LARGE_GENOMES
+ a = Uint8list_next(a),
+#else
+ a = Uintlist_next(a),
+#endif
+ x = Doublelist_next(x)) {
+ prob1 = Doublelist_head(x);
+#ifdef LARGE_GENOMES
+ splicepos5 = Uint8list_head(a);
+#else
+ splicepos5 = Uintlist_head(a);
+#endif
+
+ q = *path;
+ while (q != NULL && ((Pair_T) q->first)->genomepos > splicepos5) {
+ q = List_next(q);
+ }
+ if (q == NULL) {
+ leftpair = (Pair_T) NULL;
+ } else {
+ leftpair = (Pair_T) q->first;
+ querydp5 = leftpair->querypos + 1;
+ genomedp5 = leftpair->genomepos + 1;
+ }
+
+ for (b = splice_positions_3, y = spliceprobs3; b != NULL;
+#ifdef LARGE_GENOMES
+ b = Uint8list_next(b),
+#else
+ b = Uintlist_next(b),
+#endif
+ y = Doublelist_next(y)) {
+ prob4 = Doublelist_head(y);
+#ifdef LARGE_GENOMES
+ splicepos3 = Uint8list_head(b);
+#else
+ splicepos3 = Uintlist_head(b);
+#endif
+
+ r = pairs;
+ while (r != NULL && ((Pair_T) r->first)->genomepos < splicepos3) {
+ r = List_next(r);
+ }
+ if (r == NULL) {
+ rightpair = (Pair_T) NULL;
+ } else {
+ rightpair = (Pair_T) r->first;
+ querydp3 = rightpair->querypos - 1;
+ genomedp3 = rightpair->genomepos - 1;
+ }
+
+ if (leftpair != NULL && rightpair != NULL &&
+ (middlelength = querydp3 - querydp5 + 1) > MIN_MICROEXON_LENGTH) {
+ debug(printf("middlelength %d = %d - %d + 1\n",middlelength,querydp3,querydp5));
+ hits = BoyerMoore_nt(&(queryuc_ptr[querydp5]),/*querylen*/middlelength,
+ /*textleft*/genomedp5,/*textlen*/genomedp3 - genomedp5 + 1,
+ chroffset,chrhigh,watsonp);
+ for (p = hits; p != NULL; p = Intlist_next(p)) {
+ candidate = genomedp5 + Intlist_head(p);
+
+ /* Not handling known splice sites yet */
+ if (watsonp == true) {
+ if (cdna_direction > 0) {
+ splicesitepos = chroffset + (candidate-1) + 1;
+ prob2 = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
+ splicesitepos = chroffset + candidate+middlelength;
+ prob3 = Maxent_hr_donor_prob(splicesitepos,chroffset);
+ } else {
+ splicesitepos = chroffset + (candidate-1) + 1;
+ prob2 = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
+ splicesitepos = chroffset + candidate+middlelength;
+ prob3 = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
+ }
+ } else {
+ if (cdna_direction > 0) {
+ splicesitepos = chrhigh - (candidate-1);
+ prob2 = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
+ splicesitepos = chrhigh - (candidate+middlelength) + 1;
+ prob3 = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
+ } else {
+ splicesitepos = chrhigh - (candidate-1);
+ prob2 = Maxent_hr_donor_prob(splicesitepos,chroffset);
+ splicesitepos = chrhigh - (candidate+middlelength) + 1;
+ prob3 = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
+ }
+ }
+
+ debug13(printf("end probabilities: prob1 = %f, prob4 = %f, microexon probabilities: prob2 = %f, prob3 = %f\n",prob1,prob4,prob2,prob3));
+ if (prob1 + prob2 + prob3 + prob4 > bestprob) {
+ best_splicepos5 = splicepos5;
+ best_candidate = candidate;
+ best_middlelength = middlelength;
+ best_splicepos3 = splicepos3;
+ bestprob = prob1 + prob2 + prob3 + prob4;
+ }
+
+ debug(printf("candidate: at %u\n",candidate));
+ debug(printf("intron3 %c\n",get_genomic_nt(&c3_alt,candidate-2,chroffset,chrhigh,watsonp)));
+ debug(printf("intron4 %c\n",get_genomic_nt(&c4_alt,candidate-1,chroffset,chrhigh,watsonp)));
+ debug(printf("intron1 %c\n",get_genomic_nt(&c1_alt,candidate+middlelength,chroffset,chrhigh,watsonp)));
+ debug(printf("intron2 %c\n",get_genomic_nt(&c2_alt,candidate+middlelength+1,chroffset,chrhigh,watsonp)));
+
+ if (/*genomicuc[candidate - 2]*/ get_genomic_nt(&c3_alt,candidate-2,chroffset,chrhigh,watsonp) == intron3 &&
+ /*genomicuc[candidate - 1]*/ get_genomic_nt(&c4_alt,candidate-1,chroffset,chrhigh,watsonp) == intron4 &&
+ /*genomicuc[candidate + middlelength]*/ get_genomic_nt(&c1_alt,candidate+middlelength,chroffset,chrhigh,watsonp) == intron1 &&
+ /*genomicuc[candidate + middlelength + 1]*/ get_genomic_nt(&c2_alt,candidate+middlelength+1,chroffset,chrhigh,watsonp) == intron2) {
+ debug(printf(" Canonical microexon at %d >>> %d..%d >>> %d\n",genomedp5,candidate,candidate+middlelength,genomedp3));
+ if (prob1 + prob2 + prob3 + prob4 > bestprob_with_canonical) {
+ best_splicepos5_with_canonical = splicepos5;
+ best_candidate_with_canonical = candidate;
+ best_middlelength_with_canonical = middlelength;
+ best_splicepos3_with_canonical = splicepos3;
+ bestprob_with_canonical = prob1 + prob2 + prob3 + prob4;
+ }
+ }
+ }
+
+ Intlist_free(&hits);
+ }
+ }
+ }
+
+ debug13(printf("best prob is %f\n",bestprob));
+ if (bestprob > 3.0) {
+ while ((*path) != NULL && ((Pair_T) (*path)->first)->genomepos > best_splicepos5) {
+ *path = List_next(*path);
+ }
+ while (pairs != NULL && ((Pair_T) pairs->first)->genomepos < best_splicepos3) {
+ pairs = List_next(pairs);
+ }
+ leftpair = (Pair_T) (*path)->first;
+ querydp5 = leftpair->querypos + 1;
+
+ debug13(printf("Making microexon pairs with splicepos5 %u, candidate %u, middlelength %d, splicepos3 %u\n",
+ best_splicepos5,best_candidate,best_middlelength,best_splicepos3));
+ *path = add_microexon_pairs(*path,/*querydpM*/querydp5,/*genomedpM*/best_candidate,
+ /*lengthM*/best_middlelength,queryseq_ptr,queryuc_ptr,
+ chroffset,chrhigh,watsonp,pairpool,*dynprogindex);
+ *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
+
+#ifdef LARGE_GENOMES
+ Uint8list_free(&splice_positions_5);
+ Uint8list_free(&splice_positions_3);
+#else
+ Uintlist_free(&splice_positions_5);
+ Uintlist_free(&splice_positions_3);
+#endif
+ Doublelist_free(&spliceprobs3);
+ Doublelist_free(&spliceprobs5);
+
+ return pairs;
+
+ } else if (bestprob_with_canonical > 0.0) {
+ while ((*path) != NULL && ((Pair_T) (*path)->first)->genomepos > best_splicepos5_with_canonical) {
+ *path = List_next(*path);
+ }
+ while (pairs != NULL && ((Pair_T) pairs->first)->genomepos < best_splicepos3_with_canonical) {
+ pairs = List_next(pairs);
+ }
+ leftpair = (Pair_T) (*path)->first;
+ querydp5 = leftpair->querypos + 1;
+
+ debug13(printf("Making microexon pairs with splicepos5 %u, candidate %u, middlelength %d, splicepos3 %u\n",
+ best_splicepos5_with_canonical,best_candidate_with_canonical,best_middlelength_with_canonical,best_splicepos3_with_canonical));
+ *path = add_microexon_pairs(*path,/*querydpM*/querydp5,/*genomedpM*/best_candidate_with_canonical,
+ /*lengthM*/best_middlelength_with_canonical,queryseq_ptr,queryuc_ptr,
+ chroffset,chrhigh,watsonp,pairpool,*dynprogindex);
+ *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
+
+#ifdef LARGE_GENOMES
+ Uint8list_free(&splice_positions_5);
+ Uint8list_free(&splice_positions_3);
+#else
+ Uintlist_free(&splice_positions_5);
+ Uintlist_free(&splice_positions_3);
+#endif
+ Doublelist_free(&spliceprobs3);
+ Doublelist_free(&spliceprobs5);
+
+ return pairs;
+
+ } else {
+#ifdef LARGE_GENOMES
+ Uint8list_free(&splice_positions_5);
+ Uint8list_free(&splice_positions_3);
+#else
+ Uintlist_free(&splice_positions_5);
+ Uintlist_free(&splice_positions_3);
+#endif
+ Doublelist_free(&spliceprobs3);
+ Doublelist_free(&spliceprobs5);
+ }
+
+
+
+ /* Try to solve without a microexon */
+ if (*path == NULL) {
+ leftpair = (Pair_T) NULL;
+ } else {
+ leftpair = (Pair_T) (*path)->first;
+ }
+
+ if (pairs == NULL) {
+ rightpair = (Pair_T) NULL;
+ } else {
+ rightpair = (Pair_T) pairs->first;
+ }
+
if (leftpair != NULL && rightpair != NULL) {
querydp5 = leftpair->querypos + 1;
genomedp5 = leftpair->genomepos + 1;
@@ -8635,40 +9769,24 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
Pair_T pair, leftpair, rightpair;
bool filledp, shiftp;
+ debug(printf("Entered build_dual_breaks\n"));
*dual_break_p = false;
-
debug(Pair_dump_list(path,true));
-#if 0
- if (path != NULL && ((Pair_T) path->first)->querypos < querylength - 50) {
- /* Solve end as a dual break */
- debug(printf("Observed a dual break at the end of the alignment, querypos %d vs querylength %d\n",
- ((Pair_T) path->first)->querypos,querylength));
- *dual_break_p = true;
- pairs = traverse_dual_break(/*pairs*/NULL,&path,/*leftpair*/path->first,/*rightpair*/NULL,chroffset,chrhigh,
-#ifdef PMAP
- queryaaseq_ptr,
-#endif
- queryseq_ptr,queryuc_ptr,querylength,watsonp,genestrand,
- pairpool,maxpeelback,oligoindices_minor,
- diagpool,cellpool);
- }
-#endif
-
-
while (path != NULL) {
/* pairptr = path; */
/* path = Pairpool_pop(path,&pair); */
pair = (Pair_T) path->first;
- if (pair->gapp == false || pair->comp != DUALBREAK_COMP) {
+ /* Cannot rely on previous procedures to assign pair->comp value */
+ if (pair->gapp == false /*|| pair->comp != DUALBREAK_COMP*/) {
#ifdef WASTE
pairs = Pairpool_push_existing(pairs,pairpool,pair);
#else
pairs = List_transfer_one(pairs,&path);
#endif
} else if (path->rest == NULL || pairs == NULL) {
- debug(printf("Observed a dual break at the end of the alignment, case 1\n"));
+ debug(printf("Observed a gap at the end of the alignment, case 1\n"));
path = Pairpool_pop(path,&pair);
} else {
@@ -8678,9 +9796,9 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
leftpair = path->first;
rightpair = pairs->first;
if (leftpair->querypos < 0 || rightpair->querypos < 0) {
- debug(printf("Observed a dual break at the end of the alignment, case 2\n"));
+ debug(printf("Observed a gap at the end of the alignment, case 2\n"));
} else {
- debug(printf("Observed a dual break at %d..%d with queryjump = %d, genomejump = %d\n",
+ debug(printf("Observed a gap at %d..%d with queryjump = %d, genomejump = %d\n",
leftpair->querypos,rightpair->querypos,pair->queryjump,pair->genomejump));
if (0 && finalp == true) {
@@ -8702,7 +9820,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
maxpeelback,defect_rate,/*forcep*/true,/*finalp*/false);
} else if (pair->queryjump < MIN_STAGE2_FOR_DUALBREAK) {
- debug(printf(" Can be solved as a genome gap\n"));
+ debug(printf(" Too small for a dual break\n"));
pairs = traverse_genome_gap(&filledp,&shiftp,&(*dynprogindex_minor),&(*dynprogindex_major),
pairs,&path,leftpair,rightpair,chrnum,chroffset,chrhigh,
queryseq_ptr,queryuc_ptr,querylength,
@@ -8717,30 +9835,14 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
#ifdef PMAP
queryaaseq_ptr,
#endif
- queryseq_ptr,queryuc_ptr,querylength,watsonp,genestrand,
+ queryseq_ptr,queryuc_ptr,querylength,cdna_direction,watsonp,genestrand,
pairpool,maxpeelback,oligoindices_minor,
- diagpool,cellpool);
+ diagpool,cellpool,&(*dynprogindex_major));
}
}
}
}
-#if 0
- if (pairs != NULL && ((Pair_T) pairs->first)->querypos > 50) {
- /* Solve beginning as a dual break */
- debug(printf("Observed a dual break at the beginning of the alignment, querypos %d\n",
- ((Pair_T) pairs->first)->querypos));
- *dual_break_p = true;
- pairs = traverse_dual_break(pairs,&path,/*leftpair*/NULL,/*rightpair*/pairs->first,chroffset,chrhigh,
-#ifdef PMAP
- queryaaseq_ptr,
-#endif
- queryseq_ptr,queryuc_ptr,querylength,watsonp,genestrand,
- pairpool,maxpeelback,oligoindices_minor,
- diagpool,cellpool);
- }
-#endif
-
debug(printf("After build_dual_breaks:\n"));
debug(Pair_dump_list(pairs,true));
@@ -9302,7 +10404,7 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
}
} else {
- /* Solve as dual break */
+ /* Solve as dual break. Should have already been done by build_dual_breaks */
/* pairptr = path; */ /* save */
/* path = Pairpool_pop(path,&pair); */
leftpair = path->first;
@@ -9311,9 +10413,9 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
#ifdef PMAP
queryaaseq_ptr,
#endif
- queryseq_ptr,queryuc_ptr,querylength,watsonp,genestrand,
+ queryseq_ptr,queryuc_ptr,querylength,cdna_direction,watsonp,genestrand,
pairpool,maxpeelback,oligoindices_minor,
- diagpool,cellpool);
+ diagpool,cellpool,&(*dynprogindex_major));
}
} else if (finalp == false && pair->queryjump > pair->genomejump + EXTRAQUERYGAP) {
@@ -9436,6 +10538,7 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
pairs = List_transfer_one(pairs,&path);
}
+ debug(printf("\n** Finishing build_pairs_introns\n"));
return pairs;
}
@@ -10824,16 +11927,74 @@ path_compute_dir (double *defect_rate, List_T pairs,
}
debug(printf("defect_rate = %f (%d matches, %d mismatches)\n",*defect_rate,matches,mismatches));
- /* Pass 3: introns */
+
+ /* Pass 3: Smoothing */
+ debug(printf("*** Pass 3 (dir %d): Smooth\n",cdna_direction));
+
+ /* Smoothing by probability */
+ path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+ pairs = assign_intron_probs(path,cdna_direction,watsonp,chrnum,chroffset,chrhigh,pairpool);
+ Smooth_reset(pairs);
+ pairs = Smooth_pairs_by_intronprobs(&badp,pairs,pairpool);
+
+#if 0
+ /* Smoothing by netgap. Can crash or stall, and generally doesn't do anything except for very low-identity alignments. */
+ debug(printf("\n*** Pass 1 (dir %d): Initial smoothing by net gap. Iteration1 %d\n",
+ cdna_direction,iter1));
+ pairs = Smooth_pairs_by_netgap(&smoothp,pairs,pairpool);
+#endif
+
+ /* Smoothing by size: This can undo the short exons found by traverse_dual_genome, so we use protectedp in traverse_dual_genome */
+ debug(printf("*** Pass 3a (dir %d): Smoothing by size. Iteration0 %d, iteration1 %d\n",
+ cdna_direction,iter0,iter1));
+ path = List_reverse(pairs);
+ pairs = remove_indel_gaps(path);
+ pairs = Smooth_pairs_by_size(&shortp,&deletep,pairs,pairpool,/*stage2_indexsize*/6);
+ debug(printf(" => Result of Pass 3a (smoothing): shortp is %d, deletep is %d\n",shortp,deletep));
+ debug(Pair_dump_list(pairs,/*zerobasedp*/true));
+
+#ifdef DEBUG8
+ if (stage3debug == POST_SMOOTHING) {
+ path = List_reverse(pairs);
+ return path;
+ }
+#endif
+
+
+ /* Pass 4: Fix dual breaks */
+ debug(printf("\n*** Pass 4 (dir %d): Fix dual breaks. Iteration0 %d\n",cdna_direction,iter0));
+ /* pairs = remove_indel_gaps(path); */
+
+ path = List_reverse(pairs);
+ pairs = build_dual_breaks(&dual_break_p,&dynprogindex_minor,&dynprogindex_major,path,
+ chrnum,chroffset,chrhigh,
+#ifdef PMAP
+ queryaaseq_ptr,
+#endif
+ queryseq_ptr,queryuc_ptr,querylength,
+ cdna_direction,watsonp,genestrand,jump_late_p,pairpool,
+ dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,maxpeelback,
+ oligoindices_minor,diagpool,cellpool,
+ *defect_rate,/*finalp*/true,/*simplep*/true);
+
+#ifdef DEBUG8
+ if (stage3debug == POST_DUAL_BREAKS) {
+ path = List_reverse(pairs);
+ return path;
+ }
+#endif
+
+
+ /* Pass 5: introns */
/* >>pairs */
- debug(printf("\n*** Pass 3 (dir %d): Smooth and solve dual introns iteratively. Iteration0 %d\n",
+ debug(printf("\n*** Pass 5 (dir %d): Smooth and solve dual introns iteratively. Iteration0 %d\n",
cdna_direction,iter0));
iter1 = 0;
shortp = true;
deletep = badp = false;
while ((shortp == true || deletep == true || badp == true) && iter1 < MAXITER_SMOOTH_BY_SIZE) {
/* Pass 3c: single introns */
- debug(printf("*** Pass 3c (dir %d): Solve introns. Iteration0 %d, iteration1 %d\n",
+ debug(printf("*** Pass 5 (dir %d): Solve introns. Iteration0 %d, iteration1 %d\n",
cdna_direction,iter0,iter1));
iter2 = 0;
@@ -10851,7 +12012,7 @@ path_compute_dir (double *defect_rate, List_T pairs,
maxpeelback,*defect_rate,pairpool,dynprogL,dynprogM,dynprogR,
oligoindices_minor,diagpool,cellpool,
last_genomedp5,last_genomedp3,/*finalp*/false,/*simplep*/true);
- debug(printf(" => Result of Pass 3c (introns):\n"));
+ debug(printf(" => Result of Pass 5 (introns):\n"));
debug(Pair_dump_list(pairs,/*zerobasedp*/true));
}
@@ -10880,36 +12041,7 @@ path_compute_dir (double *defect_rate, List_T pairs,
#endif
- /* Smoothing by probability */
- path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
- pairs = assign_intron_probs(path,cdna_direction,watsonp,chrnum,chroffset,chrhigh,pairpool);
- Smooth_reset(pairs);
- pairs = Smooth_pairs_by_intronprobs(&badp,pairs,pairpool);
-
-#if 0
- /* Smoothing by netgap. Can crash or stall, and generally doesn't do anything except for very low-identity alignments. */
- debug(printf("\n*** Pass 1 (dir %d): Initial smoothing by net gap. Iteration1 %d\n",
- cdna_direction,iter1));
- pairs = Smooth_pairs_by_netgap(&smoothp,pairs,pairpool);
-#endif
-
- /* Smoothing by size: This can undo the short exons found by traverse_dual_genome, so we use protectedp in traverse_dual_genome */
- debug(printf("*** Pass 3a (dir %d): Smoothing by size. Iteration0 %d, iteration1 %d\n",
- cdna_direction,iter0,iter1));
- path = List_reverse(pairs);
- pairs = remove_indel_gaps(path);
- pairs = Smooth_pairs_by_size(&shortp,&deletep,pairs,pairpool,/*stage2_indexsize*/6);
- debug(printf(" => Result of Pass 3a (smoothing): shortp is %d, deletep is %d\n",shortp,deletep));
- debug(Pair_dump_list(pairs,/*zerobasedp*/true));
-
-#ifdef DEBUG8
- if (stage3debug == POST_SMOOTHING) {
- path = List_reverse(pairs);
- return path;
- }
-#endif
-
- debug(printf("*** Pass 3b (dir %d): Solve dual introns. Iteration0 %d, Iteration1 %d\n",
+ debug(printf("*** Pass 6 (dir %d): Solve dual introns. Iteration0 %d, Iteration1 %d\n",
cdna_direction,iter0,iter1));
if (badp == false && shortp == false && deletep == false) {
debug(printf(" no shortp or deletep, so do nothing\n"));
@@ -10948,54 +12080,15 @@ path_compute_dir (double *defect_rate, List_T pairs,
}
#endif
-#if 0
- if (maximize_coverage_p == true) {
- /* Don't trim ends */
- } else {
- /* Pass 3b: trim end exons: pairs -> pairs */
- debug(printf("\n*** Pass 3b (dir %d): Trim end exons\n",cdna_direction));
-#ifdef WASTE
- pairs = chop_ends_by_changepoint(pairs,pairpool);
-#else
- pairs = chop_ends_by_changepoint(pairs);
-#endif
- debug(Pair_dump_list(pairs,/*zerobasedp*/true));
- }
-#endif
-
path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
debug(Pair_dump_list(path,/*zerobasedp*/true));
- /* Pass 5: Fix dual breaks */
- debug(printf("\n*** Pass 5 (dir %d): Fix dual breaks. Iteration0 %d\n",cdna_direction,iter0));
- pairs = remove_indel_gaps(path);
- path = List_reverse(pairs);
-
- pairs = build_dual_breaks(&dual_break_p,&dynprogindex_minor,&dynprogindex_major,path,
- chrnum,chroffset,chrhigh,
-#ifdef PMAP
- queryaaseq_ptr,
-#endif
- queryseq_ptr,queryuc_ptr,querylength,
- cdna_direction,watsonp,genestrand,jump_late_p,pairpool,
- dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,maxpeelback,
- oligoindices_minor,diagpool,cellpool,
- *defect_rate,/*finalp*/false,/*simplep*/true);
- /* Must end with path to start loop */
- path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
pairs = List_reverse(path);
debug14(printf("Result of build_dual_breaks\n"));
debug14(Pair_dump_list(pairs,true));
debug(printf("Result of build_dual_breaks\n"));
debug(Pair_dump_list(pairs,true));
-#ifdef DEBUG8
- if (stage3debug == POST_DUAL_BREAKS) {
- path = List_reverse(pairs);
- return path;
- }
-#endif
-
#ifdef GSNAP
/* Too expensive to loop */
dual_break_p = false;
@@ -11355,12 +12448,13 @@ path_compute_final (double defect_rate, List_T pairs, int cdna_direction, bool w
#ifdef GSNAP
+/* I believe this function never gets called with SENSE_NULL */
static List_T
-trim_novel_spliceends (List_T pairs,
+trim_novel_spliceends (int *new_sensedir, List_T pairs,
int *ambig_end_length_5, int *ambig_end_length_3,
Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
double *ambig_prob_5, double *ambig_prob_3,
- int *sensedir, bool watsonp, int querylength,
+ int orig_sensedir, bool watsonp, int querylength,
Univcoord_T chroffset, Univcoord_T chrhigh,
bool knownsplice5p, bool knownsplice3p) {
List_T path, p;
@@ -11379,13 +12473,16 @@ trim_novel_spliceends (List_T pairs,
max_prob_sense_forward_5_mm = 0.0, max_prob_sense_anti_5_mm = 0.0,
max_prob_sense_forward_3_mm = 0.0, max_prob_sense_anti_3_mm = 0.0;
Splicetype_T splicetype5, splicetype3, splicetype5_mm, splicetype3_mm;
- int splice_cdna_direction_5, splice_sensedir_5, splice_cdna_direction_3, splice_sensedir_3;
- int splice_cdna_direction_5_mm, splice_sensedir_5_mm, splice_cdna_direction_3_mm, splice_sensedir_3_mm;
+ /* int splice_cdna_direction_5, splice_cdna_direction_3; */
+ int splice_sensedir_5, splice_sensedir_3;
+ /* int splice_cdna_direction_5_mm, splice_cdna_direction_3_mm; */
+ int splice_sensedir_5_mm, splice_sensedir_3_mm;
bool mismatchp;
- debug13(printf("\nEntered gmap_trim_novel_spliceends with sensedir %d, ambig_end_lengths %d and %d\n",
- *sensedir,*ambig_end_length_5,*ambig_end_length_3));
+ debug13(printf("\nEntered gmap_trim_novel_spliceends with orig_sensedir %d, ambig_end_lengths %d and %d\n",
+ orig_sensedir,*ambig_end_length_5,*ambig_end_length_3));
+ *new_sensedir = SENSE_NULL;
Pair_trim_distances(&trim5,&trim3,pairs);
debug13(printf("Trim distances are %d and %d\n",trim5,trim3));
@@ -11451,7 +12548,7 @@ trim_novel_spliceends (List_T pairs,
/* Note: pairs may not extend all the way to the end, which is why we look at end pair to initialize mismatchp */
debug13(printf("Allowing perfect overhang into potential intron\n"));
- } else if (*sensedir == SENSE_FORWARD) {
+ } else if (orig_sensedir == SENSE_FORWARD) {
if (watsonp) {
splicetype3 = splicetype3_mm = DONOR;
@@ -11515,7 +12612,7 @@ trim_novel_spliceends (List_T pairs,
debug13(printf("\n"));
}
- } else if (*sensedir == SENSE_ANTI) {
+ } else if (orig_sensedir == SENSE_ANTI) {
if (watsonp) {
splicetype3 = splicetype3_mm = ANTIACCEPTOR;
@@ -11597,7 +12694,7 @@ trim_novel_spliceends (List_T pairs,
if (donor_prob > max_prob_3_mm) {
max_prob_3_mm = donor_prob;
splice_genomepos_3_mm = genomicpos - 1;
- splice_cdna_direction_3_mm = +1;
+ /* splice_cdna_direction_3_mm = +1; */
splice_sensedir_3_mm = SENSE_FORWARD;
splicetype3_mm = DONOR;
}
@@ -11607,7 +12704,7 @@ trim_novel_spliceends (List_T pairs,
if (acceptor_prob > max_prob_3_mm) {
max_prob_3_mm = acceptor_prob;
splice_genomepos_3_mm = genomicpos - 1;
- splice_cdna_direction_3_mm = -1;
+ /* splice_cdna_direction_3_mm = -1; */
splice_sensedir_3_mm = SENSE_ANTI;
splicetype3_mm = ANTIACCEPTOR;
}
@@ -11624,7 +12721,7 @@ trim_novel_spliceends (List_T pairs,
if (donor_prob > max_prob_3) {
max_prob_3 = donor_prob;
splice_genomepos_3 = genomicpos - 1;
- splice_cdna_direction_3 = +1;
+ /* splice_cdna_direction_3 = +1; */
splice_sensedir_3 = SENSE_FORWARD;
splicetype3 = DONOR;
}
@@ -11634,7 +12731,7 @@ trim_novel_spliceends (List_T pairs,
if (acceptor_prob > max_prob_3) {
max_prob_3 = acceptor_prob;
splice_genomepos_3 = genomicpos - 1;
- splice_cdna_direction_3 = -1;
+ /* splice_cdna_direction_3 = -1; */
splice_sensedir_3 = SENSE_ANTI;
splicetype3 = ANTIACCEPTOR;
}
@@ -11660,7 +12757,7 @@ trim_novel_spliceends (List_T pairs,
if (donor_prob > max_prob_3_mm) {
max_prob_3_mm = donor_prob;
splice_genomepos_3_mm = (chrhigh - chroffset) - genomicpos;
- splice_cdna_direction_3_mm = +1;
+ /* splice_cdna_direction_3_mm = +1; */
splice_sensedir_3_mm = SENSE_FORWARD;
splicetype3_mm = ANTIDONOR;
}
@@ -11670,7 +12767,7 @@ trim_novel_spliceends (List_T pairs,
if (acceptor_prob > max_prob_3_mm) {
max_prob_3_mm = acceptor_prob;
splice_genomepos_3_mm = (chrhigh - chroffset) - genomicpos;
- splice_cdna_direction_3_mm = -1;
+ /* splice_cdna_direction_3_mm = -1; */
splice_sensedir_3_mm = SENSE_ANTI;
splicetype3_mm = ACCEPTOR;
}
@@ -11687,7 +12784,7 @@ trim_novel_spliceends (List_T pairs,
if (donor_prob > max_prob_3) {
max_prob_3 = donor_prob;
splice_genomepos_3 = (chrhigh - chroffset) - genomicpos;
- splice_cdna_direction_3 = +1;
+ /* splice_cdna_direction_3 = +1; */
splice_sensedir_3 = SENSE_FORWARD;
splicetype3 = ANTIDONOR;
}
@@ -11697,7 +12794,7 @@ trim_novel_spliceends (List_T pairs,
if (acceptor_prob > max_prob_3) {
max_prob_3 = acceptor_prob;
splice_genomepos_3 = (chrhigh - chroffset) - genomicpos;
- splice_cdna_direction_3 = -1;
+ /* splice_cdna_direction_3 = -1; */
splice_sensedir_3 = SENSE_ANTI;
splicetype3 = ACCEPTOR;
}
@@ -11708,7 +12805,7 @@ trim_novel_spliceends (List_T pairs,
}
}
- if (*sensedir != SENSE_NULL) {
+ if (orig_sensedir != SENSE_NULL) {
if (max_prob_3 > END_SPLICESITE_PROB_MATCH) {
debug13(printf("Found good splice %s on 3' end at %u with probability %f\n",
Splicetype_string(splicetype3),splice_genomepos_3,max_prob_3));
@@ -11801,7 +12898,7 @@ trim_novel_spliceends (List_T pairs,
/* Note: pairs may not extend all the way to the end, which is why we look at end pair to initialize mismatchp */
debug13(printf("Allowing perfect overhang into potential intron\n"));
- } else if (*sensedir == SENSE_FORWARD) {
+ } else if (orig_sensedir == SENSE_FORWARD) {
if (watsonp) {
splicetype5 = splicetype5_mm = ACCEPTOR;
@@ -11865,7 +12962,7 @@ trim_novel_spliceends (List_T pairs,
debug13(printf("\n"));
}
- } else if (*sensedir == SENSE_ANTI) {
+ } else if (orig_sensedir == SENSE_ANTI) {
if (watsonp) {
splicetype5 = splicetype5_mm = ANTIDONOR;
@@ -11947,7 +13044,7 @@ trim_novel_spliceends (List_T pairs,
if (acceptor_prob > max_prob_5_mm) {
max_prob_5_mm = acceptor_prob;
splice_genomepos_5_mm = genomicpos;
- splice_cdna_direction_5_mm = +1;
+ /* splice_cdna_direction_5_mm = +1; */
splice_sensedir_5_mm = SENSE_FORWARD;
splicetype5_mm = ACCEPTOR;
}
@@ -11957,7 +13054,7 @@ trim_novel_spliceends (List_T pairs,
if (donor_prob > max_prob_5_mm) {
max_prob_5_mm = donor_prob;
splice_genomepos_5_mm = genomicpos;
- splice_cdna_direction_5_mm = -1;
+ /* splice_cdna_direction_5_mm = -1; */
splice_sensedir_5_mm = SENSE_ANTI;
splicetype5_mm = ANTIDONOR;
}
@@ -11974,7 +13071,7 @@ trim_novel_spliceends (List_T pairs,
if (acceptor_prob > max_prob_5) {
max_prob_5 = acceptor_prob;
splice_genomepos_5 = genomicpos;
- splice_cdna_direction_5 = +1;
+ /* splice_cdna_direction_5 = +1; */
splice_sensedir_5 = SENSE_FORWARD;
splicetype5 = ACCEPTOR;
}
@@ -11984,7 +13081,7 @@ trim_novel_spliceends (List_T pairs,
if (donor_prob > max_prob_5) {
max_prob_5 = donor_prob;
splice_genomepos_5 = genomicpos;
- splice_cdna_direction_5 = -1;
+ /* splice_cdna_direction_5 = -1; */
splice_sensedir_5 = SENSE_ANTI;
splicetype5 = ANTIDONOR;
}
@@ -12010,7 +13107,7 @@ trim_novel_spliceends (List_T pairs,
if (acceptor_prob > max_prob_5_mm) {
max_prob_5_mm = acceptor_prob;
splice_genomepos_5_mm = (chrhigh - chroffset) - genomicpos + 1;
- splice_cdna_direction_5_mm = +1;
+ /* splice_cdna_direction_5_mm = +1; */
splice_sensedir_5_mm = SENSE_FORWARD;
splicetype5_mm = ANTIACCEPTOR;
}
@@ -12020,7 +13117,7 @@ trim_novel_spliceends (List_T pairs,
if (donor_prob > max_prob_5_mm) {
max_prob_5_mm = donor_prob;
splice_genomepos_5_mm = (chrhigh - chroffset) - genomicpos + 1;
- splice_cdna_direction_5_mm = -1;
+ /* splice_cdna_direction_5_mm = -1; */
splice_sensedir_5_mm = SENSE_ANTI;
splicetype5_mm = DONOR;
}
@@ -12037,7 +13134,7 @@ trim_novel_spliceends (List_T pairs,
if (acceptor_prob > max_prob_5) {
max_prob_5 = acceptor_prob;
splice_genomepos_5 = (chrhigh - chroffset) - genomicpos + 1;
- splice_cdna_direction_5 = +1;
+ /* splice_cdna_direction_5 = +1; */
splice_sensedir_5 = SENSE_FORWARD;
splicetype5 = ANTIACCEPTOR;
}
@@ -12047,7 +13144,7 @@ trim_novel_spliceends (List_T pairs,
if (donor_prob > max_prob_5) {
max_prob_5 = donor_prob;
splice_genomepos_5 = (chrhigh - chroffset) - genomicpos + 1;
- splice_cdna_direction_5 = -1;
+ /* splice_cdna_direction_5 = -1; */
splice_sensedir_5 = SENSE_ANTI;
splicetype5 = DONOR;
}
@@ -12058,7 +13155,7 @@ trim_novel_spliceends (List_T pairs,
}
}
- if (*sensedir != SENSE_NULL) {
+ if (orig_sensedir != SENSE_NULL) {
if (max_prob_5 > END_SPLICESITE_PROB_MATCH) {
debug13(printf("Found good splice %s on 5' end at %u with probability %f\n",
Splicetype_string(splicetype5),splice_genomepos_5,max_prob_5));
@@ -12089,7 +13186,7 @@ trim_novel_spliceends (List_T pairs,
}
}
- if (*sensedir == SENSE_NULL) {
+ if (orig_sensedir == SENSE_NULL) {
if (max_prob_3 >= END_SPLICESITE_PROB_MATCH || max_prob_5 >= END_SPLICESITE_PROB_MATCH) {
if (max_prob_3 >= END_SPLICESITE_PROB_MATCH && max_prob_5 >= END_SPLICESITE_PROB_MATCH
&& max_prob_sense_forward_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH
@@ -12124,7 +13221,7 @@ trim_novel_spliceends (List_T pairs,
/* *cdna_direction = splice_cdna_direction_3; */
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
}
- *sensedir = SENSE_FORWARD;
+ *new_sensedir = SENSE_FORWARD;
pairs = List_reverse(path);
} else if (max_prob_3 >= END_SPLICESITE_PROB_MATCH && max_prob_5 >= END_SPLICESITE_PROB_MATCH
@@ -12160,7 +13257,7 @@ trim_novel_spliceends (List_T pairs,
/* *cdna_direction = splice_cdna_direction_3; */
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
}
- *sensedir = SENSE_ANTI;
+ *new_sensedir = SENSE_ANTI;
pairs = List_reverse(path);
} else if (max_prob_3 > max_prob_5) {
@@ -12180,10 +13277,10 @@ trim_novel_spliceends (List_T pairs,
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
if (max_prob_sense_forward_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_3;
+ *new_sensedir = splice_sensedir_3;
} else if (max_prob_sense_anti_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_3;
+ *new_sensedir = splice_sensedir_3;
} else {
/* Not enough evidence to set sensedir */
}
@@ -12206,10 +13303,10 @@ trim_novel_spliceends (List_T pairs,
debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
if (max_prob_sense_forward_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_5;
+ *new_sensedir = splice_sensedir_5;
} else if (max_prob_sense_anti_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_5;
+ *new_sensedir = splice_sensedir_5;
} else {
/* Not enough evidence to set sensedir */
}
@@ -12233,10 +13330,10 @@ trim_novel_spliceends (List_T pairs,
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
if (max_prob_sense_forward_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_3_mm;
+ *new_sensedir = splice_sensedir_3_mm;
} else if (max_prob_sense_anti_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_forward_3_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_forward_5_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_3_mm;
+ *new_sensedir = splice_sensedir_3_mm;
} else {
/* Not enough evidence to set sensedir */
}
@@ -12257,10 +13354,10 @@ trim_novel_spliceends (List_T pairs,
debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
if (max_prob_sense_forward_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_5_mm;
+ *new_sensedir = splice_sensedir_5_mm;
} else if (max_prob_sense_anti_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_forward_5_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_forward_3_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_5_mm;
+ *new_sensedir = splice_sensedir_5_mm;
} else {
/* Not enough evidence to set sensedir */
}
@@ -12507,7 +13604,7 @@ trim_novel_spliceends_new (List_T pairs,
Splice_trim_novel_spliceends(&(*ambig_end_length_5),&(*ambig_end_length_3),
&(*ambig_splicetype_5),&(*ambig_splicetype_3),
- &(*ambig_prob_5),&(*ambig_prob_3),&(*sensedir),
+ &(*ambig_prob_5),&(*ambig_prob_3),/*orig_sensedir*/*sensedir,
start5,middle5,end5,solve5p,start3,middle3,end3,solve3p,
genomicstart5,genomicend3,chroffset,/*plusp*/watsonp);
@@ -12537,7 +13634,7 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
List_T pairs, int *cdna_direction, bool watsonp, bool jump_late_p,
int querylength,
#ifdef GSNAP
- int *sensedir,
+ int orig_sensedir,
#endif
char *queryseq_ptr, char *queryuc_ptr,
Univcoord_T chroffset, Univcoord_T chrhigh,
@@ -12549,10 +13646,11 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
bool knownsplice5p = false, knownsplice3p = false;
bool trimp, trim5p, trim3p, trim5p_ignore, trim3p_ignore;
int iter = 0;
+ int new_sensedir; /* Not used currently */
#ifdef GSNAP
- debug(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,*sensedir));
- debug3(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,*sensedir));
+ debug(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,orig_sensedir));
+ debug3(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,orig_sensedir));
#else
debug(printf("Entering path_trim with cdna_direction %d\n",*cdna_direction));
debug3(printf("Entering path_trim with cdna_direction %d\n",*cdna_direction));
@@ -12560,10 +13658,10 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
#ifdef GSNAP
if (novelsplicingp == true) {
- pairs = trim_novel_spliceends(pairs,&(*ambig_end_length_5),&(*ambig_end_length_3),
+ pairs = trim_novel_spliceends(&new_sensedir,pairs,&(*ambig_end_length_5),&(*ambig_end_length_3),
&(*ambig_splicetype_5),&(*ambig_splicetype_3),
&(*ambig_prob_5),&(*ambig_prob_3),
- &(*sensedir),watsonp,querylength,
+ orig_sensedir,watsonp,querylength,
chroffset,chrhigh,knownsplice5p,knownsplice3p);
}
#endif
@@ -12641,8 +13739,8 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
/* Cannot put trim_novel_spliceends here, which can generate an infinite loop in calling procedures */
- debug3(printf("Final result of path_trim: chroffset = %u, cdna_direction %d\n",
- chroffset,*cdna_direction));
+ debug3(printf("Final result of path_trim: chroffset = %u, cdna_direction %d, new_sensedir %d\n",
+ chroffset,*cdna_direction,new_sensedir));
debug3(Pair_dump_list(pairs,true));
debug3(printf("\n"));
@@ -13204,7 +14302,6 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
*ambig_prob_3_1 = fwd_ambig_prob_3;
*cdna_direction = +1;
- *sensedir = SENSE_FORWARD;
/* path_trim alters pairs_fwd, so make a copy in case we use it for pairs_pretrim */
pairs_fwd_copy = Pairpool_copy(pairs_fwd,pairpool);
@@ -13214,7 +14311,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
pairs_fwd_copy,&(*cdna_direction),watsonp,
jump_late_p,querylength,
#ifdef GSNAP
- &(*sensedir),
+ /*orig_sensedir*/SENSE_FORWARD,
#endif
queryseq_ptr,queryuc_ptr,
chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
@@ -13228,7 +14325,6 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
*ambig_prob_3_2 = rev_ambig_prob_3;
*cdna_direction = -1;
- *sensedir = SENSE_ANTI;
pairs_rev_copy = Pairpool_copy(pairs_rev,pairpool);
*finalpairs2 = path_trim(defect_rate_rev,&(*ambig_end_length_5_2),&(*ambig_end_length_3_2),
@@ -13237,7 +14333,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
pairs_rev_copy,&(*cdna_direction),watsonp,
jump_late_p,querylength,
#ifdef GSNAP
- &(*sensedir),
+ /*orig_sensedir*/SENSE_ANTI,
#endif
queryseq_ptr,queryuc_ptr,
chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
@@ -13340,7 +14436,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
pairs_pretrim,&(*cdna_direction),watsonp,
jump_late_p,querylength,
#ifdef GSNAP
- &(*sensedir),
+ /*orig_sensedir*/*sensedir,
#endif
queryseq_ptr,queryuc_ptr,
chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
diff --git a/src/stage3.h b/src/stage3.h
index 4707bc3..c3a5899 100644
--- a/src/stage3.h
+++ b/src/stage3.h
@@ -1,4 +1,4 @@
-/* $Id: stage3.h 193876 2016-07-12 02:46:04Z twu $ */
+/* $Id: stage3.h 198076 2016-09-21 00:29:14Z twu $ */
#ifndef STAGE3_INCLUDED
#define STAGE3_INCLUDED
@@ -281,7 +281,7 @@ extern int
Stage3_good_part (struct Pair_T *pairarray, int npairs, int pos5, int pos3);
extern struct Pair_T *
-Stage3_compute (int *cdna_direction, int *sensedir1, List_T *pairs1, int *npairs1, int *goodness1,
+Stage3_compute (int *cdna_direction, int *sensedir, List_T *pairs1, int *npairs1, int *goodness1,
int *matches1, int *nmatches_posttrim_1, int *max_match_length_1,
int *ambig_end_length_5_1, int *ambig_end_length_3_1,
Splicetype_T *ambig_splicetype_5_1, Splicetype_T *ambig_splicetype_3_1,
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 2184b66..3b36519 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 197777 2016-09-14 00:43:45Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 198077 2016-09-21 00:34:35Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -570,6 +570,7 @@ struct Stage3pair_T {
int score;
int nmatches;
+ int nmismatches; /* querylength - sum of nmatches */
int nmatches_posttrim;
int score_eventrim;
@@ -4902,7 +4903,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
Intlist_T nmismatches_list, List_T junctions, int querylength,
Compress_T query_compress,
Substring_T right_ambig, Substring_T left_ambig,
- bool plusp, int genestrand, int sensedir,
+ bool plusp, int genestrand, int orig_sensedir,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
Chrpos_T chrlength, bool sarrayp) {
T new;
@@ -4926,10 +4927,12 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
int adj = 0, adj0; /* deletions - insertions */
int nmismatches_whole = 0, nmismatches, indel_score = 0, nindels = 0;
int nmismatches_bothdiff = 0;
+ int new_sensedir;
+ bool contradictionp;
- debug0(printf("Entered Stage3end_new_substrings at left %u [%u], with plusp %d, sensedir %d, and endpoints %s\n",
- Uintlist_head(lefts),Uintlist_head(lefts) - chroffset,plusp,sensedir,Intlist_to_string(endpoints)));
+ debug0(printf("Entered Stage3end_new_substrings at left %u [%u], with plusp %d, orig_sensedir %d, and endpoints %s\n",
+ Uintlist_head(lefts),Uintlist_head(lefts) - chroffset,plusp,orig_sensedir,Intlist_to_string(endpoints)));
debug0(printf("There are %d endpoints, %d lefts, %d nmismatches, and %d junctions\n",
Intlist_length(endpoints),Uintlist_length(lefts),Intlist_length(nmismatches_list),List_length(junctions)));
debug0(printf("Ambig left %p, right %p\n",left_ambig,right_ambig));
@@ -4958,7 +4961,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
j = junctions; /* Put here before we handle left_ambig */
if (left_ambig != NULL) {
substrings = List_push(substrings,(void *) left_ambig);
- junctions = List_push(junctions,(void *) Junction_new_splice(/*distance*/0,sensedir,
+ junctions = List_push(junctions,(void *) Junction_new_splice(/*distance*/0,orig_sensedir,
Substring_amb_donor_prob(left_ambig),
Substring_amb_acceptor_prob(left_ambig)));
} else {
@@ -5069,7 +5072,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
querystart,queryend,querylength,alignstart,alignend,
/*exactp*/Intlist_head(x) == 0 ? true : false,plusp,genestrand,
trim_left_action,trim_right_action,outofbounds_start,outofbounds_end,
- /*minlength*/0,sensedir)) == NULL) {
+ /*minlength*/0,orig_sensedir)) == NULL) {
debug0(printf("Poor substring (plus) for %d..%d, so returning NULL from Stage3end_new_substrings\n",
querystart,queryend));
for (p = substrings; p != NULL; p = List_next(p)) {
@@ -5115,7 +5118,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
j = junctions; /* Put here before we handle left_ambig */
if (left_ambig != NULL) {
substrings = List_push(substrings,(void *) left_ambig);
- junctions = List_push(junctions,(void *) Junction_new_splice(/*distance*/0,sensedir,
+ junctions = List_push(junctions,(void *) Junction_new_splice(/*distance*/0,orig_sensedir,
Substring_amb_donor_prob(left_ambig),
Substring_amb_acceptor_prob(left_ambig)));
} else {
@@ -5226,7 +5229,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
alignstart,alignend,
/*exactp*/Intlist_head(x) == 0 ? true : false,plusp,genestrand,
trim_left_action,trim_right_action,outofbounds_start,outofbounds_end,
- /*minlength*/0,sensedir)) == NULL) {
+ /*minlength*/0,orig_sensedir)) == NULL) {
debug0(printf("Poor substring (minus) for %d..%d, so returning NULL from Stage3end_new_substrings\n",
querylength - queryend,querylength - querystart));
for (p = substrings; p != NULL; p = List_next(p)) {
@@ -5269,7 +5272,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
if (right_ambig != NULL) {
substrings = List_push(substrings,(void *) right_ambig);
junctions = List_reverse(junctions);
- junctions = List_push(junctions,(void *) Junction_new_splice(/*distance*/0,sensedir,
+ junctions = List_push(junctions,(void *) Junction_new_splice(/*distance*/0,orig_sensedir,
Substring_amb_donor_prob(right_ambig),
Substring_amb_acceptor_prob(right_ambig)));
junctions = List_reverse(junctions);
@@ -5376,8 +5379,46 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
new->chrhigh = chrhigh;
new->chrlength = chrlength;
new->plusp = plusp;
- new->sensedir = sensedir;
+ if (orig_sensedir != SENSE_NULL) {
+ debug0(printf("sensedir is %d (original)\n",orig_sensedir));
+ new->sensedir = orig_sensedir;
+ } else {
+ new_sensedir = SENSE_NULL;
+ contradictionp = false;
+ for (p = new->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ debug0(printf("substring has sensedir %d\n",Substring_sensedir(substring)));
+ if (Substring_sensedir(substring) == SENSE_NULL) {
+ /* Ignore */
+ } else if (new_sensedir == SENSE_NULL) {
+ new_sensedir = Substring_sensedir(substring);
+ } else if (Substring_sensedir(substring) != new_sensedir) {
+ contradictionp = true;
+ }
+ }
+
+ for (p = new->junctions_1toN; p != NULL; p = List_next(p)) {
+ junction = (Junction_T) List_head(p);
+ debug0(printf("junction has sensedir %d\n",Junction_sensedir(junction)));
+ if (Junction_sensedir(junction) == SENSE_NULL) {
+ /* Ignore. Probably an indel. */
+ } else if (new_sensedir == SENSE_NULL) {
+ new_sensedir = Junction_sensedir(junction);
+ } else if (Junction_sensedir(junction) != new_sensedir) {
+ contradictionp = true;
+ }
+ }
+
+ if (contradictionp == true) {
+ debug0(printf("CONTRADICTION IN SENSEDIR\n"));
+ new->sensedir = SENSE_NULL;
+ } else {
+ debug0(printf("sensedir is %d\n",new_sensedir));
+ new->sensedir = new_sensedir;
+ }
+ }
+
new->nindels = nindels;
new->nmismatches_whole = nmismatches_whole;
new->nmismatches_bothdiff = nmismatches_bothdiff; /* Trimmed */
@@ -5803,7 +5844,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray1);
}
@@ -5823,7 +5864,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray2);
}
@@ -5844,7 +5885,8 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ cdna_direction,sensedir,/*sensedir_knownp*/true,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray1);
}
}
@@ -6210,7 +6252,7 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray1);
}
@@ -6229,7 +6271,7 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray2);
}
@@ -6249,7 +6291,8 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ cdna_direction,sensedir,/*sensedir_knownp*/true,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray1);
}
}
@@ -6443,7 +6486,7 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray1);
}
@@ -6463,7 +6506,7 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray2);
}
@@ -6484,7 +6527,8 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ cdna_direction,sensedir,/*sensedir_knownp*/true,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray1);
}
}
@@ -6670,7 +6714,7 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray1);
}
@@ -6689,7 +6733,7 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray2);
}
@@ -6709,7 +6753,8 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ cdna_direction,sensedir,/*sensedir_knownp*/true,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
FREE_OUT(pairarray1);
}
}
@@ -6887,7 +6932,6 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
new->nsegments = 1;
new->nmatches_posttrim = genomiclength;
new->nmatches = genomiclength;
-
new->trim_left = 0;
new->trim_right = 0;
new->trim_left_splicep = false;
@@ -7903,7 +7947,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
}
#else
assert(Substring_plusp(donor) == Substring_plusp(acceptor));
- assert(Substring_chimera_sensedir(donor) == Substring_chimera_sensedir(acceptor));
+ assert(Substring_sensedir(donor) == Substring_sensedir(acceptor));
new->plusp = Substring_plusp(donor);
#endif
@@ -8561,6 +8605,7 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->nmatches_posttrim = Substring_nmatches_posttrim(shortexon);
new->nmatches = Substring_nmatches(shortexon);
+
if (donor == NULL) {
if (0 && favor_ambiguous_p == true) {
new->nmatches += amb_length_donor;
@@ -9088,10 +9133,10 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
Univcoord_T left, int genomiclength, bool plusp, int genestrand,
char *accession, int querylength, Chrnum_T chrnum,
Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength,
- int cdna_direction, int sensedir, GMAP_source_T gmap_source) {
+ int cdna_direction, int orig_sensedir, bool sensedir_knownp, GMAP_source_T gmap_source) {
T new;
Univcoord_T genomicstart, genomicend, genomepos;
- double prob1, prob2;
+ double prob5_sense_forward = 0.0, prob5_sense_anti = 0.0, prob3_sense_forward = 0.0, prob3_sense_anti = 0.0; /* Zero-initialized: an end whose splice probabilities are never computed must count as "no evidence" when sensedir is inferred from these values */
Pair_T start, end;
List_T cigar_tokens;
@@ -9106,8 +9151,8 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
*/
/* However, this leads to fatal bugs later, so restored these statements */
- debug0(printf("Entered Stage3end_new_gmap with sensedir %d\n",sensedir));
- assert(sensedir == SENSE_NULL || sensedir == SENSE_ANTI || sensedir == SENSE_FORWARD);
+ debug0(printf("Entered Stage3end_new_gmap with orig_sensedir %d\n",orig_sensedir));
+ assert(orig_sensedir == SENSE_NULL || orig_sensedir == SENSE_ANTI || orig_sensedir == SENSE_FORWARD);
start = &(pairarray[0]);
end = &(pairarray[npairs-1]);
@@ -9115,7 +9160,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
hardclip_end = (querylength - 1) - end->querypos;
cigar_tokens = Pair_compute_cigar(&intronp,&hardclip_start,&hardclip_end,pairarray,npairs,querylength,
- /*watsonp*/plusp,sensedir,/*chimera_part*/0);
+ /*watsonp*/plusp,orig_sensedir,/*chimera_part*/0);
if (Pair_tokens_cigarlength(cigar_tokens) + hardclip_start + hardclip_end != querylength) {
fprintf(stderr,"Could not compute a valid cigar for %s: %d + %d + %d != %d\n",
accession,Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
@@ -9161,9 +9206,9 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, nmatches_posttrim %d, cdna_direction %d, sensedir %d, max_match_length %d, gmap_source %d\n",
+ debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, nmatches_posttrim %d, cdna_direction %d, orig_sensedir %d, max_match_length %d, gmap_source %d\n",
new,(unsigned long long) left,(unsigned int) (genomicstart - chroffset),(unsigned int) (genomicend - chroffset),
- (unsigned long long) chrhigh,chrnum,nmismatches_whole,nmatches_posttrim,cdna_direction,sensedir,max_match_length,gmap_source));
+ (unsigned long long) chrhigh,chrnum,nmismatches_whole,nmatches_posttrim,cdna_direction,orig_sensedir,max_match_length,gmap_source));
debug0(printf(" ambig_end_length_5 %d, ambig_end_length_3 %d\n",ambig_end_length_5,ambig_end_length_3));
debug0(Pair_dump_comp_array(pairarray,npairs));
@@ -9213,7 +9258,6 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
new->gmap_nindelbreaks = nindelbreaks;
new->gmap_cdna_direction = cdna_direction;
new->gmap_nintrons = nintrons;
- new->sensedir = sensedir;
#if 0
new->mapq_loglik = Substring_mapq_loglik(substring);
@@ -9283,20 +9327,40 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
new->trim_left = Pair_querypos(&(pairarray[0])) /*- ambig_end_length_5*/; /* Do not subtract ambig_end_length, so we are equivalent with substrings */
if (ambig_end_length_5 > 0) {
new->trim_left_splicep = true;
+ if (sensedir_knownp == false) {
+ if (plusp == true) {
+ genomepos = chroffset + Pair_genomepos(&(pairarray[0])) /*+ 1U*/;
+ prob5_sense_forward = Maxent_hr_acceptor_prob(genomepos,chroffset);
+ prob5_sense_anti = Maxent_hr_antidonor_prob(genomepos,chroffset);
+ debug0(printf("1. At %llu, acceptor prob %f, antidonor prob %f (verified)\n",
+ (unsigned long long) genomepos - chroffset,prob5_sense_forward,prob5_sense_anti));
+ } else {
+ genomepos = chroffset + Pair_genomepos(&(pairarray[0])) + 1U;
+ prob5_sense_anti = Maxent_hr_donor_prob(genomepos,chroffset);
+ prob5_sense_forward = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
+ debug0(printf("2. At %llu, donor prob %f, antiacceptor prob %f (verified)\n",
+ (unsigned long long) genomepos - chroffset,prob5_sense_anti,prob5_sense_forward));
+ }
+ }
+
} else if (novelsplicingp == false) {
new->trim_left_splicep = false;
+
} else {
- genomepos = chroffset + Pair_genomepos(&(pairarray[0])) + 1U;
if (plusp == true) {
- prob1 = Maxent_hr_acceptor_prob(genomepos,chroffset);
- prob2 = Maxent_hr_antidonor_prob(genomepos,chroffset);
- /* printf("At %llu, acceptor prob %f, antidonor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
+ genomepos = chroffset + Pair_genomepos(&(pairarray[0])) /*+ 1U*/;
+ prob5_sense_forward = Maxent_hr_acceptor_prob(genomepos,chroffset);
+ prob5_sense_anti = Maxent_hr_antidonor_prob(genomepos,chroffset);
+ debug0(printf("1. At %llu, acceptor prob %f, antidonor prob %f (verified)\n",
+ (unsigned long long) genomepos - chroffset,prob5_sense_forward,prob5_sense_anti));
} else {
- prob1 = Maxent_hr_donor_prob(genomepos,chroffset);
- prob2 = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
- /* printf("At %llu, donor prob %f, antiacceptor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
+ genomepos = chroffset + Pair_genomepos(&(pairarray[0])) + 1U;
+ prob5_sense_anti = Maxent_hr_donor_prob(genomepos,chroffset);
+ prob5_sense_forward = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
+ debug0(printf("2. At %llu, donor prob %f, antiacceptor prob %f (verified)\n",
+ (unsigned long long) genomepos - chroffset,prob5_sense_anti,prob5_sense_forward));
}
- if (prob1 > 0.90 || prob2 > 0.90) {
+ if (prob5_sense_forward > 0.90 || prob5_sense_anti > 0.90) {
new->trim_left_splicep = true;
} else {
new->trim_left_splicep = false;
@@ -9306,26 +9370,70 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
new->trim_right = (querylength - 1) - Pair_querypos(&(pairarray[npairs-1])) /*- ambig_end_length_3*/; /* Do not subtract ambig_end_length, so we are equivalent with substrings */
if (ambig_end_length_3 > 0) {
new->trim_right_splicep = true;
+ if (sensedir_knownp == false) {
+ if (plusp == true) {
+ genomepos = chroffset + Pair_genomepos(&(pairarray[npairs-1])) + 1U;
+ prob3_sense_forward = Maxent_hr_donor_prob(genomepos,chroffset);
+ prob3_sense_anti = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
+ debug0(printf("3. At %llu, donor prob %f, antiacceptor prob %f (verified)\n",
+ (unsigned long long) genomepos - chroffset,prob3_sense_forward,prob3_sense_anti));
+ } else {
+ genomepos = chroffset + Pair_genomepos(&(pairarray[npairs-1])) /*+ 1U*/;
+ prob3_sense_anti = Maxent_hr_acceptor_prob(genomepos,chroffset);
+ prob3_sense_forward = Maxent_hr_antidonor_prob(genomepos,chroffset);
+ debug0(printf("4. At %llu, acceptor prob %f, antidonor prob %f (verified)\n",
+ (unsigned long long) genomepos - chroffset,prob3_sense_anti,prob3_sense_forward));
+ }
+ }
+
} else if (novelsplicingp == false) {
new->trim_right_splicep = false;
+
} else {
- genomepos = chroffset + Pair_genomepos(&(pairarray[npairs-1])) + 1U;
if (plusp == true) {
- prob1 = Maxent_hr_donor_prob(genomepos,chroffset);
- prob2 = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
- /* printf("At %llu, donor prob %f, antiacceptor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
+ genomepos = chroffset + Pair_genomepos(&(pairarray[npairs-1])) + 1U;
+ prob3_sense_forward = Maxent_hr_donor_prob(genomepos,chroffset);
+ prob3_sense_anti = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
+ debug0(printf("3. At %llu, donor prob %f, antiacceptor prob %f (verified)\n",
+ (unsigned long long) genomepos - chroffset,prob3_sense_forward,prob3_sense_anti));
} else {
- prob1 = Maxent_hr_acceptor_prob(genomepos,chroffset);
- prob2 = Maxent_hr_antidonor_prob(genomepos,chroffset);
- /* printf("At %llu, acceptor prob %f, antidonor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
+ genomepos = chroffset + Pair_genomepos(&(pairarray[npairs-1])) /*+ 1U*/;
+ prob3_sense_anti = Maxent_hr_acceptor_prob(genomepos,chroffset);
+ prob3_sense_forward = Maxent_hr_antidonor_prob(genomepos,chroffset);
+ debug0(printf("4. At %llu, acceptor prob %f, antidonor prob %f (verified)\n",
+ (unsigned long long) genomepos - chroffset,prob3_sense_anti,prob3_sense_forward));
}
- if (prob1 > 0.90 || prob2 > 0.90) {
+ if (prob3_sense_forward > 0.90 || prob3_sense_anti > 0.90) {
new->trim_right_splicep = true;
} else {
new->trim_right_splicep = false;
}
}
+ if (sensedir_knownp == true) {
+ new->sensedir = orig_sensedir;
+ debug0(printf("sensedir is %d (original), because known, plusp %d\n",orig_sensedir,plusp));
+
+ } else if ((prob5_sense_forward > 0.90 && prob5_sense_forward > prob5_sense_anti + 0.10) ||
+ (prob3_sense_forward > 0.90 && prob3_sense_forward > prob3_sense_anti + 0.10)) {
+ debug0(printf("sensedir is %d, based on probabilities: %f %f fwd, %f %f anti, plusp %d\n",
+ SENSE_FORWARD,prob5_sense_forward,prob3_sense_forward,prob5_sense_anti,prob3_sense_anti, plusp));
+ new->sensedir = SENSE_FORWARD;
+
+ } else if ((prob5_sense_anti > 0.90 && prob5_sense_anti > prob5_sense_forward + 0.10) ||
+ (prob3_sense_anti > 0.90 && prob3_sense_anti > prob3_sense_forward + 0.10)) {
+ debug0(printf("sensedir is %d, based on probabilities: %f %f fwd, %f %f anti, plusp %d\n",
+ SENSE_ANTI,prob5_sense_forward,prob3_sense_forward,prob5_sense_anti,prob3_sense_anti,plusp));
+ new->sensedir = SENSE_ANTI;
+
+ } else {
+ debug0(printf("sensedir is %d, based on fall through: %f %f fwd, %f %f anti, plusp %d\n",
+ SENSE_NULL,prob5_sense_forward,prob3_sense_forward,prob5_sense_anti,prob3_sense_anti,plusp));
+ new->sensedir = SENSE_NULL;
+ }
+
+ debug0(printf("sensedir is %d\n",new->sensedir));
+
#if 0
/* new->penalties not used anyway for GMAP alignments */
#ifdef SCORE_INDELS_EVENTRIM
@@ -9432,10 +9540,12 @@ Stage3end_output_cmp (const void *a, const void *b) {
return -1;
} else if (y->guided_insertlength < x->guided_insertlength) {
return +1;
+#if 0
} else if (x->nmatches_posttrim > y->nmatches_posttrim) {
return -1;
} else if (y->nmatches_posttrim > x->nmatches_posttrim) {
return +1;
+#endif
} else if (x->score < y->score) {
return -1;
} else if (y->score < x->score) {
@@ -9492,10 +9602,12 @@ Stage3pair_output_cmp (const void *a, const void *b) {
return -1;
} else if (y->insertlength < x->insertlength) {
return +1;
+#if 0
} else if (x->nmatches_posttrim > y->nmatches_posttrim) {
return -1;
} else if (y->nmatches_posttrim > x->nmatches_posttrim) {
return +1;
+#endif
} else if (x->score < y->score) {
return -1;
} else if (y->score < x->score) {
@@ -11535,13 +11647,16 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
return -1;
}
+#if 0
} else if (hit->nmatches_posttrim < best_hit->nmatches_posttrim) {
+ /* Favors longer alignments to potentially wrong splice sites */
debug7(printf(" => %d loses by nmatches_posttrim\n",k));
return -1;
} else if (hit->nmatches_posttrim > best_hit->nmatches_posttrim) {
debug7(printf(" => %d wins by nmatches_posttrim\n",k));
return +1;
+#endif
#if 0
} else if (hit->nsplices > best_hit->nsplices) {
@@ -15141,10 +15256,14 @@ hitpair_equiv_cmp (Stage3pair_T x, Stage3pair_T y) {
return -1;
} else if (y->score < x->score) {
return +1;
+#endif
+
} else if (x->nmatches > y->nmatches) {
return -1;
} else if (y->nmatches > x->nmatches) {
return +1;
+
+#if 0
} else if (x->nmatches_posttrim > y->nmatches_posttrim) {
return -1;
} else if (y->nmatches_posttrim > x->nmatches_posttrim) {
@@ -15679,13 +15798,13 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
return -1;
}
- } else if (hitpair->nmatches_posttrim < best_hitpair->nmatches_posttrim) {
+ } else if (hitpair->nmatches < best_hitpair->nmatches) {
/* k is worse */
- debug8(printf(" => loses by nmatches_posttrim\n"));
+ debug8(printf(" => loses by nmatches\n"));
return -1;
- } else if (hitpair->nmatches_posttrim > best_hitpair->nmatches_posttrim) {
+ } else if (hitpair->nmatches > best_hitpair->nmatches) {
/* k is better */
- debug8(printf(" => wins by nmatches_posttrim\n"));
+ debug8(printf(" => wins by nmatches\n"));
return +1;
#if 0
@@ -17080,8 +17199,11 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
#endif
} else {
- debug6(printf("Final: Keeping hit pair %p with scores %d+%d (vs cutoff_level %d)\n",
- hitpair,hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,cutoff_level));
+ debug6(printf("Final: Keeping hit pair %p at %u..%u|%u..%u with nmatches_posttrim %d (%d+%d) + amb %d+%d < cutoff_level %d (finalp %d)\n",
+ hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
+ hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
+ hitpair->nmatches_posttrim,hitpair->hit5->nmatches_posttrim,hitpair->hit3->nmatches_posttrim,
+ amb_length(hitpair->hit5),amb_length(hitpair->hit3),cutoff_level,finalp));
optimal = List_push(optimal,hitpair);
}
}
@@ -17491,9 +17613,9 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
- if (Stage3pair_max_trim(stage3pair) > 8) {
+ if (Stage3pair_max_trim(stage3pair) > 18) {
/* Don't use terminals to set new_found_score */
- debug5(printf("Max trim is %d > 8, so treating as terminals\n",Stage3pair_max_trim(stage3pair)));
+ debug5(printf("Max trim is %d > 18, so treating as terminals\n",Stage3pair_max_trim(stage3pair)));
*terminals = List_push(*terminals,(void *) stage3pair);
} else if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
@@ -17611,9 +17733,9 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
- if (Stage3pair_max_trim(stage3pair) > 8) {
+ if (Stage3pair_max_trim(stage3pair) > 18) {
/* Don't use terminals to set new_found_score */
- debug5(printf("Max trim is %d > 8, so treating as terminals\n",Stage3pair_max_trim(stage3pair)));
+ debug5(printf("Max trim is %d > 18, so treating as terminals\n",Stage3pair_max_trim(stage3pair)));
*terminals = List_push(*terminals,(void *) stage3pair);
} else if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
@@ -17855,13 +17977,13 @@ sort_hits_by_trimmed_score (T **hits_plus, T **hits_minus, int *nhits_plus, int
for (q = hitarray[i]; q != NULL; q = q->rest) {
hit = (T) q->first;
debug5(printf(" : %p score %d, type %s\n",hit,hit->score,hittype_string(hit->hittype)));
- assert(hit->nmismatches_bothdiff >= 0);
- if (hit->nmismatches_bothdiff > cutoff_level) {
+ /* assert(hit->nmismatches_bothdiff >= 0); */
+ if (hit->querylength - hit->nmatches > cutoff_level) {
debug5(printf("Skipping hit with trimmed score %d > cutoff level %d\n",hit->nmismatches_bothdiff,cutoff_level));
} else if (hit->plusp == true) {
- nhits_plus[hit->nmismatches_bothdiff]++;
+ nhits_plus[hit->querylength - hit->nmatches]++;
} else {
- nhits_minus[hit->nmismatches_bothdiff]++;
+ nhits_minus[hit->querylength - hit->nmatches]++;
}
}
}
@@ -17911,12 +18033,12 @@ sort_hits_by_trimmed_score (T **hits_plus, T **hits_minus, int *nhits_plus, int
for (i = 0; i < narray; i++) {
for (q = hitarray[i]; q != NULL; q = q->rest) {
hit = (T) q->first;
- if (hit->nmismatches_bothdiff > cutoff_level) {
+ if (hit->querylength - hit->nmatches > cutoff_level) {
/* Skip */
} else if (hit->plusp == true) {
- hits_plus[hit->nmismatches_bothdiff][nhits_plus[hit->nmismatches_bothdiff]++] = hit;
+ hits_plus[hit->querylength - hit->nmatches][nhits_plus[hit->querylength - hit->nmatches]++] = hit;
} else {
- hits_minus[hit->nmismatches_bothdiff][nhits_minus[hit->nmismatches_bothdiff]++] = hit;
+ hits_minus[hit->querylength - hit->nmatches][nhits_minus[hit->querylength - hit->nmatches]++] = hit;
}
}
}
@@ -17953,8 +18075,8 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
for (i = 0; i < narray5; i++) {
for (q = hitarray5[i]; q != NULL; q = q->rest) {
hit = (T) q->first;
- if (min_score_5 == -1 || hit->nmismatches_bothdiff < min_score_5) {
- min_score_5 = hit->nmismatches_bothdiff;
+ if (min_score_5 == -1 || hit->querylength - hit->nmatches < min_score_5) {
+ min_score_5 = hit->querylength - hit->nmatches;
}
}
}
@@ -17966,8 +18088,8 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
for (i = 0; i < narray3; i++) {
for (q = hitarray3[i]; q != NULL; q = q->rest) {
hit = (T) q->first;
- if (min_score_3 == -1 || hit->nmismatches_bothdiff < min_score_3) {
- min_score_3 = hit->nmismatches_bothdiff;
+ if (min_score_3 == -1 || hit->querylength - hit->nmatches < min_score_3) {
+ min_score_3 = hit->querylength - hit->nmatches;
}
}
}
diff --git a/src/stage3hr.h b/src/stage3hr.h
index ec2637a..b7b6566 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 197778 2016-09-14 00:44:23Z twu $ */
+/* $Id: stage3hr.h 198078 2016-09-21 00:34:48Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -400,7 +400,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
Univcoord_T left, int genomiclength, bool plusp, int genestrand,
char *accession, int querylength,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength,
- int cdna_direction, int sensedir, GMAP_source_T gmap_source);
+ int cdna_direction, int sensedir, bool sensedir_knownp, GMAP_source_T gmap_source);
extern List_T
Stage3end_sort_bymatches (List_T hits);
diff --git a/src/substring.c b/src/substring.c
index 87ae86d..64a3b29 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 197774 2016-09-14 00:40:45Z twu $";
+static char rcsid[] = "$Id: substring.c 198079 2016-09-21 00:35:41Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -311,8 +311,7 @@ struct T {
float mapq_loglik;
- /* for splices */
- int chimera_sensedir;
+ int sensedir;
Univcoord_T splicecoord_D;
int splicesitesD_knowni; /* Needed for intragenic_splice_p in stage1hr.c */
@@ -1822,18 +1821,18 @@ embellish_genomic_sam (char *genomic_diff, char *query, int querystart, int quer
}
-static void
+static int
substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_length_5, int *ambig_end_length_3,
Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
- double *ambig_prob_5, double *ambig_prob_3, int *sensedir,
+ double *ambig_prob_5, double *ambig_prob_3, int orig_sensedir,
Univcoord_T chroffset) {
-
+ int new_sensedir;
Univcoord_T start5, middle5, end5, start3, middle3, end3;
Univcoord_T genomicstart5, genomicend3;
bool solve5p, solve3p, plusp;
- debug13(printf("\nEntered substring_trim_novel_spliceends with sensedir %d\n",*sensedir));
+ debug13(printf("\nEntered substring_trim_novel_spliceends with orig_sensedir %d\n",orig_sensedir));
*ambig_end_length_5 = 0;
*ambig_end_length_3 = 0;
*ambig_prob_5 = 0.0;
@@ -1909,15 +1908,16 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
start5 - chroffset,middle5 - chroffset,end5 - chroffset));
}
- Splice_trim_novel_spliceends(&(*ambig_end_length_5),&(*ambig_end_length_3),
- &(*ambig_splicetype_5),&(*ambig_splicetype_3),
- &(*ambig_prob_5),&(*ambig_prob_3),&(*sensedir),
- start5,middle5,end5,solve5p,start3,middle3,end3,solve3p,
- genomicstart5,genomicend3,chroffset,plusp);
+ new_sensedir = Splice_trim_novel_spliceends(&(*ambig_end_length_5),&(*ambig_end_length_3),
+ &(*ambig_splicetype_5),&(*ambig_splicetype_3),
+ &(*ambig_prob_5),&(*ambig_prob_3),orig_sensedir,
+ start5,middle5,end5,solve5p,start3,middle3,end3,solve3p,
+ genomicstart5,genomicend3,chroffset,plusp);
- debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d, probs %f and %f\n",
- *ambig_end_length_5,*ambig_end_length_3,*ambig_prob_5,*ambig_prob_3));
- return;
+ debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d, probs %f and %f, new_sensedir %d\n",
+ *ambig_end_length_5,*ambig_end_length_3,*ambig_prob_5,*ambig_prob_3,new_sensedir));
+
+ return new_sensedir;
}
@@ -1930,7 +1930,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
Univcoord_T alignstart, Univcoord_T alignend,
bool exactp, bool plusp, int genestrand,
Trimaction_T trim_left_action, Trimaction_T trim_right_action,
- int outofbounds_start, int outofbounds_end, int minlength, int sensedir) {
+ int outofbounds_start, int outofbounds_end, int minlength, int orig_sensedir) {
T new;
int nmatches;
/* int nonterminal_trim = 0; */
@@ -1939,6 +1939,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
double ambig_prob_5, ambig_prob_3;
int nmismatches_end_left, nmismatches_end_right;
+ int new_sensedir = SENSE_NULL;
int trim;
@@ -2183,9 +2184,9 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->trim_left_splicep = new->trim_right_splicep = false;
if (novelsplicingp == true) {
if (trim_left_action == COMPUTE_TRIM && trim_right_action == COMPUTE_TRIM) {
- substring_trim_novel_spliceends(/*substring1*/new,/*substringN*/new,&ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
- &sensedir,chroffset);
+ new_sensedir = substring_trim_novel_spliceends(/*substring1*/new,/*substringN*/new,&ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
+ orig_sensedir,chroffset);
if (ambig_end_length_5 > 0) {
/* Revise trim_left to be at the splice site */
new->trim_left_splicep = true;
@@ -2222,9 +2223,9 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
}
} else if (trim_left_action == COMPUTE_TRIM) {
- substring_trim_novel_spliceends(/*substring1*/new,/*substringN*/NULL,&ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
- &sensedir,chroffset);
+ new_sensedir = substring_trim_novel_spliceends(/*substring1*/new,/*substringN*/NULL,&ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
+ orig_sensedir,chroffset);
if (ambig_end_length_5 > 0) {
/* Revise trim_left to be at the splice site */
new->trim_left_splicep = true;
@@ -2244,9 +2245,9 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
}
} else if (trim_right_action == COMPUTE_TRIM) {
- substring_trim_novel_spliceends(/*substring1*/NULL,/*substringN*/new,&ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
- &sensedir,chroffset);
+ new_sensedir = substring_trim_novel_spliceends(/*substring1*/NULL,/*substringN*/new,&ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
+ orig_sensedir,chroffset);
if (ambig_end_length_3 > 0) {
/* Revise trim_right to be at the splice site */
new->trim_right_splicep = true;
@@ -2294,9 +2295,9 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->trim_left_splicep = new->trim_right_splicep = false;
if (novelsplicingp == true) {
if (trim_left_action == COMPUTE_TRIM && trim_right_action == COMPUTE_TRIM) {
- substring_trim_novel_spliceends(/*substring1*/new,/*substringN*/new,&ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
- &sensedir,chroffset);
+ new_sensedir = substring_trim_novel_spliceends(/*substring1*/new,/*substringN*/new,&ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
+ orig_sensedir,chroffset);
if (ambig_end_length_5 > 0) {
/* Revise trim_left to be at the splice site */
new->trim_left_splicep = true;
@@ -2333,9 +2334,9 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
}
} else if (trim_left_action == COMPUTE_TRIM) {
- substring_trim_novel_spliceends(/*substring1*/new,/*substringN*/NULL,&ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
- &sensedir,chroffset);
+ new_sensedir = substring_trim_novel_spliceends(/*substring1*/new,/*substringN*/NULL,&ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
+ orig_sensedir,chroffset);
if (ambig_end_length_5 > 0) {
/* Revise trim_left to be at the splice site */
new->trim_left_splicep = true;
@@ -2355,9 +2356,9 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
}
} else if (trim_right_action == COMPUTE_TRIM) {
- substring_trim_novel_spliceends(/*substring1*/NULL,/*substringN*/new,&ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
- &sensedir,chroffset);
+ new_sensedir = substring_trim_novel_spliceends(/*substring1*/NULL,/*substringN*/new,&ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
+ orig_sensedir,chroffset);
if (ambig_end_length_3 > 0) {
/* Revise trim_right to be at the splice site */
new->trim_right_splicep = true;
@@ -2379,6 +2380,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
}
}
+ new->sensedir = new_sensedir;
/* nmismatches_bothdiff: Counts matches of trimmed region */
if (new->trim_left == 0 && new->trim_right == 0) {
@@ -3492,8 +3494,8 @@ Substring_nchimera_novel (T this) {
int
-Substring_chimera_sensedir (T this) {
- return this->chimera_sensedir;
+Substring_sensedir (T this) {
+ return this->sensedir;
}
@@ -3642,7 +3644,7 @@ Substring_copy (T old) {
new->mapq_loglik = old->mapq_loglik;
- new->chimera_sensedir = old->chimera_sensedir;
+ new->sensedir = old->sensedir;
new->splicecoord_D = old->splicecoord_D;
new->splicesitesD_knowni = old->splicesitesD_knowni;
@@ -3740,7 +3742,7 @@ Substring_new_startfrag (Univcoord_T startfrag_coord, int splice_pos, int nmisma
query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
alignstart,alignend,/*exactp*/false,plusp,genestrand,
trim_left_action,trim_right_action,/*outofbounds_start*/0,/*outofbounds_end*/0,
- /*minlength*/0,/*sensedir*/SENSE_NULL)) == NULL) {
+ /*minlength*/0,/*orig_sensedir*/SENSE_NULL)) == NULL) {
return (T) NULL;
}
@@ -3749,8 +3751,6 @@ Substring_new_startfrag (Univcoord_T startfrag_coord, int splice_pos, int nmisma
new->splicecoord_N = startfrag_coord;
assert(startfrag_coord == left + splice_pos);
- new->chimera_sensedir = SENSE_NULL;
-
if (plusp == true) {
new->siteN_pos = splice_pos;
} else {
@@ -3823,8 +3823,6 @@ Substring_new_endfrag (Univcoord_T endfrag_coord, int splice_pos, int nmismatche
new->splicecoord_N = endfrag_coord;
assert(endfrag_coord == left + splice_pos);
- new->chimera_sensedir = SENSE_NULL;
-
if (plusp == true) {
new->siteN_pos = splice_pos;
} else {
@@ -3968,7 +3966,9 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
new->splicesitesD_knowni = donor_knowni;
assert(donor_coord == left + donor_pos);
- new->chimera_sensedir = sensedir;
+ if (new->sensedir == SENSE_NULL) {
+ new->sensedir = sensedir;
+ }
if (donor_knowni >= 0) {
new->siteD_knownp = true;
/* new->chimera_novelp = false */
@@ -4120,7 +4120,9 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
new->splicesitesA_knowni = acceptor_knowni;
assert(acceptor_coord == left + acceptor_pos);
- new->chimera_sensedir = sensedir;
+ if (new->sensedir == SENSE_NULL) {
+ new->sensedir = sensedir;
+ }
if (acceptor_knowni >= 0) {
new->siteA_knownp = true;
/* new->chimera_novelp = false */
@@ -4215,7 +4217,9 @@ Substring_new_shortexon (Univcoord_T acceptor_coord, int acceptor_knowni, Univco
new->splicecoord_D = donor_coord;
new->splicesitesD_knowni = donor_knowni;
- new->chimera_sensedir = sensedir;
+ if (new->sensedir == SENSE_NULL) {
+ new->sensedir = sensedir;
+ }
if (acceptor_knowni >= 0) {
new->siteA_knownp = true;
@@ -4258,14 +4262,14 @@ Substring_assign_donor_prob (T donor) {
} else if (donor->siteD_knownp == false) {
/* Prob already assigned */
- } else if (donor->chimera_sensedir == SENSE_FORWARD) {
+ } else if (donor->sensedir == SENSE_FORWARD) {
if (donor->plusp == true) {
donor->siteD_prob = Maxent_hr_donor_prob(donor->splicecoord_D,donor->chroffset);
} else {
donor->siteD_prob = Maxent_hr_antidonor_prob(donor->splicecoord_D,donor->chroffset);
}
- } else if (donor->chimera_sensedir == SENSE_ANTI) {
+ } else if (donor->sensedir == SENSE_ANTI) {
if (donor->plusp == true) {
donor->siteD_prob = Maxent_hr_antidonor_prob(donor->splicecoord_D,donor->chroffset);
} else {
@@ -4289,14 +4293,14 @@ Substring_assign_acceptor_prob (T acceptor) {
} else if (acceptor->siteA_knownp == false) {
/* Prob already assigned */
- } else if (acceptor->chimera_sensedir == SENSE_FORWARD) {
+ } else if (acceptor->sensedir == SENSE_FORWARD) {
if (acceptor->plusp == true) {
acceptor->siteA_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord_A,acceptor->chroffset);
} else {
acceptor->siteA_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord_A,acceptor->chroffset);
}
- } else if (acceptor->chimera_sensedir == SENSE_ANTI) {
+ } else if (acceptor->sensedir == SENSE_ANTI) {
if (acceptor->plusp == true) {
acceptor->siteA_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord_A,acceptor->chroffset);
} else {
@@ -4318,14 +4322,14 @@ Substring_assign_shortexon_prob (T shortexon) {
if (shortexon->siteA_knownp == false) {
/* Prob1 already assigned */
- } else if (shortexon->chimera_sensedir == SENSE_FORWARD) {
+ } else if (shortexon->sensedir == SENSE_FORWARD) {
if (shortexon->plusp == true) {
shortexon->siteA_prob = Maxent_hr_acceptor_prob(shortexon->splicecoord_A,shortexon->chroffset);
} else {
shortexon->siteA_prob = Maxent_hr_antiacceptor_prob(shortexon->splicecoord_A,shortexon->chroffset);
}
- } else if (shortexon->chimera_sensedir == SENSE_ANTI) {
+ } else if (shortexon->sensedir == SENSE_ANTI) {
if (shortexon->plusp == true) {
shortexon->siteA_prob = Maxent_hr_antiacceptor_prob(shortexon->splicecoord_A,shortexon->chroffset);
} else {
@@ -4339,14 +4343,14 @@ Substring_assign_shortexon_prob (T shortexon) {
if (shortexon->siteD_knownp == false) {
/* Prob2 already assigned */
- } else if (shortexon->chimera_sensedir == SENSE_FORWARD) {
+ } else if (shortexon->sensedir == SENSE_FORWARD) {
if (shortexon->plusp == true) {
shortexon->siteD_prob = Maxent_hr_donor_prob(shortexon->splicecoord_D,shortexon->chroffset);
} else {
shortexon->siteD_prob = Maxent_hr_antidonor_prob(shortexon->splicecoord_D,shortexon->chroffset);
}
- } else if (shortexon->chimera_sensedir == SENSE_ANTI) {
+ } else if (shortexon->sensedir == SENSE_ANTI) {
if (shortexon->plusp == true) {
shortexon->siteD_prob = Maxent_hr_antidonor_prob(shortexon->splicecoord_D,shortexon->chroffset);
} else {
diff --git a/src/substring.h b/src/substring.h
index 3cbdfc6..4f6f570 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 197775 2016-09-14 00:41:46Z twu $ */
+/* $Id: substring.h 198080 2016-09-21 00:35:55Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -61,7 +61,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
Univcoord_T alignstart, Univcoord_T alignend,
bool exactp, bool plusp, int genestrand,
Trimaction_T trim_left_action, Trimaction_T trim_right_action, int outofbounds_start, int outofbounds_end,
- int minlength, int sensedir);
+ int minlength, int orig_sensedir);
extern T
Substring_new_ambig_D (int querystart, int queryend, int splice_pos, int querylength,
@@ -258,7 +258,7 @@ Substring_siteA_pos (T this);
extern int
Substring_siteN_pos (T this);
extern int
-Substring_chimera_sensedir (T this);
+Substring_sensedir (T this);
extern bool
Substring_ambiguous_p (T this);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list