[med-svn] [gmap] 01/03: Imported Upstream version 2016-08-16
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Thu Aug 18 09:21:24 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 2deaf4fd86266c637468c1fe90b31e5f5f84d5f5
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Wed Aug 17 15:51:24 2016 +0200
Imported Upstream version 2016-08-16
---
ChangeLog | 109 +-
Makefile.in | 11 +-
README | 31 +-
TODO | 3 +
VERSION | 2 +-
aclocal.m4 | 36 +
configure | 85 +-
configure.ac | 15 +-
src/ChangeLog | 0
src/Makefile.am | 32 +-
src/Makefile.in | 45 +-
src/atoi.c | 2 +-
src/bytecoding.c | 18 +-
src/cmet.c | 2 +-
src/comp.h | 2 +-
src/compile | 165 +++
src/filestring.c | 2 +-
src/genome_sites.c | 2 +-
src/gmap.c | 21 +-
src/gsnap.c | 19 +-
src/indel.c | 120 +-
src/mapq.c | 21 +-
src/pair.c | 20 +-
src/pair.h | 4 +-
src/pairpool.c | 2 +-
src/samprint.c | 22 +-
src/sarray-read.c | 502 +++++---
src/sedgesort.c | 2 +-
src/sedgesort.h | 2 +-
src/shortread.c | 28 +-
src/splice.c | 895 +++-----------
src/stage1hr.c | 2893 ++++++++++++++++----------------------------
src/stage1hr.h | 5 +-
src/stage3.c | 19 +-
src/stage3hr.c | 207 ++--
src/stage3hr.h | 2 +-
src/substring.c | 1220 +++++++++++++------
src/substring.h | 76 +-
src/uniqscan.c | 15 +-
src/univdiag.h | 2 +-
tests/Makefile.in | 9 +-
util/Makefile.in | 9 +-
util/gtf_genes.pl.in | 24 +-
util/gtf_introns.pl.in | 2 +-
util/gtf_splicesites.pl.in | 2 +-
45 files changed, 3284 insertions(+), 3421 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index f9cb1d7..e592956 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,54 +1,95 @@
-2016-08-08 twu
+2016-08-16 twu
+
+ * VERSION: Updated version number
- * atoi.c, cmet.c: Fixed reduce procedures for 64-bit oligos
+ * README: Discussing MAX_STACK_READLENGTH
- * stage1hr.c: Fixed values of splice_pos_start and splice_pos_end given to
- Genome_donor_positions and related functions
+ * gsnap.c, uniqscan.c: Using MAX_FLOORS_READLENGTH instead of MAX_READLENGTH
- * filestring.c: Handling the case where stringlen is negative
+ * configure.ac: Using MAX_STACK_READLENGTH instead of MAX_READLENGTH
- * stage3.c: Merged revision 195962 from trunk to fix an issue where we tried
- to use pairs_pretrim after path_trim altered the pairs
+ * Makefile.gsnaptoo.am: Using MAX_STACK_READLENGTH instead of MAX_READLENGTH
- * samprint.c, substring.c, substring.h: Merged revision 195960 from trunk to
- fix XT field to have correct fusion coordinates
+ * stage1hr.h: Adding max_floor_readlength to setup
-2016-08-04 twu
+ * stage1hr.c: Removed local allocation of arrays of size MAX_READLENGTH.
+ Now checking querylength against MAX_STACK_READLENGTH to determine whether
+ to allocate from stack or heap. Adding max_floor_readlength to setup
- * 2016-08-02-long-read-fusions, comp.h, config.site.rescomp.prd, pair.c,
- pairpool.c, sarray-read.c, src, stage3.c, util: Merged revisions 195492
- through 195762 from branches/2016-07-01-better-triage to get latest fixes
+ * indel.c, mapq.c, sarray-read.c, splice.c: Removed local allocation of
+ arrays of size MAX_READLENGTH. Now checking querylength against
+ MAX_STACK_READLENGTH to determine whether to allocate from stack or heap
- * 2016-08-02-long-read-fusions, Makefile.gsnaptoo.am, comp.h,
- config.site.rescomp.prd, configure.ac, filestring.c, gsnap.c, pair.c,
- pairpool.c, samprint.c, sarray-read.c, sedgesort.c, sedgesort.h,
- shortread.c, src, stage1hr.c, stage3.c, stage3hr.c, stage3hr.h,
- substring.c, substring.h, univdiag.c, univdiag.h: Merged revisions 193240
- to 195491 from branches/2016-07-01-better-triage for better performance
+ * stage3hr.c: Not allowing any indels to set trims in determining optimal
+ score
-2016-08-03 twu
+ * stage1hr.c: Using pre-processor macro LONG_READLENGTHS to allocate
+ read-related memory on heap instead of stack. Setting spliceable_high_p
+ to be false for last segment. In computing end indels, ensuring that
+ shifti is not negative when looking up array value.
- * stage1hr.c: Hard-coded some values for plusp
+ * shortread.c: Using MAX_EXPECTED_READLENGTH instead of MAX_READLENGTH
- * splice.c: Using new interface to Substring_new_donor and
- Substring_new_acceptor
+ * stage3.c: Handling the case when trimming ends that exon is empty
- * stage1hr.c: In computing spliceable segments, using a variable for holding
- previous spliceable information, to resolve writing to an uninitialized
- ptr at end. Using a streamlined version of splicing for distant RNA.
+ * stage3hr.c: Restored setting of abort_pairing_p when nconcordant exceeds
+ maxpairedpaths
- * substring.c, substring.h: Added parameters substring_querystart and
- substring_queryend to Substring_new_donor and Substring_new_acceptor, so
- we can handle splicing segments in the middle of the read
+ * gsnap.c, uniqscan.c: Using new interface to Pair_setup
- * genome_sites.c: Added debugging statements
+ * indel.c, mapq.c, sarray-read.c, splice.c, substring.c: Using pre-processor
+ macro LONG_READLENGTHS to allocate read-related memory on heap instead of
+ stack
- * stage1hr.c: Allowing fusions to occur between middle segments that are
- spliceable on their distal ends
+ * gmap.c, pair.c, pair.h: Added option --gff3-swap-phase
-2016-08-02 twu
+ * bytecoding.c: Added explanation messages to remove shared memory segments
+
+2016-08-12 twu
+
+ * Makefile.gsnaptoo.am, config.site.rescomp.prd, configure.ac, filestring.c,
+ genome_sites.c, gsnap.c, pair.c, samprint.c, sarray-read.c, sedgesort.c,
+ sedgesort.h, shortread.c, splice.c, src, stage1hr.c, stage3hr.c,
+ stage3hr.h, substring.c, substring.h, trunk, univdiag.c, univdiag.h, util:
+ Merged revisions 195608 to 196272 from
+ branches/2016-08-09-genome-sites-hr, which contains merged revisions from
+ branches/2016-08-02-long-read-fusions and 2016-07-01-better-triage
+
+ * VERSION, trunk: Updated version number
+
+ * Makefile.gsnaptoo.am: Removed chrsubset.c and chrsubset.h for
+ splicing-score
+
+ * pair.c: Added variable to swap phase for gff3 output
+
+ * configure.ac: Added a line to disable maintainer mode for users
+
+ * config.site.rescomp.prd, config.site.rescomp.tst: Updated for latest
+ version
+
+ * MAINTAINER: Added note about PATH
- * 2016-08-02-long-read-fusions: Created branch to find fusions in long reads
+ * archive.html, index.html: Updated for latest version
+
+2016-08-08 twu
+
+ * gtf_genes.pl.in, gtf_introns.pl.in, gtf_splicesites.pl.in: Printing both
+ gene_id and gene_name
+
+ * atoi.c, cmet.c: Fixed reduce procedures for 64-bit computers
+
+ * Makefile.gsnaptoo.am: Added semaphore.c and semaphore.h to list of files
+ for splicing-score
+
+ * stage1hr.c: Fixed debugging statements
+
+ * stage3.c: Fixed issue where we tried to use pairs_pretrim after path_trim
+ altered the pairs
+
+ * samprint.c, substring.c, substring.h: Fixed XT field to print correct
+ junction coordinates
+
+2016-08-02 twu
* stage3hr.c: Restoring final procedure based on nmatches in
Stage3pair_optimal_score
diff --git a/Makefile.in b/Makefile.in
index 6d532b2..36e3f57 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -184,7 +184,7 @@ am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/compile \
$(top_srcdir)/config/config.sub \
$(top_srcdir)/config/install-sh $(top_srcdir)/config/ltmain.sh \
$(top_srcdir)/config/missing AUTHORS COPYING ChangeLog INSTALL \
- NEWS README config/compile config/config.guess \
+ NEWS README TODO config/compile config/config.guess \
config/config.sub config/install-sh config/ltmain.sh \
config/missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -273,9 +273,10 @@ LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
-MAX_READLENGTH = @MAX_READLENGTH@
+MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
@@ -379,7 +380,7 @@ all: all-recursive
.SUFFIXES:
am--refresh: Makefile
@:
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -405,9 +406,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
$(SHELL) ./config.status --recheck
-$(top_srcdir)/configure: $(am__configure_deps)
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
$(am__cd) $(srcdir) && $(AUTOCONF)
-$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
$(am__aclocal_m4_deps):
diff --git a/README b/README
index fac3a4c..9693b5a 100644
--- a/README
+++ b/README
@@ -51,26 +51,25 @@ then refer to it like this
./configure CONFIG_SITE=<config site file>
-Note 3: GSNAP is designed for short reads of a limited length, and
-uses a configure variable called MAX_READLENGTH (default 300) as a
-guide to the maximum read length. You may set this variable by
-providing it to configure like this
+Note 3: GSNAP previously had a configure variable called
+MAX_READLENGTH (default 300) as a guide to the maximum read length.
+That variable is no longer needed, since GSNAP can align reads of
+arbitrary length. (But, for longer reads, GMAP will probably be much
+faster.)
- ./configure MAX_READLENGTH=<length>
+However, whenever possible, based on the length of the read, GSNAP
+will use stack memory instead of heap memory for some algorithms. To
+control this decision, there is a variable called
+MAX_STACK_READLENGTH, set like this
+
+ ./configure MAX_STACK_READLENGTH=<length>
or by defining it in your config.site file (or in the file provided to
configure as the value of CONFIG_SITE). Or you may set the value of
-MAX_READLENGTH as an environment variable before calling ./configure.
-If you do not set MAX_READLENGTH, it will have the default value shown
-when you run "./configure --help".
-
-Note that MAX_READLENGTH applies only to GSNAP. GMAP, on the other
-hand, can process queries up to 1 million bp.
-
-Also, starting with version 2014-08-20, if your C compiler can
-handle stack-based memory allocation using the alloca() function,
-GSNAP ignores MAX_READLENGTH, and can handle reads longer than that
-value.
+MAX_STACK_READLENGTH as an environment variable before calling
+./configure. If you set MAX_STACK_READLENGTH too high, you may
+overflow the amount of stack allocated by your computer. If you do
+not set MAX_STACK_READLENGTH, it will have a default value of 300.
Note 4: GSNAP can read from gzip-compressed FASTA or FASTQ input
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..c5a7bd7
--- /dev/null
+++ b/TODO
@@ -0,0 +1,3 @@
+
+Add flag that allows for splitting afterwards.
+
diff --git a/VERSION b/VERSION
index 6f5cbc7..7e6fbae 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-08-08
\ No newline at end of file
+2016-08-16
\ No newline at end of file
diff --git a/aclocal.m4 b/aclocal.m4
index 0b0e230..4f3cc97 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -645,6 +645,42 @@ fi
rmdir .tst 2>/dev/null
AC_SUBST([am__leading_dot])])
+# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
+# From Jim Meyering
+
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MAINTAINER_MODE([DEFAULT-MODE])
+# ----------------------------------
+# Control maintainer-specific portions of Makefiles.
+# Default is to disable them, unless 'enable' is passed literally.
+# For symmetry, 'disable' may be passed as well. Anyway, the user
+# can override the default with the --enable/--disable switch.
+AC_DEFUN([AM_MAINTAINER_MODE],
+[m4_case(m4_default([$1], [disable]),
+ [enable], [m4_define([am_maintainer_other], [disable])],
+ [disable], [m4_define([am_maintainer_other], [enable])],
+ [m4_define([am_maintainer_other], [enable])
+ m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])])
+AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
+ dnl maintainer-mode's default is 'disable' unless 'enable' is passed
+ AC_ARG_ENABLE([maintainer-mode],
+ [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode],
+ am_maintainer_other[ make rules and dependencies not useful
+ (and sometimes confusing) to the casual installer])],
+ [USE_MAINTAINER_MODE=$enableval],
+ [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes]))
+ AC_MSG_RESULT([$USE_MAINTAINER_MODE])
+ AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes])
+ MAINT=$MAINTAINER_MODE_TRUE
+ AC_SUBST([MAINT])dnl
+]
+)
+
# Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
diff --git a/configure b/configure
index 1e873be..5992757 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-08-08.
+# Generated by GNU Autoconf 2.69 for gmap 2016-08-16.
#
# Report bugs to <Thomas Wu <twu@gene.com>>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-08-08'
-PACKAGE_STRING='gmap 2016-08-08'
+PACKAGE_VERSION='2016-08-16'
+PACKAGE_STRING='gmap 2016-08-16'
PACKAGE_BUGREPORT='Thomas Wu <twu@gene.com>'
PACKAGE_URL=''
@@ -638,7 +638,7 @@ LTLIBOBJS
LIBOBJS
BZLIB_LIBS
ZLIB_LIBS
-MAX_READLENGTH
+MAX_STACK_READLENGTH
GMAPDB
MAKE_SSE2_FALSE
MAKE_SSE2_TRUE
@@ -694,6 +694,9 @@ MAINTAINER_FALSE
MAINTAINER_TRUE
FULLDIST_FALSE
FULLDIST_TRUE
+MAINT
+MAINTAINER_MODE_FALSE
+MAINTAINER_MODE_TRUE
AM_BACKSLASH
AM_DEFAULT_VERBOSITY
AM_DEFAULT_V
@@ -795,6 +798,7 @@ enable_option_checking
enable_largefile
enable_dependency_tracking
enable_silent_rules
+enable_maintainer_mode
enable_fulldist
enable_maintainer
enable_shared
@@ -827,7 +831,7 @@ CPPFLAGS
MPICC
LT_SYS_LIBRARY_PATH
CPP
-MAX_READLENGTH'
+MAX_STACK_READLENGTH'
# Initialize some variables set by options.
@@ -1368,7 +1372,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2016-08-08 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-08-16 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1439,7 +1443,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2016-08-08:";;
+ short | recursive ) echo "Configuration of gmap 2016-08-16:";;
esac
cat <<\_ACEOF
@@ -1454,6 +1458,9 @@ Optional Features:
speeds up one-time build
--enable-silent-rules less verbose build output (undo: "make V=1")
--disable-silent-rules verbose build output (undo: "make V=0")
+ --enable-maintainer-mode
+ enable make rules and dependencies not useful (and
+ sometimes confusing) to the casual installer
--enable-fulldist For use by program maintainer
--enable-maintainer For use by program maintainer
--enable-shared[=PKGS] build shared libraries [default=yes]
@@ -1505,8 +1512,9 @@ Some influential environment variables:
LT_SYS_LIBRARY_PATH
User-defined run-time library search path.
CPP C preprocessor
- MAX_READLENGTH
- Maximum read length for GSNAP (default 300)
+ MAX_STACK_READLENGTH
+ Maximum read length for GSNAP allocating on stack rather than
+ heap (default 300)
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
@@ -1574,7 +1582,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2016-08-08
+gmap configure 2016-08-16
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2180,7 +2188,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2016-08-08, which was
+It was created by gmap $as_me 2016-08-16, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2530,8 +2538,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-08-08" >&5
-$as_echo "2016-08-08" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-08-16" >&5
+$as_echo "2016-08-16" >&6; }
### Read defaults
@@ -4396,7 +4404,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2016-08-08'
+ VERSION='2016-08-16'
cat >>confdefs.h <<_ACEOF
@@ -4618,6 +4626,30 @@ END
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5
+$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; }
+ # Check whether --enable-maintainer-mode was given.
+if test "${enable_maintainer_mode+set}" = set; then :
+ enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval
+else
+ USE_MAINTAINER_MODE=no
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5
+$as_echo "$USE_MAINTAINER_MODE" >&6; }
+ if test $USE_MAINTAINER_MODE = yes; then
+ MAINTAINER_MODE_TRUE=
+ MAINTAINER_MODE_FALSE='#'
+else
+ MAINTAINER_MODE_TRUE='#'
+ MAINTAINER_MODE_FALSE=
+fi
+
+ MAINT=$MAINTAINER_MODE_TRUE
+
+
+
+
if test "x$enable_fulldist" = xyes; then
FULLDIST_TRUE=
FULLDIST_FALSE='#'
@@ -15238,6 +15270,7 @@ fi
#AC_FUNC_MMAP # Checks only private fixed mapping of already-mapped memory
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether alloca is enabled" >&5
$as_echo_n "checking whether alloca is enabled... " >&6; }
# Check whether --enable-alloca was given.
@@ -18918,13 +18951,13 @@ fi
$as_echo "$GMAPDB" >&6; }
-# MAX_READLENGTH
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking MAX_READLENGTH" >&5
-$as_echo_n "checking MAX_READLENGTH... " >&6; }
+# MAX_STACK_READLENGTH
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking MAX_STACK_READLENGTH" >&5
+$as_echo_n "checking MAX_STACK_READLENGTH... " >&6; }
-if test x"$MAX_READLENGTH" = x; then
+if test x"$MAX_STACK_READLENGTH" = x; then
- EXP_VAR=MAX_READLENGTH
+ EXP_VAR=MAX_STACK_READLENGTH
FROM_VAR='300'
prefix_save=$prefix
@@ -18945,15 +18978,15 @@ if test x"$MAX_READLENGTH" = x; then
done
full_var=$new_full_var
- MAX_READLENGTH="$full_var"
+ MAX_STACK_READLENGTH="$full_var"
prefix=$prefix_save
exec_prefix=$exec_prefix_save
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAX_READLENGTH" >&5
-$as_echo "$MAX_READLENGTH" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAX_STACK_READLENGTH" >&5
+$as_echo "$MAX_STACK_READLENGTH" >&6; }
# zlib package
@@ -19642,6 +19675,10 @@ else
am__EXEEXT_FALSE=
fi
+if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then
+ as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${FULLDIST_TRUE}" && test -z "${FULLDIST_FALSE}"; then
as_fn_error $? "conditional \"FULLDIST\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -20072,7 +20109,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2016-08-08, which was
+This file was extended by gmap $as_me 2016-08-16, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20138,7 +20175,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-gmap config.status 2016-08-08
+gmap config.status 2016-08-16
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index e991099..d4061f5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -111,6 +111,8 @@ AC_ARG_PROGRAM
#AM_INIT_AUTOMAKE([no-dependencies])
#AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
AM_INIT_AUTOMAKE
+AM_MAINTAINER_MODE([disable])
+
AM_CONDITIONAL(FULLDIST,test "x$enable_fulldist" = xyes)
AC_ARG_ENABLE([fulldist],
@@ -261,6 +263,7 @@ AC_FUNC_FSEEKO
#AC_FUNC_MMAP # Checks only private fixed mapping of already-mapped memory
+
AC_MSG_CHECKING(whether alloca is enabled)
AC_ARG_ENABLE([alloca],
AC_HELP_STRING([--enable-alloca],
@@ -593,13 +596,13 @@ AC_SUBST(GMAPDB)
AC_MSG_RESULT($GMAPDB)
-# MAX_READLENGTH
-AC_MSG_CHECKING(MAX_READLENGTH)
-AC_ARG_VAR([MAX_READLENGTH], [Maximum read length for GSNAP (default 300)])
-if test x"$MAX_READLENGTH" = x; then
- ACX_EXPAND(MAX_READLENGTH,'300')
+# MAX_STACK_READLENGTH
+AC_MSG_CHECKING(MAX_STACK_READLENGTH)
+AC_ARG_VAR([MAX_STACK_READLENGTH], [Maximum read length for GSNAP allocating on stack rather than heap (default 300)])
+if test x"$MAX_STACK_READLENGTH" = x; then
+ ACX_EXPAND(MAX_STACK_READLENGTH,'300')
fi
-AC_MSG_RESULT($MAX_READLENGTH)
+AC_MSG_RESULT($MAX_STACK_READLENGTH)
# zlib package
diff --git a/src/ChangeLog b/src/ChangeLog
new file mode 100644
index 0000000..e69de29
diff --git a/src/Makefile.am b/src/Makefile.am
index 0b9f209..e0630f6 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -283,37 +283,37 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
# Previously included -lrt for shm_open, but we are not calling that
gsnap_nosimd_CC = $(PTHREAD_CC)
-gsnap_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS)
+gsnap_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS)
gsnap_nosimd_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_nosimd_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_nosimd_SOURCES = $(GSNAP_FILES)
gsnap_sse2_CC = $(PTHREAD_CC)
-gsnap_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS)
+gsnap_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS)
gsnap_sse2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_sse2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_sse2_SOURCES = $(GSNAP_FILES)
gsnap_ssse3_CC = $(PTHREAD_CC)
-gsnap_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS)
+gsnap_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS)
gsnap_ssse3_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_ssse3_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_ssse3_SOURCES = $(GSNAP_FILES)
gsnap_sse41_CC = $(PTHREAD_CC)
-gsnap_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS)
+gsnap_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS)
gsnap_sse41_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_sse41_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_sse41_SOURCES = $(GSNAP_FILES)
gsnap_sse42_CC = $(PTHREAD_CC)
-gsnap_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS)
+gsnap_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS)
gsnap_sse42_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_sse42_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_sse42_SOURCES = $(GSNAP_FILES)
gsnap_avx2_CC = $(PTHREAD_CC)
-gsnap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS)
+gsnap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS)
gsnap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_avx2_SOURCES = $(GSNAP_FILES)
@@ -362,37 +362,37 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
# Note: dist_ commands get read by bootstrap, and don't follow the flags
gsnapl_nosimd_CC = $(PTHREAD_CC)
-gsnapl_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS)
+gsnapl_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS)
gsnapl_nosimd_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_nosimd_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_nosimd_SOURCES = $(GSNAPL_FILES)
gsnapl_sse2_CC = $(PTHREAD_CC)
-gsnapl_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS)
+gsnapl_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS)
gsnapl_sse2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_sse2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_sse2_SOURCES = $(GSNAPL_FILES)
gsnapl_ssse3_CC = $(PTHREAD_CC)
-gsnapl_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS)
+gsnapl_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS)
gsnapl_ssse3_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_ssse3_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_ssse3_SOURCES = $(GSNAPL_FILES)
gsnapl_sse41_CC = $(PTHREAD_CC)
-gsnapl_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS)
+gsnapl_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS)
gsnapl_sse41_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_sse41_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_sse41_SOURCES = $(GSNAPL_FILES)
gsnapl_sse42_CC = $(PTHREAD_CC)
-gsnapl_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS)
+gsnapl_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS)
gsnapl_sse42_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_sse42_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_sse42_SOURCES = $(GSNAPL_FILES)
gsnapl_avx2_CC = $(PTHREAD_CC)
-gsnapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS)
+gsnapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS)
gsnapl_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_avx2_SOURCES = $(GSNAPL_FILES)
@@ -436,7 +436,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
getopt.c getopt1.c getopt.h uniqscan.c
uniqscan_CC = $(PTHREAD_CC)
-uniqscan_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS)
+uniqscan_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS)
uniqscan_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
uniqscan_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
@@ -478,7 +478,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
getopt.c getopt1.c getopt.h uniqscan.c
uniqscanl_CC = $(PTHREAD_CC)
-uniqscanl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS)
+uniqscanl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS)
uniqscanl_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
uniqscanl_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
@@ -702,13 +702,13 @@ dist_sam_sort_SOURCES = $(SAM_SORT_FILES)
# intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
# univinterval.c univinterval.h interval.c interval.h \
# uintlist.c uintlist.h \
-# chrom.c chrom.h stopwatch.c stopwatch.h access.c access.h \
+# chrom.c chrom.h stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
# iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
# filestring.c filestring.h \
# md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
# genome.c genome.h \
# genomicpos.c genomicpos.h \
-# chrnum.c chrnum.h chrsubset.c chrsubset.h \
+# chrnum.c chrnum.h \
# maxent.c maxent.h \
# branchpoint.c branchpoint.h \
# parserange.c parserange.h datadir.c datadir.h getopt.c getopt1.c getopt.h splicing-score.c
diff --git a/src/Makefile.in b/src/Makefile.in
index 29d2b96..e3e7262 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -2190,7 +2190,7 @@ am__define_uniq_tagged_files = \
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config.h.in \
- $(top_srcdir)/config/depcomp
+ $(top_srcdir)/config/depcomp ChangeLog compile
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
@@ -2236,9 +2236,10 @@ LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
-MAX_READLENGTH = @MAX_READLENGTH@
+MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
@@ -2548,32 +2549,32 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
# Note: dist_ commands get read by bootstrap, and don't follow the flags
# Previously included -lrt for shm_open, but we are not calling that
gsnap_nosimd_CC = $(PTHREAD_CC)
-gsnap_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS)
+gsnap_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS)
gsnap_nosimd_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_nosimd_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_nosimd_SOURCES = $(GSNAP_FILES)
gsnap_sse2_CC = $(PTHREAD_CC)
-gsnap_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS)
+gsnap_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS)
gsnap_sse2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_sse2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_sse2_SOURCES = $(GSNAP_FILES)
gsnap_ssse3_CC = $(PTHREAD_CC)
-gsnap_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS)
+gsnap_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS)
gsnap_ssse3_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_ssse3_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_ssse3_SOURCES = $(GSNAP_FILES)
gsnap_sse41_CC = $(PTHREAD_CC)
-gsnap_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS)
+gsnap_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS)
gsnap_sse41_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_sse41_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_sse41_SOURCES = $(GSNAP_FILES)
gsnap_sse42_CC = $(PTHREAD_CC)
-gsnap_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS)
+gsnap_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS)
gsnap_sse42_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_sse42_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_sse42_SOURCES = $(GSNAP_FILES)
gsnap_avx2_CC = $(PTHREAD_CC)
-gsnap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS)
+gsnap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS)
gsnap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_avx2_SOURCES = $(GSNAP_FILES)
@@ -2617,32 +2618,32 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
# Note: dist_ commands get read by bootstrap, and don't follow the flags
gsnapl_nosimd_CC = $(PTHREAD_CC)
-gsnapl_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS)
+gsnapl_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS)
gsnapl_nosimd_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_nosimd_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_nosimd_SOURCES = $(GSNAPL_FILES)
gsnapl_sse2_CC = $(PTHREAD_CC)
-gsnapl_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS)
+gsnapl_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS)
gsnapl_sse2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_sse2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_sse2_SOURCES = $(GSNAPL_FILES)
gsnapl_ssse3_CC = $(PTHREAD_CC)
-gsnapl_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS)
+gsnapl_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS)
gsnapl_ssse3_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_ssse3_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_ssse3_SOURCES = $(GSNAPL_FILES)
gsnapl_sse41_CC = $(PTHREAD_CC)
-gsnapl_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS)
+gsnapl_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS)
gsnapl_sse41_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_sse41_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_sse41_SOURCES = $(GSNAPL_FILES)
gsnapl_sse42_CC = $(PTHREAD_CC)
-gsnapl_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS)
+gsnapl_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS)
gsnapl_sse42_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_sse42_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_sse42_SOURCES = $(GSNAPL_FILES)
gsnapl_avx2_CC = $(PTHREAD_CC)
-gsnapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS)
+gsnapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS)
gsnapl_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
gsnapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_avx2_SOURCES = $(GSNAPL_FILES)
@@ -2685,7 +2686,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
getopt.c getopt1.c getopt.h uniqscan.c
uniqscan_CC = $(PTHREAD_CC)
-uniqscan_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS)
+uniqscan_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS)
uniqscan_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
uniqscan_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_uniqscan_SOURCES = $(UNIQSCAN_FILES)
@@ -2724,7 +2725,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
getopt.c getopt1.c getopt.h uniqscan.c
uniqscanl_CC = $(PTHREAD_CC)
-uniqscanl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS)
+uniqscanl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS)
uniqscanl_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
uniqscanl_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_uniqscanl_SOURCES = $(UNIQSCANL_FILES)
@@ -2928,7 +2929,7 @@ all: config.h
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -2952,9 +2953,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: $(am__configure_deps)
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
@@ -2965,7 +2966,7 @@ config.h: stamp-h1
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
@rm -f stamp-h1
cd $(top_builddir) && $(SHELL) ./config.status src/config.h
-$(srcdir)/config.h.in: $(am__configure_deps)
+$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
($(am__cd) $(top_srcdir) && $(AUTOHEADER))
rm -f stamp-h1
touch $@
@@ -41486,13 +41487,13 @@ uninstall-am: uninstall-binPROGRAMS
# intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
# univinterval.c univinterval.h interval.c interval.h \
# uintlist.c uintlist.h \
-# chrom.c chrom.h stopwatch.c stopwatch.h access.c access.h \
+# chrom.c chrom.h stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
# iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
# filestring.c filestring.h \
# md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
# genome.c genome.h \
# genomicpos.c genomicpos.h \
-# chrnum.c chrnum.h chrsubset.c chrsubset.h \
+# chrnum.c chrnum.h \
# maxent.c maxent.h \
# branchpoint.c branchpoint.h \
# parserange.c parserange.h datadir.c datadir.h getopt.c getopt1.c getopt.h splicing-score.c
diff --git a/src/atoi.c b/src/atoi.c
index 386c4a1..d5b4d15 100644
--- a/src/atoi.c
+++ b/src/atoi.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: atoi.c 195988 2016-08-08 19:29:00Z twu $";
+static char rcsid[] = "$Id: atoi.c 195989 2016-08-08 21:42:24Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/bytecoding.c b/src/bytecoding.c
index d88d102..fd2bfcc 100644
--- a/src/bytecoding.c
+++ b/src/bytecoding.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bytecoding.c 179281 2015-11-20 00:10:35Z twu $";
+static char rcsid[] = "$Id: bytecoding.c 196402 2016-08-16 14:29:06Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -653,6 +653,8 @@ Bytecoding_read (UINT4 key, unsigned char *bytes, UINT4 *exceptions, int nexcept
/* return exceptions[highi + 1]; */
fprintf(stderr,"Bytecoding_read should have found index %u as an exception, but failed\n",key);
+ fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n");
+ fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n");
abort();
}
}
@@ -712,6 +714,8 @@ Bytecoding_read_wguide (UINT4 key, unsigned char *bytes, UINT4 *guide, UINT4 *ex
/* return exceptions[highi + 1]; */
fprintf(stderr,"Bytecoding_read_wguide should have found index %u as an exception, but failed\n",key);
+ fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n");
+ fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n");
abort();
}
}
@@ -766,6 +770,8 @@ Bytecoding_lcpchilddc_lcp (UINT4 key, unsigned char *bytes, UINT4 *exceptions, i
/* return exceptions[highi + 1]; */
fprintf(stderr,"Bytecoding_lcp should have found index %u as an exception, but failed\n",key);
+ fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n");
+ fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n");
abort();
}
}
@@ -847,6 +853,8 @@ Bytecoding_lcpchilddc_child_up (UINT4 key, unsigned char *bytes, UINT4 *guide, U
/* return exceptions[highi + 1]; */
fprintf(stderr,"Bytecoding_lcpchilddc_child_up should have found index %u as an exception, but failed\n",key);
+ fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n");
+ fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n");
abort();
}
}
@@ -908,6 +916,8 @@ Bytecoding_lcpchilddc_child_next (UINT4 key, unsigned char *bytes, UINT4 *guide,
/* return exceptions[highi + 1]; */
fprintf(stderr,"Bytecoding_lcpchilddc_child_next should have found index %u as an exception, but failed\n",key);
+ fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n");
+ fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n");
abort();
}
}
@@ -975,6 +985,8 @@ Bytecoding_lcpchilddc_lcp_next (UINT4 *child_next, UINT4 key, unsigned char *byt
/* return exceptions[highi + 1]; */
fprintf(stderr,"Bytecoding_lcpchilddc_lcp_next should have found index %u as an exception, but failed\n",key);
+ fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n");
+ fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n");
abort();
}
}
@@ -1046,6 +1058,8 @@ Bytecoding_lcpchilddcn_child_up (bool *nextp, UINT4 key, unsigned char *bytes, U
/* return exceptions[highi + 1]; */
fprintf(stderr,"Bytecoding_lcpchilddcn_child_up should have found index %u as an exception, but failed\n",key);
+ fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n");
+ fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n");
abort();
}
}
@@ -1118,6 +1132,8 @@ Bytecoding_lcpchilddcn_child_next (bool *nextp, UINT4 key, unsigned char *bytes,
/* return exceptions[highi + 1]; */
fprintf(stderr,"Bytecoding_lcpchilddcn_child_next should have found index %u as an exception, but failed\n",key);
+ fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n");
+ fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n");
abort();
}
}
diff --git a/src/cmet.c b/src/cmet.c
index ea9f6f5..8eb3aab 100644
--- a/src/cmet.c
+++ b/src/cmet.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: cmet.c 195988 2016-08-08 19:29:00Z twu $";
+static char rcsid[] = "$Id: cmet.c 195989 2016-08-08 21:42:24Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/comp.h b/src/comp.h
index 5115764..5d91cee 100644
--- a/src/comp.h
+++ b/src/comp.h
@@ -1,4 +1,4 @@
-/* $Id: comp.h 195763 2016-08-04 01:37:20Z twu $ */
+/* $Id: comp.h 195548 2016-08-02 17:18:50Z twu $ */
#ifndef COMP_INCLUDED
#define COMP_INCLUDED
diff --git a/src/compile b/src/compile
new file mode 100644
index 0000000..51522be
--- /dev/null
+++ b/src/compile
@@ -0,0 +1,165 @@
+-*- mode: compilation; default-directory: "~/bioinfo/gmap/trunk/src/" -*-
+Compilation started at Mon Dec 14 14:13:20
+
+make -k gsnap.sse42
+/gne/home/twu/bin/gcc -DHAVE_CONFIG_H -I. -pthread -DTARGET=\"x86_64-unknown-linux-gnu\" -DGMAPDB=\"/gne/research/data/bioinfo/gmap/data/genomes\" -DMAX_READLENGTH=300 -DGSNAP=1 -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -msse2 -mssse3 -msse4.1 -msse4.2 -mpopcnt -g -Wall -Wextra -DCHECK_ASSERTIONS=1 -MT gsnap_sse42-dynprog_simd.o -MD -MP -MF .deps/gsnap_sse42-dynprog_simd.Tpo -c -o gsnap_sse42-dynprog_simd.o `test -f 'dynprog_simd.c' || echo './'`dynprog_simd.c
+dynprog_simd.c: In function ‘Dynprog_simd_8’:
+dynprog_simd.c:2143:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
+ ^
+dynprog_simd.c:2143:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:2144:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
+ ^
+dynprog_simd.c:2144:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:2347:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
+ ^
+dynprog_simd.c:2347:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:2348:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
+ ^
+dynprog_simd.c:2348:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:1942:33: warning: variable ‘extend_ladder’ set but not used [-Wunused-but-set-variable]
+ __m128i gap_open, gap_extend, extend_ladder, complement_dummy;
+ ^
+dynprog_simd.c: In function ‘Dynprog_simd_8_upper’:
+dynprog_simd.c:2770:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
+ ^
+dynprog_simd.c:2770:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:2771:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
+ ^
+dynprog_simd.c:2771:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:2896:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
+ ^
+dynprog_simd.c:2896:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:2897:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
+ ^
+dynprog_simd.c:2897:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:2632:8: warning: unused variable ‘na2_single’ [-Wunused-variable]
+ char na2_single;
+ ^
+dynprog_simd.c:2626:70: warning: unused variable ‘pairscore’ [-Wunused-variable]
+ Score8_T *pairscores[5], *pairscores_std_ptr, *pairscores_alt_ptr, pairscore;
+ ^
+dynprog_simd.c: In function ‘Dynprog_simd_8_lower’:
+dynprog_simd.c:3238:3: error: ‘extend_ladder’ undeclared (first use in this function)
+ extend_ladder = _mm_setr_epi8(0,extend,2*extend,3*extend,4*extend,5*extend,6*extend,7*ext
+ ^
+dynprog_simd.c:3238:3: note: each undeclared identifier is reported only once for each function it appears in
+dynprog_simd.c:3267:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]];
+ ^
+dynprog_simd.c:3267:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:3389:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]];
+ ^
+dynprog_simd.c:3389:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:3089:8: warning: unused variable ‘na2_single’ [-Wunused-variable]
+ char na2_single;
+ ^
+dynprog_simd.c:3083:45: warning: unused variable ‘pairscore’ [-Wunused-variable]
+ Score8_T *pairscores[5], *pairscores_ptr, pairscore;
+ ^
+dynprog_simd.c: In function ‘Dynprog_simd_16’:
+dynprog_simd.c:3739:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
+ ^
+dynprog_simd.c:3739:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:3740:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
+ ^
+dynprog_simd.c:3740:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:3923:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
+ ^
+dynprog_simd.c:3923:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:3924:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
+ ^
+dynprog_simd.c:3924:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:3563:33: warning: variable ‘extend_ladder’ set but not used [-Wunused-but-set-variable]
+ __m128i gap_open, gap_extend, extend_ladder, complement_dummy;
+ ^
+dynprog_simd.c: In function ‘Dynprog_simd_16_upper’:
+dynprog_simd.c:4259:3: error: ‘extend_ladder’ undeclared (first use in this function)
+ extend_ladder = _mm_setr_epi16(0,extend,2*extend,3*extend,4*extend,5*extend,6*extend,7*ex
+ ^
+dynprog_simd.c:4284:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
+ ^
+dynprog_simd.c:4284:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:4285:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
+ ^
+dynprog_simd.c:4285:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:4381:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]];
+ ^
+dynprog_simd.c:4381:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:4382:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1
+ ^
+dynprog_simd.c:4382:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:4158:8: warning: unused variable ‘na2_single’ [-Wunused-variable]
+ char na2_single;
+ ^
+dynprog_simd.c:4152:71: warning: unused variable ‘pairscore’ [-Wunused-variable]
+ Score16_T *pairscores[5], *pairscores_std_ptr, *pairscores_alt_ptr, pairscore;
+ ^
+dynprog_simd.c: In function ‘Dynprog_simd_16_lower’:
+dynprog_simd.c:4675:3: error: ‘extend_ladder’ undeclared (first use in this function)
+ extend_ladder = _mm_setr_epi16(0,extend,2*extend,3*extend,4*extend,5*extend,6*extend,7*ex
+ ^
+dynprog_simd.c:4699:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]];
+ ^
+dynprog_simd.c:4699:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:4792:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+ na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]];
+ ^
+dynprog_simd.c:4792:4: warning: array subscript has type ‘char’ [-Wchar-subscripts]
+dynprog_simd.c:4542:8: warning: unused variable ‘na2_single’ [-Wunused-variable]
+ char na2_single;
+ ^
+dynprog_simd.c:4536:46: warning: unused variable ‘pairscore’ [-Wunused-variable]
+ Score16_T *pairscores[5], *pairscores_ptr, pairscore;
+ ^
+dynprog_simd.c: In function ‘Dynprog_traceback_8_lower’:
+dynprog_simd.c:5278:8: warning: unused variable ‘add_dashes_p’ [-Wunused-variable]
+ bool add_dashes_p;
+ ^
+dynprog_simd.c:5275:11: warning: unused parameter ‘cdna_direction’ [-Wunused-parameter]
+ int cdna_direction, bool watsonp, int dynprogindex) {
+ ^
+dynprog_simd.c: In function ‘Dynprog_traceback_16_lower’:
+dynprog_simd.c:5662:8: warning: unused variable ‘add_dashes_p’ [-Wunused-variable]
+ bool add_dashes_p;
+ ^
+dynprog_simd.c:5659:12: warning: unused parameter ‘cdna_direction’ [-Wunused-parameter]
+ int cdna_direction, bool watsonp, int dynprogindex) {
+ ^
+dynprog_simd.c: At top level:
+dynprog_simd.c:1:13: warning: ‘rcsid’ defined but not used [-Wunused-variable]
+ static char rcsid[] = "$Id: dynprog_simd.c 146623 2014-09-02 21:31:32Z twu $";
+ ^
+dynprog_simd.c:510:1: warning: ‘Directions8_print’ defined but not used [-Wunused-function]
+ Directions8_print (Direction8_T **directions_nogap, Direction8_T **directions_Egap, Directi
+ ^
+dynprog_simd.c:604:1: warning: ‘Directions8_print_ud’ defined but not used [-Wunused-function]
+ Directions8_print_ud (Direction8_T **directions_nogap, Direction8_T **directions_Egap,
+ ^
+dynprog_simd.c:713:1: warning: ‘Directions16_print’ defined but not used [-Wunused-function]
+ Directions16_print (Direction16_T **directions_nogap, Direction16_T **directions_Egap, Dire
+ ^
+dynprog_simd.c:807:1: warning: ‘Directions16_print_ud’ defined but not used [-Wunused-function]
+ Directions16_print_ud (Direction16_T **directions_nogap, Direction16_T **directions_Egap,
+ ^
+make: *** [gsnap_sse42-dynprog_simd.o] Error 1
+make: Target `gsnap.sse42' not remade because of errors.
+
+Compilation exited abnormally with code 2 at Mon Dec 14 14:13:23
diff --git a/src/filestring.c b/src/filestring.c
index aea43ac..4c49e36 100644
--- a/src/filestring.c
+++ b/src/filestring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: filestring.c 195969 2016-08-08 17:01:27Z twu $";
+static char rcsid[] = "$Id: filestring.c 196273 2016-08-12 15:15:06Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/genome_sites.c b/src/genome_sites.c
index e3724dd..5597cf1 100644
--- a/src/genome_sites.c
+++ b/src/genome_sites.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome_sites.c 195749 2016-08-03 23:35:09Z twu $";
+static char rcsid[] = "$Id: genome_sites.c 196273 2016-08-12 15:15:06Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/gmap.c b/src/gmap.c
index 39c14f4..30f9c9a 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 193877 2016-07-12 02:46:33Z twu $";
+static char rcsid[] = "$Id: gmap.c 196403 2016-08-16 14:33:56Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -382,6 +382,7 @@ static bool require_splicedir_p = false;
/* GFF3 */
static bool gff3_separators_p = true;
+static bool gff3_phase_swap_p = false;
/* SAM */
/* Applicable to PMAP? */
@@ -557,6 +558,7 @@ static struct option long_options[] = {
{"require-splicedir", no_argument, 0, 0}, /* require_splicedir_p */
{"gff3-add-separators", required_argument, 0, 0}, /* gff3_separators_p */
+ {"gff3-swap-phase", required_argument, 0, 0}, /* gff3_phase_swap_p */
#ifndef PMAP
{"quality-protocol", required_argument, 0, 0}, /* quality_shift */
@@ -5298,6 +5300,7 @@ parse_command_line (int argc, char *argv[], int optind) {
split_output_root = optarg;
} else if (!strcmp(long_name,"append-output")) {
appendp = true;
+
} else if (!strcmp(long_name,"gff3-add-separators")) {
if (!strcmp(optarg,"1")) {
gff3_separators_p = true;
@@ -5308,6 +5311,16 @@ parse_command_line (int argc, char *argv[], int optind) {
return 9;
}
+ } else if (!strcmp(long_name,"gff3-swap-phase")) {
+ if (!strcmp(optarg,"1")) {
+ gff3_phase_swap_p = true;
+ } else if (!strcmp(optarg,"0")) {
+ gff3_phase_swap_p = false;
+ } else {
+ fprintf(stderr,"--gff3-swap-phase flag must be 0 or 1\n");
+ return 9;
+ }
+
#ifndef PMAP
} else if (!strcmp(long_name,"no-sam-headers")) {
sam_headers_p = false;
@@ -6575,7 +6588,8 @@ main (int argc, char *argv[]) {
Pair_setup(trim_mismatch_score,trim_indel_score,gff3_separators_p,sam_insert_0M_p,
force_xs_direction_p,md_lowercase_variant_p,
/*snps_p*/genomecomp_alt ? true : false,
- /*print_nsnpdiffs_p*/genomecomp_alt ? true : false,genomelength);
+ /*print_nsnpdiffs_p*/genomecomp_alt ? true : false,genomelength,
+ gff3_phase_swap_p);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
require_splicedir_p,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
@@ -7184,6 +7198,9 @@ Output options\n\
fprintf(stdout,"\
--gff3-add-separators=INT Whether to add a ### separator after each query sequence\n\
Values: 0 (no), 1 (yes, default)\n\
+ --gff3-swap-phase=INT Whether to swap phase (0 => 0, 1 => 2, 2 => 1) in gff3_gene format\n\
+ Needed by some analysis programs, but deviates from GFF3 specification\n\
+ Values: 0 (no, default), 1 (yes)\n\
");
fprintf(stdout,"\n");
diff --git a/src/gsnap.c b/src/gsnap.c
index 264c34c..a6fa7aa 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 195760 2016-08-04 00:12:04Z twu $";
+static char rcsid[] = "$Id: gsnap.c 196438 2016-08-16 20:23:27Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -121,6 +121,7 @@ static char rcsid[] = "$Id: gsnap.c 195760 2016-08-04 00:12:04Z twu $";
#define MIN_INDEXDB_SIZE_THRESHOLD 100
+#define MAX_FLOORS_READLENGTH 300
#define MAX_QUERYLENGTH_FOR_ALLOC 100000
#define MAX_GENOMICLENGTH_FOR_ALLOC 1000000
@@ -752,7 +753,7 @@ print_program_version () {
genomedir = Datadir_find_genomedir(/*user_genomedir*/NULL);
fprintf(stdout,"Default gmap directory (environment): %s\n",genomedir);
FREE(genomedir);
- fprintf(stdout,"Maximum read length: %d\n",MAX_READLENGTH);
+ fprintf(stdout,"Maximum stack read length: %d\n",MAX_STACK_READLENGTH);
fprintf(stdout,"Thomas D. Wu, Genentech, Inc.\n");
fprintf(stdout,"Contact: twu at gene.com\n");
fprintf(stdout,"\n");
@@ -1086,7 +1087,7 @@ single_thread () {
cellpool = Cellpool_new();
worker_stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL;
- floors_array = (Floors_T *) CALLOC(MAX_READLENGTH+1,sizeof(Floors_T));
+ floors_array = (Floors_T *) CALLOC(MAX_FLOORS_READLENGTH+1,sizeof(Floors_T));
/* Except_stack_create(); -- requires pthreads */
@@ -1184,7 +1185,7 @@ single_thread () {
/* Except_stack_destroy(); -- requires pthreads */
- for (i = 0; i <= MAX_READLENGTH; i++) {
+ for (i = 0; i <= MAX_FLOORS_READLENGTH; i++) {
if (floors_array[i] != NULL) {
Floors_free_keep(&(floors_array[i]));
}
@@ -1260,7 +1261,7 @@ worker_thread (void *data) {
cellpool = Cellpool_new();
worker_stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL;
- floors_array = (Floors_T *) CALLOC(MAX_READLENGTH+1,sizeof(Floors_T));
+ floors_array = (Floors_T *) CALLOC(MAX_FLOORS_READLENGTH+1,sizeof(Floors_T));
Except_stack_create();
@@ -1360,7 +1361,7 @@ worker_thread (void *data) {
Except_stack_destroy();
- for (i = 0; i <= MAX_READLENGTH; i++) {
+ for (i = 0; i <= MAX_FLOORS_READLENGTH; i++) {
if (floors_array[i] != NULL) {
Floors_free_keep(&(floors_array[i]));
}
@@ -3300,7 +3301,8 @@ worker_setup (char *genomesubdir, char *fileroot) {
Pair_setup(trim_mismatch_score,trim_indel_score,/*gff3_separators_p*/false,sam_insert_0M_p,
force_xs_direction_p,md_lowercase_variant_p,
/*snps_p*/snps_iit ? true : false,print_nsnpdiffs_p,
- Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false));
+ Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
+ /*gff3_phase_swap_p*/false);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
/*require_splicedir_p*/true,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
@@ -3338,7 +3340,8 @@ worker_setup (char *genomesubdir, char *fileroot) {
nullgap,maxpeelback,maxpeelback_distalmedial,
extramaterial_end,extramaterial_paired,gmap_mode,
trigger_score_for_gmap,gmap_allowance,max_gmap_pairsearch,
- max_gmap_terminal,max_gmap_improvement,antistranded_penalty);
+ max_gmap_terminal,max_gmap_improvement,antistranded_penalty,
+ MAX_FLOORS_READLENGTH);
Substring_setup(print_nsnpdiffs_p,print_snplabels_p,
show_refdiff_p,snps_iit,snps_divint_crosstable,
genes_iit,genes_divint_crosstable,
diff --git a/src/indel.c b/src/indel.c
index 18d2aba..8cfbd1a 100644
--- a/src/indel.c
+++ b/src/indel.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indel.c 193229 2016-06-30 22:31:10Z twu $";
+static char rcsid[] = "$Id: indel.c 196431 2016-08-16 20:19:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -45,19 +45,24 @@ Indel_resolve_middle_insertion (int *best_nmismatches_i, int *best_nmismatches_j
#endif
int nmismatches_left, nmismatches_right;
int best_sum, sum, nmismatches_lefti, nmismatches_righti, lefti, righti;
-
+ int *mismatch_positions_left, *mismatch_positions_right;
#ifdef HAVE_ALLOCA
- int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
- int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
-#else
- int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH];
-
- if (max_mismatches_allowed > MAX_READLENGTH) {
- max_mismatches_allowed = MAX_READLENGTH;
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
+ mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
+ } else {
+ mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int));
}
+#else
+ mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int));
#endif
+ if (max_mismatches_allowed > querylength) {
+ max_mismatches_allowed = querylength;
+ }
/* query has insertion. Get |indels| less from genome; trim from left. */
/* left = ptr->diagonal - querylength; */
@@ -168,6 +173,19 @@ Indel_resolve_middle_insertion (int *best_nmismatches_i, int *best_nmismatches_j
}
debug2(printf("\n"));
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions_left);
+ FREEA(mismatch_positions_right);
+ } else {
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+ }
+#else
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+#endif
+
*best_nmismatches_i = nmismatches_lefti;
*best_nmismatches_j = nmismatches_righti;
@@ -203,18 +221,24 @@ Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
#endif
int nmismatches_left, nmismatches_right, nmismatches_lefti, nmismatches_righti;
int best_sum, sum, lefti, righti;
+ int *mismatch_positions_left, *mismatch_positions_right;
#ifdef HAVE_ALLOCA
- int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
- int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
-#else
- int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH];
-
- if (max_mismatches_allowed > MAX_READLENGTH) {
- max_mismatches_allowed = MAX_READLENGTH;
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
+ mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
+ } else {
+ mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int));
}
+#else
+ mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int));
#endif
+ if (max_mismatches_allowed > querylength) {
+ max_mismatches_allowed = querylength;
+ }
/* query has deletion. Get |indels| more from genome; add to right. */
/* left = ptr->diagonal - querylength; */
@@ -319,6 +343,19 @@ Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
}
debug2(printf("\n"));
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions_left);
+ FREEA(mismatch_positions_right);
+ } else {
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+ }
+#else
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+#endif
+
*best_nmismatches_i = nmismatches_lefti;
*best_nmismatches_j = nmismatches_righti;
@@ -357,12 +394,19 @@ Indel_solve_middle_insertion (bool *foundp, int *found_score, int *nhits, List_T
int nmismatches_left, nmismatches_right;
int best_sum, sum, nmismatches_lefti, nmismatches_righti, lefti, righti;
int nmismatches1, nmismatches2;
+ int *mismatch_positions_left, *mismatch_positions_right;
#ifdef HAVE_ALLOCA
- int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
- int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
+ mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
+ } else {
+ mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int));
+ }
#else
- int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH];
+ mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int));
#endif
@@ -475,6 +519,19 @@ Indel_solve_middle_insertion (bool *foundp, int *found_score, int *nhits, List_T
}
debug2(printf("\n"));
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions_left);
+ FREEA(mismatch_positions_right);
+ } else {
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+ }
+#else
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+#endif
+
if (best_sum <= max_mismatches_allowed) {
if (plusp == true) {
query_indel_pos = best_indel_pos;
@@ -523,12 +580,19 @@ Indel_solve_middle_deletion (bool *foundp, int *found_score, int *nhits, List_T
int nmismatches_left, nmismatches_right;
int best_sum, sum, nmismatches_lefti, nmismatches_righti, lefti, righti;
int nmismatches1, nmismatches2;
+ int *mismatch_positions_left, *mismatch_positions_right;
#ifdef HAVE_ALLOCA
- int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
- int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
+ mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
+ } else {
+ mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int));
+ }
#else
- int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH];
+ mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int));
#endif
@@ -637,6 +701,18 @@ Indel_solve_middle_deletion (bool *foundp, int *found_score, int *nhits, List_T
}
debug2(printf("\n"));
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions_left);
+ FREEA(mismatch_positions_right);
+ } else {
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+ }
+#else
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+#endif
if (best_sum <= max_mismatches_allowed) {
if (plusp == true) {
diff --git a/src/mapq.c b/src/mapq.c
index 397c897..50e69d5 100644
--- a/src/mapq.c
+++ b/src/mapq.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: mapq.c 184376 2016-02-16 23:39:30Z twu $";
+static char rcsid[] = "$Id: mapq.c 196431 2016-08-16 20:19:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -160,11 +160,16 @@ MAPQ_loglik (Compress_T query_compress, Univcoord_T left, int querystart, int qu
int nmismatches, i;
int alignlength;
+ int *mismatch_positions;
#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA((querylength+1) * sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions = (int *) ALLOCA((querylength+1) * sizeof(int));
+ } else {
+ mismatch_positions = (int *) MALLOC((querylength+1) * sizeof(int));
+ }
#else
- int mismatch_positions[MAX_READLENGTH+1];
+ mismatch_positions = (int *) MALLOC((querylength+1) * sizeof(int));
#endif
@@ -254,6 +259,16 @@ MAPQ_loglik (Compress_T query_compress, Univcoord_T left, int querystart, int qu
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions);
+ } else {
+ FREE(mismatch_positions);
+ }
+#else
+ FREE(mismatch_positions);
+#endif
+
debug(printf("returning loglik %f\n",loglik));
return loglik;
}
diff --git a/src/pair.c b/src/pair.c
index c24366a..894d05f 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 195763 2016-08-04 01:37:20Z twu $";
+static char rcsid[] = "$Id: pair.c 196403 2016-08-16 14:33:56Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -149,12 +149,14 @@ static bool snps_p;
static bool print_nsnpdiffs_p;
static double genomelength; /* For BLAST E-value */
+static bool gff3_phase_swap_p = true;
+
void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
- Univcoord_T genomelength_in) {
+ Univcoord_T genomelength_in, bool gff3_phase_swap_p_in) {
trim_mismatch_score = trim_mismatch_score_in;
trim_indel_score = trim_indel_score_in;
gff3_separators_p = gff3_separators_p_in;
@@ -164,6 +166,7 @@ Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
snps_p = snps_p_in;
print_nsnpdiffs_p = print_nsnpdiffs_p_in;
genomelength = (double) genomelength_in;
+ gff3_phase_swap_p = gff3_phase_swap_p_in;
return;
}
@@ -2449,7 +2452,18 @@ print_gff3_cds (Filestring_T fp, int cdsno, int pathnum, char *sourcename, char
}
}
- FPRINTF(fp,"%d\t",cds_phase); /* 8: phase */
+ if (gff3_phase_swap_p == true && cds_phase > 0) {
+ /* Some analysis programs want phase in gff3 to be different */
+ FPRINTF(fp,"%d\t",3 - cds_phase); /* 8: phase */
+ } else {
+ /* This appears to be the specification: a phase of 0 indicates
+ that the next codon begins at the first base of the region
+ described by the current line, a phase of 1 indicates that the
+ next codon begins at the second base of this region, and a
+ phase of 2 indicates that the codon begins at the third base of
+ this region. */
+ FPRINTF(fp,"%d\t",cds_phase); /* 8: phase */
+ }
/* 9: features */
FPRINTF(fp,"ID=%s.mrna%d.cds%d;",accession,pathnum,cdsno);
diff --git a/src/pair.h b/src/pair.h
index 0663017..3123737 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 193230 2016-06-30 22:32:37Z twu $ */
+/* $Id: pair.h 196403 2016-08-16 14:33:56Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -32,7 +32,7 @@ extern void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
- Univcoord_T genomelength_in);
+ Univcoord_T genomelength_in, bool gff3_phase_swap_p_in);
extern int
Pair_querypos (T this);
extern Chrpos_T
diff --git a/src/pairpool.c b/src/pairpool.c
index 7572f6e..04aa5c5 100644
--- a/src/pairpool.c
+++ b/src/pairpool.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pairpool.c 195763 2016-08-04 01:37:20Z twu $";
+static char rcsid[] = "$Id: pairpool.c 195548 2016-08-02 17:18:50Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/samprint.c b/src/samprint.c
index d2fce93..6576b24 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 195961 2016-08-08 16:36:34Z twu $";
+static char rcsid[] = "$Id: samprint.c 196273 2016-08-12 15:15:06Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2262,7 +2262,7 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
if (sensep == true) {
- assert(Substring_chimera_pos(donor) == Substring_queryend(donor));
+ assert(Substring_siteD_pos(donor) == Substring_queryend(donor));
if (plusp == true) {
/* sensep true, plusp true */
/* FPRINTF(fp,"donor sensep true, plusp true\n"); */
@@ -2321,7 +2321,7 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
}
} else {
- assert(Substring_chimera_pos(donor) == Substring_querystart(donor));
+ assert(Substring_siteD_pos(donor) == Substring_querystart(donor));
if (plusp == true) {
/* sensep false, plusp true */
/* FPRINTF(fp,"donor sensep false, plusp true\n"); */
@@ -2669,8 +2669,8 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
/* 12. TAGS: XT */
if (print_xt_p == true) {
FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
- FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord(donor),
- acceptor_strand,acceptor_chr,Substring_chr_splicecoord(acceptor));
+ FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor),
+ acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor));
}
/* 12. TAGS: XC */
@@ -2776,7 +2776,7 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
if (sensep == true) {
- assert(Substring_chimera_pos(acceptor) == Substring_querystart(acceptor));
+ assert(Substring_siteA_pos(acceptor) == Substring_querystart(acceptor));
if (plusp == true) {
/* sensep true, plusp true */
/* FPRINTF(fp,"acceptor sensep true, plusp true\n"); */
@@ -2830,7 +2830,7 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
} else {
/* sensep false, plusp true */
- assert(Substring_chimera_pos(acceptor) == Substring_queryend(acceptor));
+ assert(Substring_siteA_pos(acceptor) == Substring_queryend(acceptor));
if (plusp == true) {
/* FPRINTF(fp,"acceptor sensep false, plusp true\n"); */
if (hide_soft_clips_p == true) {
@@ -3174,8 +3174,8 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
/* 12. TAGS: XT */
if (print_xt_p == true) {
FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
- FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord(donor),
- acceptor_strand,acceptor_chr,Substring_chr_splicecoord(acceptor));
+ FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor),
+ acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor));
}
@@ -3232,8 +3232,8 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
halfacceptor_dinucleotide(&acceptor2,&acceptor1,acceptor,sensedir);
donor_chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(donor),&allocp);
acceptor_chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(acceptor),&allocp);
- donor_prob = Substring_chimera_prob(donor);
- acceptor_prob = Substring_chimera_prob(acceptor);
+ donor_prob = Substring_siteD_prob(donor);
+ acceptor_prob = Substring_siteA_prob(acceptor);
/* Code taken from that for XS tag for print_halfdonor and print_halfacceptor */
/* For the donor and acceptor strands, use the substring sensedir and not the Stage3end_T sensedir */
diff --git a/src/sarray-read.c b/src/sarray-read.c
index 2062400..3a02f07 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 195763 2016-08-04 01:37:20Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 196431 2016-08-16 20:19:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2349,6 +2349,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
this->all_positions = (Univcoord_T *) NULL;
} else {
+ /* Function surrounded by HAVE_ALLOCA */
#ifdef USE_QSORT
positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
#else
@@ -2503,6 +2504,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
#endif
}
+ /* Function surrounded by HAVE_ALLOCA */
FREEA(positions_temp);
}
@@ -2599,6 +2601,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
this->all_positions = (Univcoord_T *) NULL;
} else {
+ /* Function surrounded by HAVE_ALLOCA */
positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
low_adj = low + this->querystart;
@@ -2751,6 +2754,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
#endif
}
+ /* Function surrounded by HAVE_ALLOCA */
FREEA(positions_temp);
}
@@ -2791,6 +2795,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
this->all_positions = (Univcoord_T *) NULL;
} else {
+ /* Function surrounded by HAVE_ALLOCA */
#ifdef USE_QSORT
positions_temp = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
#else
@@ -2971,6 +2976,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
#endif
}
+ /* Function surrounded by HAVE_ALLOCA */
FREEA(positions_temp);
}
@@ -3652,22 +3658,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
int k, j, i, n;
bool segmenti_usedp, segmentj_usedp;
bool foundp;
-
-#ifdef HAVE_ALLOCA
- int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1],
- segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1];
- int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1],
- segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1];
-#endif
+ int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos,
+ *segmenti_donor_knowni, *segmentj_acceptor_knowni, *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni;
/* Potential success */
@@ -3680,8 +3672,40 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
return false;
} else {
left = goal /* - querylength */;
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
+#else
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+#endif
}
+
nsame = ndiff = 0;
querystart_diff = querylength;
queryend_diff = 0;
@@ -3793,11 +3817,20 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
debug7(printf("same is at %u from %d to %d\n",left,querystart_same,queryend_same));
n = Uintlist_length(difflist);
+#ifdef HAVE_ALLOCA
#ifdef USE_QSORT
array = (UINT4 *) MALLOCA(n * sizeof(UINT4));
#else
array = (UINT4 *) MALLOCA((n + 1) * sizeof(UINT4));
#endif
+#else
+#ifdef USE_QSORT
+ array = (UINT4 *) MALLOC(n * sizeof(UINT4));
+#else
+ array = (UINT4 *) MALLOC((n + 1) * sizeof(UINT4));
+#endif
+#endif
+
Uintlist_fill_array_and_free(array,&difflist);
#ifdef USE_QSORT
qsort(array,n,sizeof(Univcoord_T),Univcoord_compare);
@@ -4001,8 +4034,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -4018,8 +4051,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -4035,8 +4068,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -4112,17 +4145,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
for (k = i; k < j; k++) {
acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(donor);
+ prob = best_prob - Substring_siteD_prob(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
@@ -4175,17 +4208,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
for (k = i; k < j; k++) {
donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
+ prob = best_prob - Substring_siteA_prob(acceptor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
@@ -4230,8 +4263,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -4249,8 +4282,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -4268,8 +4301,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -4345,17 +4378,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
for (k = i; k < j; k++) {
acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(donor);
+ prob = best_prob - Substring_siteD_prob(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
@@ -4408,17 +4441,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
for (k = i; k < j; k++) {
donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
+ prob = best_prob - Substring_siteA_prob(acceptor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
@@ -4460,7 +4493,11 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
}
List_free(&lowprob);
+#ifdef HAVE_ALLOCA
FREEA(array);
+#else
+ FREE(array);
+#endif
} else if (querystart_diff == 0 && queryend_same == querylength - 1) {
left2 = left;
@@ -4468,11 +4505,20 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
debug7(printf("same is at %u from %d to %d\n",left,querystart_same,queryend_same));
n = Uintlist_length(difflist);
+#ifdef HAVE_ALLOCA
#ifdef USE_QSORT
array = (UINT4 *) MALLOCA(n * sizeof(UINT4));
#else
array = (UINT4 *) MALLOCA((n + 1) * sizeof(UINT4));
#endif
+#else
+#ifdef USE_QSORT
+ array = (UINT4 *) MALLOC(n * sizeof(UINT4));
+#else
+ array = (UINT4 *) MALLOC((n + 1) * sizeof(UINT4));
+#endif
+#endif
+
Uintlist_fill_array_and_free(array,&difflist);
#ifdef USE_QSORT
qsort(array,n,sizeof(Univcoord_T),Univcoord_compare);
@@ -4665,8 +4711,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -4682,8 +4728,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -4701,8 +4747,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -4778,17 +4824,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
for (k = i; k < j; k++) {
acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(donor);
+ prob = best_prob - Substring_siteD_prob(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
@@ -4842,17 +4888,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
for (k = i; k < j; k++) {
donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
+ prob = best_prob - Substring_siteA_prob(acceptor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
@@ -4897,8 +4943,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -4914,8 +4960,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -4933,8 +4979,8 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -5010,17 +5056,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
for (k = i; k < j; k++) {
acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(donor);
+ prob = best_prob - Substring_siteD_prob(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
@@ -5073,17 +5119,17 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
for (k = i; k < j; k++) {
donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
+ prob = best_prob - Substring_siteA_prob(acceptor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
@@ -5126,12 +5172,48 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
}
List_free(&lowprob);
+#ifdef HAVE_ALLOCA
FREEA(array);
+#else
+ FREE(array);
+#endif
} else {
Uintlist_free(&difflist);
}
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(segmenti_donor_knownpos);
+ FREEA(segmentj_acceptor_knownpos);
+ FREEA(segmentj_antidonor_knownpos);
+ FREEA(segmenti_antiacceptor_knownpos);
+ FREEA(segmenti_donor_knowni);
+ FREEA(segmentj_acceptor_knowni);
+ FREEA(segmentj_antidonor_knowni);
+ FREEA(segmenti_antiacceptor_knowni);
+ } else {
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
+ }
+#else
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
+#endif
+
return twopartp;
}
#endif
@@ -5324,9 +5406,15 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
#ifdef SUBDIVIDE_NOMATCHES
/* Try to subdivide elts that have no matches */
+#ifdef HAVE_ALLOCA
coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
- mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
+ mappings = (Chrpos_T **) ALLOCA(querylength * sizeof(Chrpos_T *));
npositions = (int *) CALLOCA(querylength,sizeof(int));
+#else
+ coveredp = (bool *) CALLOC(querylength,sizeof(bool));
+ mappings = (Chrpos_T **) MALLOC(querylength * sizeof(Chrpos_T *));
+ npositions = (int *) CALLOC(querylength,sizeof(int));
+#endif
oligoindex = Oligoindex_array_elt(oligoindices_minor,/*source*/0);
indexsize = Oligoindex_indexsize(oligoindex);
@@ -5936,23 +6024,41 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
int segmenti_donor_nknown, segmentj_acceptor_nknown,
segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
+ int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos,
+ *segmenti_donor_knowni, *segmentj_acceptor_knowni, *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni;
+ int j;
+
#ifdef HAVE_ALLOCA
- int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1],
- segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1];
- int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1],
- segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1];
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
- int j;
debug13(printf("***Entered find_best_path\n"));
@@ -6672,9 +6778,47 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
debug13(printf("***Exiting find_best_path\n"));
#ifdef SUBDIVIDE_ENDS
+#ifdef HAVE_ALLOCA
FREEA(npositions);
FREEA(coveredp);
FREEA(mappings);
+#else
+ FREE(npositions);
+ FREE(coveredp);
+ FREE(mappings);
+#endif
+#endif
+
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(segmenti_donor_knownpos);
+ FREEA(segmentj_acceptor_knownpos);
+ FREEA(segmentj_antidonor_knownpos);
+ FREEA(segmenti_antiacceptor_knownpos);
+ FREEA(segmenti_donor_knowni);
+ FREEA(segmentj_acceptor_knowni);
+ FREEA(segmentj_antidonor_knowni);
+ FREEA(segmenti_antiacceptor_knowni);
+ } else {
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
+ }
+#else
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
#endif
return middle_path;
@@ -7240,22 +7384,39 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
left_ambig_sense, left_ambig_antisense;
int segmenti_donor_nknown, segmentj_acceptor_nknown,
segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
+ int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos,
+ *segmenti_donor_knowni, *segmentj_acceptor_knowni, *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni;
int j;
#ifdef HAVE_ALLOCA
- int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1],
- segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1];
- int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1],
- segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1];
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
@@ -7638,21 +7799,21 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
/* sense_endpoints = Intlist_push(sense_endpoints,queryend); */
if (plusp == true) {
- right_ambig_sense = Substring_new_ambig(/*querystart*/splice_pos,queryend,
- /*splice_pos*/splice_pos,querylength,
- chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
- right_ambcoords_sense,right_amb_knowni_sense,
- right_amb_nmismatchesj_sense,right_amb_probsj_sense,
- /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense),
- /*amb_donor_common_p*/true,/*substring1p*/false);
+ right_ambig_sense = Substring_new_ambig_A(/*querystart*/splice_pos,queryend,
+ /*splice_pos*/splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
+ right_ambcoords_sense,right_amb_knowni_sense,
+ right_amb_nmismatchesj_sense,right_amb_probsj_sense,
+ /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense),
+ /*substring1p*/false);
} else {
- right_ambig_sense = Substring_new_ambig(/*querystart*/querylength - queryend,querylength - splice_pos,
- /*splice_pos*/querylength - splice_pos,querylength,
- chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
- right_ambcoords_sense,right_amb_knowni_sense,
- right_amb_nmismatchesj_sense,right_amb_probsj_sense,
- /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense),
- /*amb_donor_common_p*/false,/*substring1p*/true);
+ right_ambig_sense = Substring_new_ambig_D(/*querystart*/querylength - queryend,querylength - splice_pos,
+ /*splice_pos*/querylength - splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
+ right_ambcoords_sense,right_amb_knowni_sense,
+ right_amb_nmismatchesj_sense,right_amb_probsj_sense,
+ /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense),
+ /*substring1p*/true);
}
}
@@ -7713,21 +7874,21 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
/* antisense_endpoints = Intlist_push(antisense_endpoints,queryend); */
if (plusp == true) {
- right_ambig_antisense = Substring_new_ambig(/*querystart*/splice_pos,queryend,
- /*splice_pos*/splice_pos,querylength,
- chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
- right_ambcoords_antisense,right_amb_knowni_antisense,
- right_amb_nmismatchesj_antisense,right_amb_probsj_antisense,
- /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense),
- /*amb_donor_common_p*/false,/*substring1p*/false);
+ right_ambig_antisense = Substring_new_ambig_D(/*querystart*/splice_pos,queryend,
+ /*splice_pos*/splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
+ right_ambcoords_antisense,right_amb_knowni_antisense,
+ right_amb_nmismatchesj_antisense,right_amb_probsj_antisense,
+ /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense),
+ /*substring1p*/false);
} else {
- right_ambig_antisense = Substring_new_ambig(/*querystart*/querylength - queryend,querylength - splice_pos,
- /*splice_pos*/querylength - splice_pos,querylength,
- chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
- right_ambcoords_antisense,right_amb_knowni_antisense,
- right_amb_nmismatchesj_antisense,right_amb_probsj_antisense,
- /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense),
- /*amb_donor_common_p*/true,/*substring1p*/true);
+ right_ambig_antisense = Substring_new_ambig_A(/*querystart*/querylength - queryend,querylength - splice_pos,
+ /*splice_pos*/querylength - splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
+ right_ambcoords_antisense,right_amb_knowni_antisense,
+ right_amb_nmismatchesj_antisense,right_amb_probsj_antisense,
+ /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense),
+ /*substring1p*/true);
}
}
@@ -7836,21 +7997,21 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
/* sense_endpoints = Intlist_push(sense_endpoints,querystart); */
if (plusp == true) {
- left_ambig_sense = Substring_new_ambig(querystart,/*queryend*/splice_pos,
- /*splice_pos*/splice_pos,querylength,
- chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
- left_ambcoords_sense,left_amb_knowni_sense,
- left_amb_nmismatchesi_sense,left_amb_probsi_sense,
- /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense),
- /*amb_donor_common_p*/false,/*substring1p*/true);
+ left_ambig_sense = Substring_new_ambig_D(querystart,/*queryend*/splice_pos,
+ /*splice_pos*/splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
+ left_ambcoords_sense,left_amb_knowni_sense,
+ left_amb_nmismatchesi_sense,left_amb_probsi_sense,
+ /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense),
+ /*substring1p*/true);
} else {
- left_ambig_sense = Substring_new_ambig(querylength - splice_pos,/*queryend*/querylength - querystart,
- /*splice_pos*/querylength - splice_pos,querylength,
- chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
- left_ambcoords_sense,left_amb_knowni_sense,
- left_amb_nmismatchesi_sense,left_amb_probsi_sense,
- /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense),
- /*amb_donor_common_p*/true,/*substring1p*/false);
+ left_ambig_sense = Substring_new_ambig_A(querylength - splice_pos,/*queryend*/querylength - querystart,
+ /*splice_pos*/querylength - splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
+ left_ambcoords_sense,left_amb_knowni_sense,
+ left_amb_nmismatchesi_sense,left_amb_probsi_sense,
+ /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense),
+ /*substring1p*/false);
}
}
@@ -7935,21 +8096,21 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
/* antisense_endpoints = Intlist_push(antisense_endpoints,querystart); */
if (plusp == true) {
- left_ambig_antisense = Substring_new_ambig(querystart,/*queryend*/splice_pos,
- /*splice_pos*/splice_pos,querylength,
- chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
- left_ambcoords_antisense,left_amb_knowni_antisense,
- left_amb_nmismatchesi_antisense,left_amb_probsi_antisense,
- /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense),
- /*amb_donor_common_p*/true,/*substring1p*/true);
+ left_ambig_antisense = Substring_new_ambig_A(querystart,/*queryend*/splice_pos,
+ /*splice_pos*/splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
+ left_ambcoords_antisense,left_amb_knowni_antisense,
+ left_amb_nmismatchesi_antisense,left_amb_probsi_antisense,
+ /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense),
+ /*substring1p*/true);
} else {
- left_ambig_antisense = Substring_new_ambig(querylength - splice_pos,/*queryend*/querylength - querystart,
- /*splice_pos*/querylength - splice_pos,querylength,
- chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
- left_ambcoords_antisense,left_amb_knowni_antisense,
- left_amb_nmismatchesi_antisense,left_amb_probsi_antisense,
- /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense),
- /*amb_donor_common_p*/false,/*substring1p*/false);
+ left_ambig_antisense = Substring_new_ambig_D(querylength - splice_pos,/*queryend*/querylength - querystart,
+ /*splice_pos*/querylength - splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
+ left_ambcoords_antisense,left_amb_knowni_antisense,
+ left_amb_nmismatchesi_antisense,left_amb_probsi_antisense,
+ /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense),
+ /*substring1p*/false);
}
}
@@ -8131,6 +8292,37 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
}
List_free(&super_path);
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(segmenti_donor_knownpos);
+ FREEA(segmentj_acceptor_knownpos);
+ FREEA(segmentj_antidonor_knownpos);
+ FREEA(segmenti_antiacceptor_knownpos);
+ FREEA(segmenti_donor_knowni);
+ FREEA(segmentj_acceptor_knowni);
+ FREEA(segmentj_antidonor_knowni);
+ FREEA(segmenti_antiacceptor_knowni);
+ } else {
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
+ }
+#else
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
+#endif
+
return hits;
}
diff --git a/src/sedgesort.c b/src/sedgesort.c
index 3d7da76..6b4a397 100644
--- a/src/sedgesort.c
+++ b/src/sedgesort.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sedgesort.c 195760 2016-08-04 00:12:04Z twu $";
+static char rcsid[] = "$Id: sedgesort.c 196273 2016-08-12 15:15:06Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/sedgesort.h b/src/sedgesort.h
index 39167de..8d3d630 100644
--- a/src/sedgesort.h
+++ b/src/sedgesort.h
@@ -1,4 +1,4 @@
-/* $Id: sedgesort.h 195760 2016-08-04 00:12:04Z twu $ */
+/* $Id: sedgesort.h 196273 2016-08-12 15:15:06Z twu $ */
#ifndef SEDGESORT_INCLUDED
#define SEDGESORT_INCLUDED
diff --git a/src/shortread.c b/src/shortread.c
index f434d24..3c57ca8 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 195760 2016-08-04 00:12:04Z twu $";
+static char rcsid[] = "$Id: shortread.c 196410 2016-08-16 15:57:57Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -200,9 +200,13 @@ Shortread_free (T *old) {
static char Header[HEADERLEN];
static char Discard[DISCARDLEN];
-static char Read1[MAX_READLENGTH+1];
-static char Read2[MAX_READLENGTH+1];
-static char Quality[MAX_READLENGTH+1];
+
+/* input_oneline() can actually read longer than this */
+#define MAX_EXPECTED_READLENGTH 300
+
+static char Read1[MAX_EXPECTED_READLENGTH+1];
+static char Read2[MAX_EXPECTED_READLENGTH+1];
+static char Quality[MAX_EXPECTED_READLENGTH+1];
/* The first element of Sequence is always the null character, to mark
@@ -1487,7 +1491,7 @@ input_oneline (int *nextchar, int *nchars, char **longstring, char *Start,
*longstring = (char *) NULL;
ptr = &(Start[0]);
- remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char);
+ remainder = (&(Start[MAX_EXPECTED_READLENGTH]) - ptr)/sizeof(char);
if (*nextchar == EOF || (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) {
debug(printf("nchars %d: EOF or > or +: Returning 0\n",*nchars));
return 0;
@@ -1534,7 +1538,7 @@ input_oneline (int *nextchar, int *nchars, char **longstring, char *Start,
debug(printf("No line feed, but not end of file. Using Intlist_T.\n"));
intlist = (Intlist_T) NULL;
i = 0;
- while (i <= MAX_READLENGTH && Start[i] != '\0') {
+ while (i <= MAX_EXPECTED_READLENGTH && Start[i] != '\0') {
debug(printf("Pushing %c\n",Start[i]));
intlist = Intlist_push_in(intlist,Start[i]);
i++;
@@ -1585,7 +1589,7 @@ input_oneline_filecontents (int *nextchar, char **longstring, char *Start,
*longstring = (char *) NULL;
ptr = &(Start[0]);
- remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char);
+ remainder = (&(Start[MAX_EXPECTED_READLENGTH]) - ptr)/sizeof(char);
if (*nextchar == EOF || *nextchar == '\0' ||
(possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) {
debug(printf("EOF or > or +: Returning 0\n"));
@@ -1632,7 +1636,7 @@ input_oneline_filecontents (int *nextchar, char **longstring, char *Start,
debug(printf("No line feed, but not end of file. Using Intlist_T.\n"));
intlist = (Intlist_T) NULL;
i = 0;
- while (i <= MAX_READLENGTH && Start[i] != '\0') {
+ while (i <= MAX_EXPECTED_READLENGTH && Start[i] != '\0') {
debug(printf("Pushing %c\n",Start[i]));
intlist = Intlist_push_in(intlist,Start[i]);
i++;
@@ -1682,7 +1686,7 @@ input_oneline_gzip (int *nextchar, char **longstring, char *Start,
*longstring = (char *) NULL;
ptr = &(Start[0]);
- remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char);
+ remainder = (&(Start[MAX_EXPECTED_READLENGTH]) - ptr)/sizeof(char);
if (*nextchar == EOF || (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) {
debug(printf("EOF or > or +: Returning 0\n"));
return 0;
@@ -1731,7 +1735,7 @@ input_oneline_gzip (int *nextchar, char **longstring, char *Start,
debug(printf("No line feed, but not end of file. Using Intlist_T.\n"));
intlist = (Intlist_T) NULL;
i = 0;
- while (i <= MAX_READLENGTH && Start[i] != '\0') {
+ while (i <= MAX_EXPECTED_READLENGTH && Start[i] != '\0') {
debug(printf("Pushing %c\n",Start[i]));
intlist = Intlist_push_in(intlist,Start[i]);
i++;
@@ -1792,7 +1796,7 @@ input_oneline_bzip2 (int *nextchar, char **longstring, char *Start,
*longstring = (char *) NULL;
ptr = &(Start[0]);
- remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char);
+ remainder = (&(Start[MAX_EXPECTED_READLENGTH]) - ptr)/sizeof(char);
if (*nextchar == EOF || (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) {
debug(printf("EOF or > or +: Returning 0\n"));
return 0;
@@ -1841,7 +1845,7 @@ input_oneline_bzip2 (int *nextchar, char **longstring, char *Start,
debug(printf("No line feed, but not end of file. Using Intlist_T.\n"));
intlist = (Intlist_T) NULL;
i = 0;
- while (i <= MAX_READLENGTH && Start[i] != '\0') {
+ while (i <= MAX_EXPECTED_READLENGTH && Start[i] != '\0') {
debug(printf("Pushing %c\n",Start[i]));
intlist = Intlist_push_in(intlist,Start[i]);
i++;
diff --git a/src/splice.c b/src/splice.c
index 2755863..0c8ee0a 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 195753 2016-08-03 23:44:46Z twu $";
+static char rcsid[] = "$Id: splice.c 196431 2016-08-16 20:19:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -141,15 +141,26 @@ Splice_resolve_sense (int *best_knowni_i, int *best_knowni_j,
int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+ int *donor_positions_alloc, *acceptor_positions_alloc, *donor_knowni_alloc, *acceptor_knowni_alloc;
+
#ifdef HAVE_ALLOCA
- int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ donor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1];
- int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1];
+ donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
@@ -405,6 +416,26 @@ Splice_resolve_sense (int *best_knowni_i, int *best_knowni_j,
debug1(printf("best_knowni_i is %d and best_knowni_j is %d\n",*best_knowni_i,*best_knowni_j));
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(donor_positions_alloc);
+ FREEA(acceptor_positions_alloc);
+ FREEA(donor_knowni_alloc);
+ FREEA(acceptor_knowni_alloc);
+ } else {
+ FREE(donor_positions_alloc);
+ FREE(acceptor_positions_alloc);
+ FREE(donor_knowni_alloc);
+ FREE(acceptor_knowni_alloc);
+ }
+#else
+ FREE(donor_positions_alloc);
+ FREE(acceptor_positions_alloc);
+ FREE(donor_knowni_alloc);
+ FREE(acceptor_knowni_alloc);
+#endif
+
+
if (*best_prob_i > 0.95 && *best_prob_j > 0.70) {
debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
debug1(printf("nmismatches %d and %d\n",*best_nmismatches_i,*best_nmismatches_j));
@@ -451,15 +482,26 @@ Splice_resolve_antisense (int *best_knowni_i, int *best_knowni_j,
int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+ int *donor_positions_alloc, *acceptor_positions_alloc, *donor_knowni_alloc, *acceptor_knowni_alloc;
+
#ifdef HAVE_ALLOCA
- int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ donor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1];
- int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1];
+ donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
debug1(printf("Splice_resolve_antisense: Getting genome at lefti %u and leftj %u (diff: %d), range %d..%d\n",
@@ -711,6 +753,26 @@ Splice_resolve_antisense (int *best_knowni_i, int *best_knowni_j,
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(donor_positions_alloc);
+ FREEA(acceptor_positions_alloc);
+ FREEA(donor_knowni_alloc);
+ FREEA(acceptor_knowni_alloc);
+ } else {
+ FREE(donor_positions_alloc);
+ FREE(acceptor_positions_alloc);
+ FREE(donor_knowni_alloc);
+ FREE(acceptor_knowni_alloc);
+ }
+#else
+ FREE(donor_positions_alloc);
+ FREE(acceptor_positions_alloc);
+ FREE(donor_knowni_alloc);
+ FREE(acceptor_knowni_alloc);
+#endif
+
+
debug1(printf("best_knowni_i is %d and best_knowni_j is %d\n",*best_knowni_i,*best_knowni_j));
if (*best_prob_i > 0.95 && *best_prob_j > 0.70) {
@@ -772,18 +834,28 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+ int *donor_positions_alloc, *acceptor_positions_alloc, *donor_knowni_alloc, *acceptor_knowni_alloc;
+
#ifdef HAVE_ALLOCA
- int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ donor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1];
- int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1];
+ donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
-
debug1(printf("Splice_solve_single: Getting genome at lefti %u and leftj %u (diff: %d)\n",
segmenti_left,segmentj_left,segmentj_left-segmenti_left));
*nhits = 0;
@@ -1084,7 +1156,7 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
if (sufficient1p && sufficient2p) {
*nhits += 1;
- return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
+ hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
donor,acceptor,best_donor_prob,best_acceptor_prob,
/*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,
@@ -1094,14 +1166,15 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
+ /* return hits; */
} else if (subs_or_indels_p == true) {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* return hits; */
} else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* return hits; */
} else if (sufficient1p || sufficient2p) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
@@ -1114,10 +1187,11 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
- return hits;
+ /* return hits; */
} else {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
+ /* ? return hits; */
}
}
@@ -1154,7 +1228,7 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
sufficient2p = sufficient_splice_prob_local(donor_support,best_segmentj_nmismatches,best_donor_prob);
if (sufficient1p && sufficient2p) {
*nhits += 1;
- return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
+ hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
donor,acceptor,best_donor_prob,best_acceptor_prob,
/*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,
@@ -1164,14 +1238,15 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
+ /* return hits; */
} else if (subs_or_indels_p == true) {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* return hits; */
} else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* return hits; */
} else if (sufficient1p || sufficient2p) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
@@ -1184,18 +1259,36 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
- return hits;
+ /* return hits; */
} else {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* ? return hits; */
}
}
}
}
}
- debug1(printf("Splice_solve_single_sense fail\n"));
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(donor_positions_alloc);
+ FREEA(acceptor_positions_alloc);
+ FREEA(donor_knowni_alloc);
+ FREEA(acceptor_knowni_alloc);
+ } else {
+ FREE(donor_positions_alloc);
+ FREE(acceptor_positions_alloc);
+ FREE(donor_knowni_alloc);
+ FREE(acceptor_knowni_alloc);
+ }
+#else
+ FREE(donor_positions_alloc);
+ FREE(acceptor_positions_alloc);
+ FREE(donor_knowni_alloc);
+ FREE(acceptor_knowni_alloc);
+#endif
+
return hits;
}
@@ -1235,15 +1328,26 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+ int *donor_positions_alloc, *acceptor_positions_alloc, *donor_knowni_alloc, *acceptor_knowni_alloc;
+
#ifdef HAVE_ALLOCA
- int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ donor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1];
- int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1];
+ donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
debug1(printf("Splice_solve_single: Getting genome at lefti %u and leftj %u (diff: %d)\n",
@@ -1546,7 +1650,7 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
if (sufficient1p && sufficient2p) {
*nhits += 1;
- return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
+ hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
donor,acceptor,best_donor_prob,best_acceptor_prob,
/*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,
@@ -1556,14 +1660,15 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
+ /* return hits; */
} else if (subs_or_indels_p == true) {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* return hits; */
} else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* return hits; */
} else if (sufficient1p || sufficient2p) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
@@ -1576,10 +1681,11 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
- return hits;
+ /* return hits; */
} else {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
+ /* ? return hits; */
}
}
@@ -1616,7 +1722,7 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
sufficient2p = sufficient_splice_prob_local(donor_support,best_segmentj_nmismatches,best_donor_prob);
if (sufficient1p && sufficient2p) {
*nhits += 1;
- return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
+ hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
donor,acceptor,best_donor_prob,best_acceptor_prob,
/*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,
@@ -1626,14 +1732,15 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
+ /* return hits; */
} else if (subs_or_indels_p == true) {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* return hits; */
} else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* return hits; */
} else if (sufficient1p || sufficient2p) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
@@ -1646,655 +1753,39 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
- return hits;
+ /* return hits; */
} else {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
- return hits;
+ /* ? return hits; */
}
}
}
}
}
- debug1(printf("Splice_solve_single_antisense fail\n"));
- return hits;
-}
-
-
-#if 0
-List_T
-Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
-
- bool *segmenti_usedp, bool *segmentm_usedp, bool *segmentj_usedp,
- Univcoord_T segmenti_left, Univcoord_T segmentm_left, Univcoord_T segmentj_left,
- Chrnum_T segmenti_chrnum, Univcoord_T segmenti_chroffset,
- Univcoord_T segmenti_chrhigh, Chrpos_T segmenti_chrlength,
- Chrnum_T segmentm_chrnum, Univcoord_T segmentm_chroffset,
- Univcoord_T segmentm_chrhigh, Chrpos_T segmentm_chrlength,
- Chrnum_T segmentj_chrnum, Univcoord_T segmentj_chroffset,
- Univcoord_T segmentj_chrhigh, Chrpos_T segmentj_chrlength,
-
- int querylength, Compress_T query_compress,
- int *segmenti_donor_knownpos, int *segmentm_acceptor_knownpos, int *segmentm_donor_knownpos, int *segmentj_acceptor_knownpos,
- int *segmentj_antidonor_knownpos, int *segmentm_antiacceptor_knownpos, int *segmentm_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
- int *segmenti_donor_knowni, int *segmentm_acceptor_knowni, int *segmentm_donor_knowni, int *segmentj_acceptor_knowni,
- int *segmentj_antidonor_knowni, int *segmentm_antiacceptor_knowni, int *segmentm_antidonor_knowni, int *segmenti_antiacceptor_knowni,
- int segmenti_donor_nknown, int segmentm_acceptor_nknown, int segmentm_donor_nknown, int segmentj_acceptor_nknown,
- int segmentj_antidonor_nknown, int segmentm_antiacceptor_nknown, int segmentm_antidonor_nknown, int segmenti_antiacceptor_nknown,
- int splicing_penalty, int max_mismatches_allowed, bool plusp, int genestrand,
- bool subs_or_indels_p, bool sarrayp) {
- Substring_T donor, shortexon, acceptor;
- int best_splice_pos_1, best_splice_pos_2, splice_pos_start, splice_pos_end, splice_pos_1, splice_pos_2;
- int i, a, b, j;
-
- int best_nmismatches, nmismatches;
- int best_segmenti_nmismatches, best_segmentm_nmismatches, best_segmentj_nmismatches,
- segmenti_nmismatches, segmentm_nmismatches, segmentj_nmismatches;
- int donor_support, acceptor_support, middle_support;
- Univcoord_T best_donor1_splicecoord, best_acceptor1_splicecoord, best_donor2_splicecoord, best_acceptor2_splicecoord;
- int best_donor1_knowni, best_acceptor1_knowni, best_donor2_knowni, best_acceptor2_knowni;
- double best_prob, best_donor1_prob, best_acceptor1_prob, best_donor2_prob, best_acceptor2_prob,
- probi, proba, probb, probj;
- bool sufficient1p, sufficient2p, sufficient3p, sufficient4p, orig_plusp, matchp;
- int sensedir;
-
- int donori_nsites, acceptora_nsites, donorb_nsites, acceptorj_nsites,
- antiacceptori_nsites, antidonora_nsites, antiacceptorb_nsites, antidonorj_nsites;
- int *donori_positions, *acceptora_positions, *donorb_positions, *acceptorj_positions,
- *antiacceptori_positions, *antidonora_positions, *antiacceptorb_positions, *antidonorj_positions;
- int *donori_knowni, *acceptora_knowni, *donorb_knowni, *acceptorj_knowni,
- *antiacceptori_knowni, *antidonora_knowni, *antiacceptorb_knowni, *antidonorj_knowni;
-
#ifdef HAVE_ALLOCA
- int *donor1_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor1_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *donor2_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor2_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *donor1_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor1_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *donor2_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
- int *acceptor2_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
-#else
- int donor1_positions_alloc[MAX_READLENGTH+1], acceptor1_positions_alloc[MAX_READLENGTH+1],
- donor2_positions_alloc[MAX_READLENGTH+1], acceptor2_positions_alloc[MAX_READLENGTH+1];
- int donor1_knowni_alloc[MAX_READLENGTH+1], acceptor1_knowni_alloc[MAX_READLENGTH+1],
- donor2_knowni_alloc[MAX_READLENGTH+1], acceptor2_knowni_alloc[MAX_READLENGTH+1];
-#endif
-
-
- debug2(printf("Splice_solve_double: Getting genome at lefti %u, leftm %u, and leftj %u\n",
- segmenti_left,segmentm_left,segmentj_left));
-
- *nhits = 0;
- splice_pos_start = min_shortend;
- splice_pos_end = querylength - min_shortend; /* ? off by 1, so -l 3 allows only ends of up to 2 */
-
- if (splice_pos_start <= splice_pos_end) {
- /* Originally from plus strand. No complement. */
- /* Sense (End 1 to End 2) or Antisense (End 5 to End 6) */
-
- /* Segment i */
- if (novelsplicingp && segmenti_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
- donori_nsites = Genome_donor_positions(donor1_positions_alloc,donor1_knowni_alloc,
- segmenti_donor_knownpos,segmenti_donor_knowni,
- segmenti_left,splice_pos_start,splice_pos_end);
- donori_positions = donor1_positions_alloc;
- donori_knowni = donor1_knowni_alloc;
- } else {
- donori_nsites = segmenti_donor_nknown;
- donori_positions = segmenti_donor_knownpos;
- donori_knowni = segmenti_donor_knowni;
- }
-
-#ifdef DEBUG2
- printf("Found %d donori sites:",donori_nsites);
- for (i = 0; i < donori_nsites; i++) {
- printf(" %d",donori_positions[i]);
- if (donori_knowni[i] >= 0) {
- printf(" (%d)",donori_knowni[i]);
- }
- }
- printf("\n");
-#endif
-
- /* Segment m1 */
- if (novelsplicingp && segmentm_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
- acceptora_nsites = Genome_acceptor_positions(acceptor1_positions_alloc,acceptor1_knowni_alloc,
- segmentm_acceptor_knownpos,segmentm_acceptor_knowni,
- segmentm_left,splice_pos_start,splice_pos_end);
- acceptora_positions = acceptor1_positions_alloc;
- acceptora_knowni = acceptor1_knowni_alloc;
- } else {
- acceptora_nsites = segmentm_acceptor_nknown;
- acceptora_positions = segmentm_acceptor_knownpos;
- acceptora_knowni = segmentm_acceptor_knowni;
- }
-
-#ifdef DEBUG2
- printf("Found %d acceptora sites:",acceptora_nsites);
- for (i = 0; i < acceptora_nsites; i++) {
- printf(" %d",acceptora_positions[i]);
- if (acceptora_knowni[i] >= 0) {
- printf(" (%d)",acceptora_knowni[i]);
- }
- }
- printf("\n");
-#endif
-
- /* Segment m2 */
- if (novelsplicingp && segmentm_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
- donorb_nsites = Genome_donor_positions(donor2_positions_alloc,donor2_knowni_alloc,
- segmentm_donor_knownpos,segmentm_donor_knowni,
- segmentm_left,splice_pos_start,splice_pos_end);
- donorb_positions = donor2_positions_alloc;
- donorb_knowni = donor2_knowni_alloc;
- } else {
- donorb_nsites = segmentm_donor_nknown;
- donorb_positions = segmentm_donor_knownpos;
- donorb_knowni = segmentm_donor_knowni;
- }
-
-#ifdef DEBUG2
- printf("Found %d donorb sites:",donorb_nsites);
- for (i = 0; i < donorb_nsites; i++) {
- printf(" %d",donorb_positions[i]);
- if (donorb_knowni[i] >= 0) {
- printf(" (%d)",donorb_knowni[i]);
- }
- }
- printf("\n");
-#endif
-
- /* Segment j */
- if (novelsplicingp && segmentj_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
- acceptorj_nsites = Genome_acceptor_positions(acceptor2_positions_alloc,acceptor2_knowni_alloc,
- segmentj_acceptor_knownpos,segmentj_acceptor_knowni,
- segmentj_left,splice_pos_start,splice_pos_end);
- acceptorj_positions = acceptor2_positions_alloc;
- acceptorj_knowni = acceptor2_knowni_alloc;
- } else {
- acceptorj_nsites = segmentj_acceptor_nknown;
- acceptorj_positions = segmentj_acceptor_knownpos;
- acceptorj_knowni = segmentj_acceptor_knowni;
- }
-
-#ifdef DEBUG2
- printf("Found %d acceptorj sites:",acceptorj_nsites);
- for (i = 0; i < acceptorj_nsites; i++) {
- printf(" %d",acceptorj_positions[i]);
- if (acceptorj_knowni[i] >= 0) {
- printf(" (%d)",acceptorj_knowni[i]);
- }
- }
- printf("\n");
-#endif
-
- best_nmismatches = max_mismatches_allowed;
- best_prob = 0.0;
- orig_plusp = true;
-
- i = a = b = j = 0;
- while (i < donori_nsites && a < acceptora_nsites) {
- if ((splice_pos_1 = donori_positions[i]) < acceptora_positions[a]) {
- i++;
- } else if (splice_pos_1 > acceptora_positions[a]) {
- a++;
- } else {
- while (b < donorb_nsites && donorb_positions[b] <= splice_pos_1) {
- b++;
- }
- while (j < acceptorj_nsites && acceptorj_positions[j] <= splice_pos_1) {
- j++;
- }
- matchp = false;
- while (b < donorb_nsites && j < acceptorj_nsites && matchp == false) {
- if ((splice_pos_2 = donorb_positions[b]) < acceptorj_positions[j]) {
- b++;
- } else if (splice_pos_2 > acceptorj_positions[j]) {
- j++;
- } else {
- segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos_1,
- plusp,genestrand);
- segmentm_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentm_left,/*pos5*/splice_pos_1,/*pos3*/splice_pos_2,
- plusp,genestrand);
- segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos_2,/*pos3*/querylength,
- plusp,genestrand);
- if ((nmismatches = segmenti_nmismatches + segmentm_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
- if (donori_knowni[i] >= 0) {
- probi = 1.0; /* Needs to be 1.0 for output */
- } else {
- probi = Maxent_hr_donor_prob(segmenti_left + splice_pos_1,segmenti_chroffset);
- }
-
- if (acceptora_knowni[a] >= 0) {
- proba = 1.0; /* Needs to be 1.0 for output */
- } else {
- proba = Maxent_hr_acceptor_prob(segmentm_left + splice_pos_1,segmentm_chroffset);
- }
-
- if (donorb_knowni[b] >= 0) {
- probb = 1.0; /* Needs to be 1.0 for output */
- } else {
- probb = Maxent_hr_donor_prob(segmentm_left + splice_pos_2,segmentm_chroffset);
- }
-
- if (acceptorj_knowni[j] >= 0) {
- probj = 1.0; /* Needs to be 1.0 for output */
- } else {
- probj = Maxent_hr_acceptor_prob(segmentj_left + splice_pos_2,segmentj_chroffset);
- }
-
- debug2(
- if (plusp == true) {
- printf("plus sense splice_pos %d, %d, i.donor %f, m.acceptor %f, m.donor %f, j.acceptor %f\n",
- splice_pos_1,splice_pos_2,probi,proba,probb,probj);
- } else {
- printf("minus antisense splice_pos %d %d, i.donor %f, m.acceptor %f, m.donor %f, j.acceptor %f\n",
- splice_pos_1,splice_pos_2,probi,proba,probb,probj);
- });
-
- if (nmismatches < best_nmismatches ||
- (nmismatches == best_nmismatches && probi + proba + probb + probj > best_prob)) {
- /* Success */
- best_nmismatches = nmismatches;
- best_prob = probi + proba + probb + probj;
-
- best_donor1_splicecoord = segmenti_left + splice_pos_1;
- best_acceptor1_splicecoord = segmentm_left + splice_pos_1;
- best_donor2_splicecoord = segmentm_left + splice_pos_2;
- best_acceptor2_splicecoord = segmentj_left + splice_pos_2;
- best_donor1_knowni = donori_knowni[i];
- best_acceptor1_knowni = acceptora_knowni[a];
- best_donor2_knowni = donorb_knowni[b];
- best_acceptor2_knowni = acceptorj_knowni[j];
- best_donor1_prob = probi;
- best_acceptor1_prob = proba;
- best_donor2_prob = probb;
- best_acceptor2_prob = probj;
- best_splice_pos_1 = splice_pos_1;
- best_splice_pos_2 = splice_pos_2;
- best_segmenti_nmismatches = segmenti_nmismatches;
- best_segmentm_nmismatches = segmentm_nmismatches;
- best_segmentj_nmismatches = segmentj_nmismatches;
- }
- }
- /* b++; j++; Don't advance b or j, so next i/a can match */
- matchp = true;
- }
- }
- i++;
- a++;
- }
- }
-
-
- /* Originally from minus strand. Complement. */
- /* Antisense (End 7 to End 8) or Sense (End 3 to End 4) */
-
- /* Segment i */
- if (novelsplicingp && segmenti_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
- antiacceptori_nsites = Genome_antiacceptor_positions(acceptor1_positions_alloc,acceptor1_knowni_alloc,
- segmenti_antiacceptor_knownpos,segmenti_antiacceptor_knowni,
- segmenti_left,splice_pos_start,splice_pos_end);
- antiacceptori_positions = acceptor1_positions_alloc;
- antiacceptori_knowni = acceptor1_knowni_alloc;
- } else {
- antiacceptori_nsites = segmenti_antiacceptor_nknown;
- antiacceptori_positions = segmenti_antiacceptor_knownpos;
- antiacceptori_knowni = segmenti_antiacceptor_knowni;
- }
-
-#ifdef DEBUG2
- printf("Found %d antiacceptori sites:",antiacceptori_nsites);
- for (i = 0; i < antiacceptori_nsites; i++) {
- printf(" %d",antiacceptori_positions[i]);
- if (antiacceptori_knowni[i] >= 0) {
- printf(" (%d)",antiacceptori_knowni[i]);
- }
- }
- printf("\n");
-#endif
-
- /* Segment m1 */
- if (novelsplicingp && segmentm_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
- antidonora_nsites = Genome_antidonor_positions(donor1_positions_alloc,donor1_knowni_alloc,
- segmentm_antidonor_knownpos,segmentm_antidonor_knowni,
- segmentm_left,splice_pos_start,splice_pos_end);
- antidonora_positions = donor1_positions_alloc;
- antidonora_knowni = donor1_knowni_alloc;
- } else {
- antidonora_nsites = segmentm_antidonor_nknown;
- antidonora_positions = segmentm_antidonor_knownpos;
- antidonora_knowni = segmentm_antidonor_knowni;
- }
-
-#ifdef DEBUG2
- printf("Found %d antidonora sites:",antidonora_nsites);
- for (i = 0; i < antidonora_nsites; i++) {
- printf(" %d",antidonora_positions[i]);
- if (antidonora_knowni[i] >= 0) {
- printf(" (%d)",antidonora_knowni[i]);
- }
- }
- printf("\n");
-#endif
-
- /* Segment m2 */
- if (novelsplicingp && segmentm_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
- antiacceptorb_nsites = Genome_antiacceptor_positions(acceptor2_positions_alloc,acceptor2_knowni_alloc,
- segmentm_antiacceptor_knownpos,segmentm_antiacceptor_knowni,
- segmentm_left,splice_pos_start,splice_pos_end);
- antiacceptorb_positions = acceptor2_positions_alloc;
- antiacceptorb_knowni = acceptor2_knowni_alloc;
- } else {
- antiacceptorb_nsites = segmentm_antiacceptor_nknown;
- antiacceptorb_positions = segmentm_antiacceptor_knownpos;
- antiacceptorb_knowni = segmentm_antiacceptor_knowni;
- }
-
-#ifdef DEBUG2
- printf("Found %d antiacceptorb sites:",antiacceptorb_nsites);
- for (i = 0; i < antiacceptorb_nsites; i++) {
- printf(" %d",antiacceptorb_positions[i]);
- if (antiacceptorb_knowni[i] >= 0) {
- printf(" (%d)",antiacceptorb_knowni[i]);
- }
- }
- printf("\n");
-#endif
-
- /* Segment j */
- if (novelsplicingp && segmentj_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
- antidonorj_nsites = Genome_antidonor_positions(donor2_positions_alloc,donor2_knowni_alloc,
- segmentj_antidonor_knownpos,segmentj_antidonor_knowni,
- segmentj_left,splice_pos_start,splice_pos_end);
- antidonorj_positions = donor2_positions_alloc;
- antidonorj_knowni = donor2_knowni_alloc;
- } else {
- antidonorj_nsites = segmentj_antidonor_nknown;
- antidonorj_positions = segmentj_antidonor_knownpos;
- antidonorj_knowni = segmentj_antidonor_knowni;
- }
-
-#ifdef DEBUG2
- printf("Found %d antidonorj sites:",antidonorj_nsites);
- for (i = 0; i < antidonorj_nsites; i++) {
- printf(" %d",antidonorj_positions[i]);
- if (antidonorj_knowni[i] >= 0) {
- printf(" (%d)",antidonorj_knowni[i]);
- }
- }
- printf("\n");
-#endif
-
-
- i = a = b = j = 0;
- while (i < antiacceptori_nsites && a < antidonora_nsites) {
- if ((splice_pos_1 = antiacceptori_positions[i]) < antidonora_positions[a]) {
- i++;
- } else if (splice_pos_1 > antidonora_positions[a]) {
- a++;
- } else {
- while (b < antiacceptorb_nsites && antiacceptorb_positions[b] <= splice_pos_1) {
- b++;
- }
- while (j < antidonorj_nsites && antidonorj_positions[j] <= splice_pos_1) {
- j++;
- }
- matchp = false;
- while (b < antiacceptorb_nsites && j < antidonorj_nsites && matchp == false) {
- if ((splice_pos_2 = antiacceptorb_positions[b]) < antidonorj_positions[j]) {
- b++;
- } else if (splice_pos_2 > antidonorj_positions[j]) {
- j++;
- } else {
- segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos_1,
- plusp,genestrand);
- segmentm_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentm_left,/*pos5*/splice_pos_1,/*pos3*/splice_pos_2,
- plusp,genestrand);
- segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos_2,/*pos3*/querylength,
- plusp,genestrand);
-
- if ((nmismatches = segmenti_nmismatches + segmentm_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
- if (antiacceptori_knowni[i] >= 0) {
- probi = 1.0; /* Needs to be 1.0 for output */
- } else {
- probi = Maxent_hr_antiacceptor_prob(segmenti_left + splice_pos_1,segmenti_chroffset);
- }
-
- if (antidonora_knowni[a] >= 0) {
- proba = 1.0; /* Needs to be 1.0 for output */
- } else {
- proba = Maxent_hr_antidonor_prob(segmentm_left + splice_pos_1,segmentm_chroffset);
- }
-
- if (antiacceptorb_knowni[b] >= 0) {
- probb = 1.0; /* Needs to be 1.0 for output */
- } else {
- probb = Maxent_hr_antiacceptor_prob(segmentm_left + splice_pos_2,segmentm_chroffset);
- }
-
- if (antidonorj_knowni[j] >= 0) {
- probj = 1.0; /* Needs to be 1.0 for output */
- } else {
- probj = Maxent_hr_antidonor_prob(segmentj_left + splice_pos_2,segmentj_chroffset);
- }
-
- debug2(
- if (plusp == true) {
- printf("plus antisense splice_pos %d, %d, i.antiacceptor %f, m.antidonor %f, m.antiacceptor %f, j.antidonor %f\n",
- splice_pos_1,splice_pos_2,probi,proba,probb,probj);
- } else {
- printf("minus sense splice_pos %d, %d, i.antiacceptor %f, m.antidonor %f, m.antiacceptor %f, j.antidonor %f\n",
- splice_pos_1,splice_pos_2,probi,proba,probb,probj);
- });
-
- if (nmismatches < best_nmismatches ||
- (nmismatches == best_nmismatches && probi + proba + probb + probj > best_prob)) {
- /* Success */
- best_nmismatches = nmismatches;
- best_prob = probi + proba + probb + probj;
-
- best_acceptor1_splicecoord = segmenti_left + splice_pos_1;
- best_donor1_splicecoord = segmentm_left + splice_pos_1;
- best_acceptor2_splicecoord = segmentm_left + splice_pos_2;
- best_donor2_splicecoord = segmentj_left + splice_pos_2;
- best_acceptor1_knowni = antiacceptori_knowni[i];
- best_donor1_knowni = antidonora_knowni[a];
- best_acceptor2_knowni = antiacceptorb_knowni[b];
- best_donor2_knowni = antidonorj_knowni[j];
- best_acceptor1_prob = probi;
- best_donor1_prob = proba;
- best_acceptor2_prob = probb;
- best_donor2_prob = probj;
- best_splice_pos_1 = splice_pos_1;
- best_splice_pos_2 = splice_pos_2;
- best_segmenti_nmismatches = segmenti_nmismatches;
- best_segmentm_nmismatches = segmentm_nmismatches;
- best_segmentj_nmismatches = segmentj_nmismatches;
- orig_plusp = false;
- }
- }
- /* b++; j++; Don't advance b or j, so next i/a can match */
- matchp = true;
- }
- }
- i++;
- a++;
- }
- }
-
-
- if (best_prob > 0.0) {
- debug2(printf("best_prob = %f at splice_pos %d and %d\n",best_prob,best_splice_pos_1,best_splice_pos_2));
- if (orig_plusp == true) {
- /* Originally from plus strand. No complement. */
- sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
-
- donor = Substring_new_donor(best_donor1_splicecoord,best_donor1_knowni,
- best_splice_pos_1,/*substring_querystart*/0,/*substring_queryend*/querylength,
- best_segmenti_nmismatches,
- best_donor1_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,sensedir,
- segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
-
- shortexon = Substring_new_shortexon(best_acceptor1_splicecoord,best_acceptor1_knowni,
- best_donor2_splicecoord,best_donor2_knowni,
- /*acceptor_pos*/best_splice_pos_1,/*donor_pos*/best_splice_pos_2,best_segmentm_nmismatches,
- /*acceptor_prob*/best_acceptor1_prob,/*donor_prob*/best_donor2_prob,
- /*left*/segmentm_left,query_compress,
- querylength,plusp,genestrand,
- sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
- segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength);
-
- acceptor = Substring_new_acceptor(best_acceptor2_splicecoord,best_acceptor2_knowni,
- best_splice_pos_2,/*substring_querystart*/0,/*substring_queryend*/querylength,
- best_segmentj_nmismatches,
- best_acceptor2_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,sensedir,
- segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
-
- if (donor == NULL || shortexon == NULL || acceptor == NULL) {
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- } else {
- *segmenti_usedp = *segmentm_usedp = *segmentj_usedp = true;
-
- donor_support = best_splice_pos_1;
- middle_support = best_splice_pos_2 - best_splice_pos_1;
- acceptor_support = querylength - best_splice_pos_2;
- sufficient1p = sufficient_splice_prob_local(donor_support,best_segmenti_nmismatches,best_donor1_prob);
- sufficient2p = sufficient_splice_prob_local(middle_support,best_segmentm_nmismatches,best_acceptor1_prob);
- sufficient3p = sufficient_splice_prob_local(middle_support,best_segmentm_nmismatches,best_donor2_prob);
- sufficient4p = sufficient_splice_prob_local(acceptor_support,best_segmentj_nmismatches,best_acceptor2_prob);
- if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) {
- *nhits += 1;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- best_donor1_prob,/*shortexonA_prob*/best_acceptor1_prob,
- /*shortexonD_prob*/best_donor2_prob,best_acceptor2_prob,
- /*amb_length_donor*/0,/*amb_length_acceptor*/0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,sarrayp));
- } else if (subs_or_indels_p == true) {
- /* Don't alter hits */
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) {
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- } else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) {
- *lowprob = List_push(*lowprob,
- (void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- best_donor1_prob,/*shortexonA_prob*/best_acceptor1_prob,
- /*shortexonD_prob*/best_donor2_prob,best_acceptor2_prob,
- /*amb_length_donor*/0,/*amb_length_acceptor*/0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,sarrayp));
- } else {
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- }
- }
-
- } else {
- /* Originally from minus strand. Complement. */
- sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
-
- donor = Substring_new_donor(best_donor2_splicecoord,best_donor2_knowni,
- best_splice_pos_2,/*substring_querystart*/0,/*substring_queryend*/querylength,
- best_segmentj_nmismatches,
- best_donor2_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,sensedir,
- segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
-
- shortexon = Substring_new_shortexon(best_acceptor2_splicecoord,best_acceptor2_knowni,
- best_donor1_splicecoord,best_donor1_knowni,
- /*acceptor_pos*/best_splice_pos_2,/*donor_pos*/best_splice_pos_1,best_segmentm_nmismatches,
- /*acceptor_prob*/best_acceptor2_prob,/*donor_prob*/best_donor1_prob,
- /*left*/segmentm_left,query_compress,querylength,
- plusp,genestrand,sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
- segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength);
-
- acceptor = Substring_new_acceptor(best_acceptor1_splicecoord,best_acceptor1_knowni,
- best_splice_pos_1,/*substring_querystart*/0,/*substring_queryend*/querylength,
- best_segmenti_nmismatches,
- best_acceptor1_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,sensedir,
- segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
-
- if (donor == NULL || shortexon == NULL || acceptor == NULL) {
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- } else {
- *segmenti_usedp = *segmentm_usedp = *segmentj_usedp = true;
-
- acceptor_support = best_splice_pos_1;
- middle_support = best_splice_pos_2 - best_splice_pos_1;
- donor_support = querylength - best_splice_pos_2;
- sufficient1p = sufficient_splice_prob_local(acceptor_support,best_segmenti_nmismatches,best_acceptor1_prob);
- sufficient2p = sufficient_splice_prob_local(middle_support,best_segmentm_nmismatches,best_donor1_prob);
- sufficient3p = sufficient_splice_prob_local(middle_support,best_segmentm_nmismatches,best_acceptor2_prob);
- sufficient4p = sufficient_splice_prob_local(donor_support,best_segmentj_nmismatches,best_donor2_prob);
- if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) {
- *nhits += 1;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- best_donor2_prob,/*shortexonA_prob*/best_acceptor2_prob,
- /*shortexonD_prob*/best_donor1_prob,best_acceptor1_prob,
- /*amb_length_donor*/0,/*amb_length_acceptor*/0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,sarrayp));
- } else if (subs_or_indels_p == true) {
- /* Don't alter hits */
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) {
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- } else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) {
- *lowprob = List_push(*lowprob,
- (void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- best_donor2_prob,/*shortexonA_prob*/best_acceptor2_prob,
- /*shortexonD_prob*/best_donor1_prob,best_acceptor1_prob,
- /*amb_length_donor*/0,/*amb_length_acceptor*/0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,sarrayp));
- } else {
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- }
- }
- }
- }
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(donor_positions_alloc);
+ FREEA(acceptor_positions_alloc);
+ FREEA(donor_knowni_alloc);
+ FREEA(acceptor_knowni_alloc);
+ } else {
+ FREE(donor_positions_alloc);
+ FREE(acceptor_positions_alloc);
+ FREE(donor_knowni_alloc);
+ FREE(acceptor_knowni_alloc);
}
+#else
+ FREE(donor_positions_alloc);
+ FREE(acceptor_positions_alloc);
+ FREE(donor_knowni_alloc);
+ FREE(acceptor_knowni_alloc);
+#endif
return hits;
}
-#endif
+
static int
@@ -2378,8 +1869,8 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -2395,8 +1886,8 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -2412,8 +1903,8 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -2483,13 +1974,13 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
for (kk = ii; kk < jj; kk++) {
acceptor = Stage3end_substring_acceptor(subarray[kk]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
@@ -2551,13 +2042,13 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
for (kk = ii; kk < jj; kk++) {
donor = Stage3end_substring_donor(subarray[kk]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
@@ -2706,8 +2197,8 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -2723,8 +2214,8 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -2740,8 +2231,8 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -2814,13 +2305,13 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
for (kk = ii; kk < jj; kk++) {
acceptor = Stage3end_substring_acceptor(subarray[kk]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
@@ -2882,13 +2373,13 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
for (kk = ii; kk < jj; kk++) {
donor = Stage3end_substring_donor(subarray[kk]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 665fc5a..8e2cf43 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 195972 2016-08-08 17:11:50Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 196433 2016-08-16 20:20:51Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -89,11 +89,6 @@ static char rcsid[] = "$Id: stage1hr.c 195972 2016-08-08 17:11:50Z twu $";
#define MAX_INDEXSIZE 8
#endif
-/* Note: MAX_READLENGTH is defined externally by configure */
-#ifndef MAX_READLENGTH
-#error A default value for MAX_READLENGTH was not provided to configure
-#endif
-
/* MAX_NALIGNMENTS of 2 vs 1 gets 1600 improvements in 275,000 reads */
/* MAX_NALIGNMENTS of 3 vs 2 gets 96 improvements in 275,000 reads */
@@ -165,6 +160,8 @@ static int max_gmap_pairsearch;
static int max_gmap_segments; /* Not used */
static int max_gmap_improvement;
+static int max_floors_readlength;
+
#define A_CHAR 0x0
#define C_CHAR 0x1
@@ -4561,6 +4558,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
ptr->leftmost = ptr->rightmost = -1;
ptr->left_splice_p = ptr->right_splice_p = false;
ptr->spliceable_low_p = last_spliceable_p;
+ /* ptr->spliceable_high_p = false; */
#if 0
ptr->leftspan = ptr->rightspan = -1;
#endif
@@ -4574,6 +4572,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
so if segmenti->querypos3 is too high, then it is not spliceable */
if (last_querypos > query_lastpos) {
/* Not spliceable */
+ last_spliceable_p = false;
} else if (diagonal <= last_diagonal + max_distance) {
*ptr_spliceable++ = ptr;
ptr->spliceable_high_p = last_spliceable_p = true;
@@ -4583,6 +4582,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
so if segmenti->querypos5 is too low, then it is not spliceable */
if (first_querypos < index1part) {
/* Not spliceable */
+ last_spliceable_p = false;
} else if (diagonal <= last_diagonal + max_distance) {
*ptr_spliceable++ = ptr;
ptr->spliceable_high_p = last_spliceable_p = true;
@@ -4845,6 +4845,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
ptr->leftmost = ptr->rightmost = -1;
ptr->left_splice_p = ptr->right_splice_p = false;
ptr->spliceable_low_p = last_spliceable_p;
+ ptr->spliceable_high_p = false;
#if 0
ptr->leftspan = ptr->rightspan = -1;
#endif
@@ -6449,18 +6450,24 @@ compute_end_indels_right (int *indels, int *nmismatches_longcont, int *nmismatch
int sum, best_sum = querylength;
int conti, shifti;
int best_indel_pos = -1, endlength;
-
-#ifdef HAVE_ALLOCA
- int *mismatch_positions_shift = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int mismatch_positions_shift[MAX_READLENGTH+1];
-#endif
-
#ifdef OLD_END_INDELS
int indel_pos;
#else
int indel_pos_cont, indel_pos_shift;
#endif
+ int *mismatch_positions_shift;
+
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions_shift = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions_shift = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
+#else
+ mismatch_positions_shift = (int *) MALLOC((querylength+1)*sizeof(int));
+#endif
+
debug2e(printf("Entered compute_end_indels_right with breakpoint = %d, max_mismatches_short %d\n",
breakpoint,max_mismatches_short));
@@ -6613,8 +6620,9 @@ compute_end_indels_right (int *indels, int *nmismatches_longcont, int *nmismatch
}
}
}
- shifti--;
- indel_pos_shift = mismatch_positions_shift[shifti] + 1;
+ if (--shifti >= 0) {
+ indel_pos_shift = mismatch_positions_shift[shifti] + 1;
+ }
} else {
sum = shifti + conti;
@@ -6636,9 +6644,10 @@ compute_end_indels_right (int *indels, int *nmismatches_longcont, int *nmismatch
}
}
conti++;
- shifti--;
- indel_pos_cont = mismatch_positions_long[conti];
- indel_pos_shift = mismatch_positions_shift[shifti] + 1;
+ if (--shifti >= 0) {
+ indel_pos_cont = mismatch_positions_long[conti];
+ indel_pos_shift = mismatch_positions_shift[shifti] + 1;
+ }
}
}
@@ -6817,8 +6826,9 @@ compute_end_indels_right (int *indels, int *nmismatches_longcont, int *nmismatch
}
}
}
- shifti--;
- indel_pos_shift = mismatch_positions_shift[shifti] - sep + 1;
+ if (--shifti >= 0) {
+ indel_pos_shift = mismatch_positions_shift[shifti] - sep + 1;
+ }
} else {
sum = shifti + conti;
@@ -6840,9 +6850,10 @@ compute_end_indels_right (int *indels, int *nmismatches_longcont, int *nmismatch
}
}
conti++;
- shifti--;
- indel_pos_cont = mismatch_positions_long[conti];
- indel_pos_shift = mismatch_positions_shift[shifti] - sep + 1;
+ if (--shifti >= 0) {
+ indel_pos_cont = mismatch_positions_long[conti];
+ indel_pos_shift = mismatch_positions_shift[shifti] - sep + 1;
+ }
}
}
@@ -6872,6 +6883,16 @@ compute_end_indels_right (int *indels, int *nmismatches_longcont, int *nmismatch
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions_shift);
+ } else {
+ FREE(mismatch_positions_shift);
+ }
+#else
+ FREE(mismatch_positions_shift);
+#endif
+
debug2e(printf("compute_end_indels_right returning with nmismatches_longcont %d + nmismatches_shift %d for %d indels at indel_pos %d\n",
*nmismatches_longcont,*nmismatches_shift,*indels,best_indel_pos));
@@ -6896,18 +6917,22 @@ compute_end_indels_left (int *indels, int *nmismatches_longcont, int *nmismatche
int sum, best_sum = querylength;
int conti, shifti;
int best_indel_pos = -1;
-
-#ifdef HAVE_ALLOCA
- int *mismatch_positions_shift = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int mismatch_positions_shift[MAX_READLENGTH+1];
-#endif
-
#ifdef OLD_END_INDELS
int indel_pos;
#else
int indel_pos_cont, indel_pos_shift;
#endif
+ int *mismatch_positions_shift;
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions_shift = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions_shift = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
+#else
+ mismatch_positions_shift = (int *) MALLOC((querylength+1)*sizeof(int));
+#endif
debug2e(printf("Entered compute_end_indels_left with breakpoint = %d, max_mismatches_short %d\n",
@@ -7059,8 +7084,9 @@ compute_end_indels_left (int *indels, int *nmismatches_longcont, int *nmismatche
}
}
}
- shifti--;
- indel_pos_shift = mismatch_positions_shift[shifti];
+ if (--shifti >= 0) {
+ indel_pos_shift = mismatch_positions_shift[shifti];
+ }
} else {
sum = shifti + conti;
@@ -7081,10 +7107,10 @@ compute_end_indels_left (int *indels, int *nmismatches_longcont, int *nmismatche
}
}
conti++;
- shifti--;
- indel_pos_cont = mismatch_positions_long[conti] - sep + 1;
- indel_pos_shift = mismatch_positions_shift[shifti];
-
+ if (--shifti >= 0) {
+ indel_pos_cont = mismatch_positions_long[conti] - sep + 1;
+ indel_pos_shift = mismatch_positions_shift[shifti];
+ }
}
}
@@ -7260,8 +7286,9 @@ compute_end_indels_left (int *indels, int *nmismatches_longcont, int *nmismatche
}
}
}
- shifti--;
- indel_pos_shift = mismatch_positions_shift[shifti];
+ if (--shifti >= 0) {
+ indel_pos_shift = mismatch_positions_shift[shifti];
+ }
} else {
sum = shifti + conti;
@@ -7282,9 +7309,10 @@ compute_end_indels_left (int *indels, int *nmismatches_longcont, int *nmismatche
}
}
conti++;
- shifti--;
- indel_pos_cont = mismatch_positions_long[conti] + 1;
- indel_pos_shift = mismatch_positions_shift[shifti];
+ if (--shifti >= 0) {
+ indel_pos_cont = mismatch_positions_long[conti] + 1;
+ indel_pos_shift = mismatch_positions_shift[shifti];
+ }
}
}
@@ -7313,6 +7341,15 @@ compute_end_indels_left (int *indels, int *nmismatches_longcont, int *nmismatche
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions_shift);
+ } else {
+ FREE(mismatch_positions_shift);
+ }
+#else
+ FREE(mismatch_positions_shift);
+#endif
debug2e(printf("compute_end_indels_left returning with nmismatches_cont %d + nmismatches_shift %d for %d indels at indel_pos %d\n",
*nmismatches_longcont,*nmismatches_shift,*indels,best_indel_pos));
@@ -7343,11 +7380,17 @@ solve_end_indel_low (int *found_score, int *nhits, List_T hits, Segment_T ptr,
int indels, query_indel_pos, indel_pos, breakpoint;
int nmismatches, nmismatches_long, nmismatches_longcont, nmismatches_shift;
int nmismatches1, nmismatches2;
+ int *mismatch_positions;
+
#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int mismatch_positions[MAX_READLENGTH];
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
@@ -7485,6 +7528,16 @@ solve_end_indel_low (int *found_score, int *nhits, List_T hits, Segment_T ptr,
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions);
+ } else {
+ FREE(mismatch_positions);
+ }
+#else
+ FREE(mismatch_positions);
+#endif
+
return hits;
}
@@ -7509,11 +7562,17 @@ solve_end_indel_high (int *found_score, int *nhits, List_T hits, Segment_T ptr,
int indels, query_indel_pos, indel_pos, breakpoint;
int nmismatches, nmismatches_long, nmismatches_longcont, nmismatches_shift;
int nmismatches1, nmismatches2;
+ int *mismatch_positions;
+
#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int mismatch_positions[MAX_READLENGTH];
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
@@ -7651,6 +7710,16 @@ solve_end_indel_high (int *found_score, int *nhits, List_T hits, Segment_T ptr,
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions);
+ } else {
+ FREE(mismatch_positions);
+ }
+#else
+ FREE(mismatch_positions);
+#endif
+
return hits;
}
@@ -7793,47 +7862,6 @@ sufficient_splice_prob_halfintron (int support, int nmismatches, double splicepr
-#if 0
-static void
-find_segmentm_span (Segment_T segmentm, int max_mismatches_allowed,
- int querylength, Compress_T query_compress,
- Univcoord_T left, bool plusp, int genestrand, bool first_read_p) {
- int nmismatches, i;
- int leftspan, rightspan, bestspan;
-#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int));
-#else
- int mismatch_positions[MAX_READLENGTH];
-#endif
-
- /* Find all mismatches */
- nmismatches = Genome_mismatches_left(mismatch_positions,/*max_mismatches*/querylength,
- query_compress,left,/*pos5*/0,/*pos3*/querylength,
- plusp,genestrand,first_read_p);
-
- if (nmismatches < max_mismatches_allowed) {
- segmentm->leftspan = 0;
- segmentm->rightspan = querylength;
- } else {
- segmentm->leftspan = 0;
- bestspan = segmentm->rightspan = mismatch_positions[max_mismatches_allowed] + /*slop*/ 1;
- for (i = 0; i < nmismatches - max_mismatches_allowed; i++) {
- leftspan = mismatch_positions[i];
- rightspan = mismatch_positions[i + max_mismatches_allowed + 1] + /*slop*/ 1;
- if (rightspan - leftspan > bestspan) {
- segmentm->leftspan = leftspan;
- segmentm->rightspan = rightspan;
- bestspan = rightspan - leftspan;
- } else if (rightspan - leftspan == bestspan) {
- segmentm->rightspan = rightspan;
- }
- }
- }
- return;
-}
-#endif
-
-
/* Copied from sarray-read.c */
static int
donor_match_length_cmp (const void *a, const void *b) {
@@ -7884,24 +7912,46 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
int nmismatches_left, nmismatches_right;
int segmenti_donor_nknown, segmentj_acceptor_nknown,
segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
+ int *mismatch_positions_left, *mismatch_positions_right;
+ int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos,
+ *segmenti_donor_knowni, *segmentj_acceptor_knowni,
+ *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni;
#ifdef HAVE_ALLOCA
- int *mismatch_positions_left = (int *) ALLOCA(querylength*sizeof(int));
- int *mismatch_positions_right = (int *) ALLOCA(querylength*sizeof(int));
- int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH];
- int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1],
- segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1];
- int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1],
- segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1];
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions_left = (int *) ALLOCA(querylength*sizeof(int));
+ mismatch_positions_right = (int *) ALLOCA(querylength*sizeof(int));
+ segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions_left = (int *) MALLOC(querylength*sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength*sizeof(int));
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
+#else
+ mismatch_positions_left = (int *) MALLOC(querylength*sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength*sizeof(int));
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
Chrpos_T max_distance;
@@ -8126,8 +8176,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -8143,8 +8193,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -8160,8 +8210,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -8228,13 +8278,13 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
for (k = i; k < j; k++) {
acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
@@ -8299,13 +8349,13 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
for (k = i; k < j; k++) {
donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
@@ -8359,8 +8409,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -8378,8 +8428,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -8397,8 +8447,8 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -8465,13 +8515,13 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
for (k = i; k < j; k++) {
acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
@@ -8536,13 +8586,13 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
for (k = i; k < j; k++) {
donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
@@ -8588,6 +8638,43 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions_left);
+ FREEA(mismatch_positions_right);
+ FREEA(segmenti_donor_knownpos);
+ FREEA(segmentj_acceptor_knownpos);
+ FREEA(segmentj_antidonor_knownpos);
+ FREEA(segmenti_antiacceptor_knownpos);
+ FREEA(segmenti_donor_knowni);
+ FREEA(segmentj_acceptor_knowni);
+ FREEA(segmentj_antidonor_knowni);
+ FREEA(segmenti_antiacceptor_knowni);
+ } else {
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
+ }
+#else
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
+#endif
+
debug(printf("Finished find_singlesplices_plus with %d hits and %d lowprob\n",
List_length(hits),List_length(*lowprob)));
@@ -8607,24 +8694,46 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
int nmismatches_left, nmismatches_right;
int segmenti_donor_nknown, segmentj_acceptor_nknown,
segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
-
+ int *mismatch_positions_left, *mismatch_positions_right;
+ int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos,
+ *segmenti_donor_knowni, *segmentj_acceptor_knowni,
+ *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni;
+
#ifdef HAVE_ALLOCA
- int *mismatch_positions_left = (int *) ALLOCA(querylength*sizeof(int));
- int *mismatch_positions_right = (int *) ALLOCA(querylength*sizeof(int));
- int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH];
- int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1],
- segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1];
- int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1],
- segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1];
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions_left = (int *) ALLOCA(querylength*sizeof(int));
+ mismatch_positions_right = (int *) ALLOCA(querylength*sizeof(int));
+ segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions_left = (int *) MALLOC(querylength*sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength*sizeof(int));
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
+#else
+ mismatch_positions_left = (int *) MALLOC(querylength*sizeof(int));
+ mismatch_positions_right = (int *) MALLOC(querylength*sizeof(int));
+ segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
Chrpos_T max_distance;
@@ -8848,8 +8957,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -8865,8 +8974,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -8882,8 +8991,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -8950,13 +9059,13 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
for (k = i; k < j; k++) {
acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
@@ -9021,13 +9130,13 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
for (k = i; k < j; k++) {
donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
@@ -9081,8 +9190,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
}
@@ -9100,8 +9209,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -9119,8 +9228,8 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ Substring_siteD_prob(Stage3end_substring_donor(hit)),
+ Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
n_good_spliceends += 1;
accepted_hits = List_push(accepted_hits,(void *) hit);
} else {
@@ -9187,13 +9296,13 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
for (k = i; k < j; k++) {
acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
@@ -9257,13 +9366,13 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
for (k = i; k < j; k++) {
donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
#endif
amb_knowni = Intlist_push(amb_knowni,-1);
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
@@ -9309,6 +9418,43 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions_left);
+ FREEA(mismatch_positions_right);
+ FREEA(segmenti_donor_knownpos);
+ FREEA(segmentj_acceptor_knownpos);
+ FREEA(segmentj_antidonor_knownpos);
+ FREEA(segmenti_antiacceptor_knownpos);
+ FREEA(segmenti_donor_knowni);
+ FREEA(segmentj_acceptor_knowni);
+ FREEA(segmentj_antidonor_knowni);
+ FREEA(segmenti_antiacceptor_knowni);
+ } else {
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
+ }
+#else
+ FREE(mismatch_positions_left);
+ FREE(mismatch_positions_right);
+ FREE(segmenti_donor_knownpos);
+ FREE(segmentj_acceptor_knownpos);
+ FREE(segmentj_antidonor_knownpos);
+ FREE(segmenti_antiacceptor_knownpos);
+ FREE(segmenti_donor_knowni);
+ FREE(segmentj_acceptor_knowni);
+ FREE(segmentj_antidonor_knowni);
+ FREE(segmenti_antiacceptor_knowni);
+#endif
+
debug(printf("Finished find_singlesplices_minus with %d hits and %d lowprob\n",
List_length(hits),List_length(*lowprob)));
@@ -9402,1164 +9548,6 @@ substringA_match_length_cmp (const void *a, const void *b) {
#endif
-#if 0
-static List_T
-find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
- Segment_T *spliceable, int nspliceable, struct Segment_T *segments,
- char *queryptr, int querylength, int query_lastpos, Compress_T query_compress,
- Chrpos_T max_distance, int splicing_penalty, int min_shortend,
- int max_mismatches_allowed, bool pairedp, bool first_read_p,
- bool plusp, int genestrand, bool subs_or_indels_p) {
- int j, j1, j2, joffset, jj;
-
- Segment_T segmenti, segmentj, segmentm, segmenti_start, segmentj_end, *ptr;
- List_T potentiali, potentialj, q, r;
- Univcoord_T segmenti_left, segmentj_left, segmentm_left;
- int segmenti_donor_nknown, segmentj_acceptor_nknown,
- segmentj_antidonor_nknown, segmenti_antiacceptor_nknown,
- segmentm_donor_nknown, segmentm_acceptor_nknown,
- segmentm_antidonor_nknown, segmentm_antiacceptor_nknown;
-
-#ifdef HAVE_ALLOCA
- int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentm_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentm_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentm_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentm_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentm_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentm_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentm_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segmentm_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1],
- segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1],
- segmentm_donor_knownpos[MAX_READLENGTH+1], segmentm_acceptor_knownpos[MAX_READLENGTH+1],
- segmentm_antidonor_knownpos[MAX_READLENGTH+1], segmentm_antiacceptor_knownpos[MAX_READLENGTH+1];
- int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1],
- segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1],
- segmentm_donor_knowni[MAX_READLENGTH+1], segmentm_acceptor_knowni[MAX_READLENGTH+1],
- segmentm_antidonor_knowni[MAX_READLENGTH+1], segmentm_antiacceptor_knowni[MAX_READLENGTH+1];
-#endif
-
-#ifdef LARGE_GENOMES
- Uint8list_T donor_ambcoords, acceptor_ambcoords, ambcoords_donor, ambcoords_acceptor;
-#else
- Uintlist_T donor_ambcoords, acceptor_ambcoords, ambcoords_donor, ambcoords_acceptor;
-#endif
- Intlist_T splicesites_i_left, splicesites_i_right;
- Intlist_T nmismatches_list_left, nmismatches_list_right;
- bool ambp_left, ambp_right;
- int sensedir;
- /* int *floors_from_neg3, *floors_to_pos3; */
-
- int nmismatches_shortexon_left, nmismatches_shortexon_middle, nmismatches_shortexon_right;
- int amb_length_donor, amb_length_acceptor;
- int best_left_j, best_right_j;
- bool shortexon_orig_plusp, shortexon_orig_minusp, saw_antidonor_p, saw_acceptor_p;
- int leftpos, rightpos;
- Substring_T donor, acceptor, shortexon;
-
- int nhits_local /*= 0*/, npotential_left, npotential_right;
- int donor_length, acceptor_length;
- List_T accepted_hits, rejected_hits, single_ambig_hits;
- List_T spliceends, p;
- Stage3end_T hit, *hitarray;
- int best_nmismatches, nmismatches;
- int n_good_spliceends, n, i, k;
- double best_prob, prob;
- Univcoord_T lastpos;
- Intlist_T donor_amb_knowni, acceptor_amb_knowni, donor_amb_nmismatches, acceptor_amb_nmismatches;
- Doublelist_T donor_amb_probs, acceptor_amb_probs, probs_donor, probs_acceptor;
-
-
- debug(printf("*** Starting find_known_doublesplices on %d segments ***\n",nspliceable));
- debug(printf("Initially have %d hits\n",List_length(hits)));
-
- /* floors_from_neg3 = floors->scorefrom[-index1interval]; */
- /* floors_to_pos3 = floors->scoreto[query_lastpos+index1interval]; */
-
- for (ptr = spliceable; ptr < &(spliceable[nspliceable]); ptr++) {
- segmentm = *ptr;
- if (1 || segmentm->diagonal < (Univcoord_T) -1) { /* No markers were stored in spliceable */
- segmentm_left = segmentm->diagonal - querylength;
-
- shortexon_orig_plusp = shortexon_orig_minusp = false;
- saw_acceptor_p = saw_antidonor_p = false;
-
- segmentm_donor_nknown = 0;
- segmentm_acceptor_nknown = 0;
- segmentm_antidonor_nknown = 0;
- segmentm_antiacceptor_nknown = 0;
-
- if ((joffset = segmentm->splicesites_i) >= 0) {
- j = joffset;
- while (j < nsplicesites && splicesites[j] < segmentm->diagonal) {
- if (splicetypes[j] == DONOR) {
- debug4k(printf("Setting known donor %d for segmentm at %llu\n",j,(unsigned long long) splicesites[j]));
- segmentm_donor_knownpos[segmentm_donor_nknown] = splicesites[j] - segmentm_left;
- segmentm_donor_knowni[segmentm_donor_nknown++] = j;
- if (saw_acceptor_p == true) {
- /* acceptor...donor */
- shortexon_orig_plusp = true;
- }
- } else if (splicetypes[j] == ANTIACCEPTOR) {
- debug4k(printf("Setting known antiacceptor %d for segmentm at %llu\n",j,(unsigned long long) splicesites[j]));
- segmentm_antiacceptor_knownpos[segmentm_antiacceptor_nknown] = splicesites[j] - segmentm_left;
- segmentm_antiacceptor_knowni[segmentm_antiacceptor_nknown++] = j;
- if (saw_antidonor_p == true) {
- /* antidonor...antiacceptor */
- shortexon_orig_minusp = true;
- }
- } else if (splicetypes[j] == ACCEPTOR) {
- debug4k(printf("Saw known acceptor at %llu\n",(unsigned long long) splicesites[j]));
- segmentm_acceptor_knownpos[segmentm_acceptor_nknown] = splicesites[j] - segmentm_left;
- segmentm_acceptor_knowni[segmentm_acceptor_nknown++] = j;
- saw_acceptor_p = true;
- } else if (splicetypes[j] == ANTIDONOR) {
- debug4k(printf("Saw known antidonor at %llu\n",(unsigned long long) splicesites[j]));
- segmentm_antidonor_knownpos[segmentm_antidonor_nknown] = splicesites[j] - segmentm_left;
- segmentm_antidonor_knowni[segmentm_antidonor_nknown++] = j;
- saw_antidonor_p = true;
- }
- j++;
- }
- }
-
- /* Novel splicing. Do not alter j. */
- /* Still necessary to check segmentm querypos to achieve speed */
- if (novelsplicingp &&
- segmentm->querypos3 >= index1part && segmentm->querypos5 <= query_lastpos - index1part &&
- segmentm->left_splice_p == true && segmentm->right_splice_p == true) {
- debug4d(printf("segment diagonal %llu, querypos %d..%d\n",
- (unsigned long long) segmentm->diagonal,segmentm->querypos5,segmentm->querypos3));
-
- spliceends = (List_T) NULL;
-
- /* Identify potential segmenti for segmentm */
- segmenti_start = segmentm-1;
- while (
- /* Cannot use marker segments going leftward */
- segmenti_start >= &(segments[0]) &&
- segmenti_start->diagonal < (Univcoord_T) -1 && /* Needs to be next criterion, since we initialize only segments[0]->diagonal */
- segmenti_start->chrnum == segmentm->chrnum &&
- segmentm->diagonal <= segmenti_start->diagonal + max_distance) {
- segmenti_start--;
- }
-
- /* Identify potential segmentj for segmentm */
- segmentj_end = segmentm+1;
- while (
-#ifdef NO_MARKER_SEGMENTS
- segmentj_end < &(segments[nsegments]) && segmentj_end->chrnum == segmentm->chrnum &&
-#endif
- segmentj_end->diagonal <= segmentm->diagonal + max_distance) {
- segmentj_end++;
- }
-
- potentiali = (List_T) NULL;
- potentialj = (List_T) NULL;
- npotential_left = 0;
- npotential_right = 0;
- if ((segmentm - segmenti_start) * (segmentj_end - segmentm) >= MAX_LOCALSPLICING_POTENTIAL) {
- /* Too many to check */
- /* segmenti_start = segmentm-1 - MAX_LOCALSPLICING_POTENTIAL; */
- /* segmentj_end = segmentm+1 + MAX_LOCALSPLICING_POTENTIAL; */
- segmenti = segmenti_start; /* Don't process any */
- segmentj = segmentj_end; /* Don't process any */
- } else {
- segmenti = segmentm-1;
- segmentj = segmentm+1;
- }
-
- for ( ; segmenti > segmenti_start; segmenti--) {
- debug4d(printf("local left? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d\n",
- (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- (unsigned long long) segmentm->diagonal,segmentm->querypos5,segmentm->querypos3));
- /* i5 i3 m5 m3 */
- assert(segmenti->diagonal < segmentm->diagonal);
- if (segmenti->leftmost < 0) {
- /* Failed outer floor test in find_singlesplices */
- } else if (plusp == true && segmenti->querypos3 >= segmentm->querypos5) {
- debug4d(printf("Bad querypos\n"));
- } else if (plusp == false && segmentm->querypos3 >= segmenti->querypos5) {
- debug4d(printf("Bad querypos\n"));
- } else if (segmenti->diagonal + min_intronlength > segmentm->diagonal) {
- debug4d(printf("Too short\n"));
- } else {
- potentiali = List_push(potentiali,(void *) segmenti);
- npotential_left++;
- debug4d(printf("Potential left #%d: %llu\n",npotential_left,(unsigned long long) segmenti->diagonal));
- }
- }
-
- for ( ; segmentj < segmentj_end; segmentj++) {
- debug4d(printf("local right? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d\n",
- (unsigned long long) segmentm->diagonal,segmentm->querypos5,segmentm->querypos3,
- (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
- /* m5 m3 j5 j3 */
- assert(segmentm->diagonal < segmentj->diagonal);
- if (segmentj->rightmost < 0) {
- /* Failed outer floor test in find_singlesplices */
- } else if (plusp == true && segmentm->querypos3 >= segmentj->querypos5) {
- debug4d(printf("Bad querypos\n"));
- } else if (plusp == false && segmentj->querypos3 >= segmentm->querypos5) {
- debug4d(printf("Bad querypos\n"));
- } else if (segmentm->diagonal + min_intronlength > segmentj->diagonal) {
- debug4d(printf("Too short\n"));
- } else {
- potentialj = List_push(potentialj,(void *) segmentj);
- npotential_right++;
- debug4d(printf("Potential right #%d: %llu\n",npotential_right,(unsigned long long) segmentj->diagonal));
- }
- }
-
- if (npotential_left > 0 && npotential_right > 0) {
- segmentm_donor_knownpos[segmentm_donor_nknown] = querylength;
- segmentm_acceptor_knownpos[segmentm_acceptor_nknown] = querylength;
- segmentm_antidonor_knownpos[segmentm_antidonor_nknown] = querylength;
- segmentm_antiacceptor_knownpos[segmentm_antiacceptor_nknown] = querylength;
-
- for (q = potentiali; q != NULL; q = List_next(q)) {
- segmenti = (Segment_T) List_head(q);
- segmenti_left = segmenti->diagonal - querylength;
-
- /* Set known sites for segmenti */
- segmenti_donor_nknown = 0;
- segmenti_antiacceptor_nknown = 0;
- if ((jj = segmenti->splicesites_i) >= 0) {
- while (jj < nsplicesites && splicesites[jj] < segmenti->diagonal) {
- if (splicetypes[jj] == DONOR) {
- debug4d(printf("Setting known donor %d for segmenti at %llu\n",jj,(unsigned long long) splicesites[jj]));
- segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[jj] - segmenti_left;
- segmenti_donor_knowni[segmenti_donor_nknown++] = jj;
- } else if (splicetypes[jj] == ANTIACCEPTOR) {
- debug4d(printf("Setting known antiacceptor %d for segmenti at %llu\n",jj,(unsigned long long) splicesites[jj]));
- segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[jj] - segmenti_left;
- segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = jj;
- }
- jj++;
- }
- }
- segmenti_donor_knownpos[segmenti_donor_nknown] = querylength;
- segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength;
-
-
- for (r = potentialj; r != NULL; r = List_next(r)) {
- segmentj = (Segment_T) List_head(r);
-
- debug4d(printf("Doublesplice span test (%d mismatches allowed): %d mismatches found from leftmost %d to j.rightmost %d\n",
- max_mismatches_allowed,
- Genome_count_mismatches_substring(query_compress,segmentm_left,
- /*pos5*/segmenti->leftmost,/*pos3*/segmentj->rightmost,
- plusp,genestrand,first_read_p),
- segmenti->leftmost,segmentj->rightmost));
-
- if (segmenti->leftmost >= segmentj->rightmost) {
- debug4d(printf("Double splice is not possible with pos5 %d > pos3 %d\n",
- segmenti->leftmost,segmentj->rightmost));
- } else if (Genome_count_mismatches_limit(query_compress,segmentm_left,
- /*pos5*/segmenti->leftmost,/*pos3*/segmentj->rightmost,
- max_mismatches_allowed,plusp,genestrand) <= max_mismatches_allowed) {
- debug4d(printf("Double splice is possible\n"));
- segmentj_left = segmentj->diagonal - querylength;
-
- /* Set known sites for segmentj */
- segmentj_acceptor_nknown = 0;
- segmentj_antidonor_nknown = 0;
- if ((jj = segmentj->splicesites_i) >= 0) {
- while (jj < nsplicesites && splicesites[jj] < segmentj->diagonal) {
- if (splicetypes[jj] == ACCEPTOR) {
- debug4d(printf("Setting known acceptor %d for segmentj at %llu\n",jj,(unsigned long long) splicesites[jj]));
- segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[jj] - segmentj_left;
- segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = jj;
- } else if (splicetypes[jj] == ANTIDONOR) {
- debug4d(printf("Setting known antidonor %d for segmentj at %llu\n",jj,(unsigned long long) splicesites[jj]));
- segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[jj] - segmentj_left;
- segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = jj;
- }
- jj++;
- }
- }
- segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength;
- segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength;
-
- debug4d(printf(" => checking for double splice: Splice_solve_double\n"));
- spliceends = Splice_solve_double(&(*found_score),&nhits_local,spliceends,&(*lowprob),
- &segmenti->usedp,&segmentm->usedp,&segmentj->usedp,
- /*segmenti_left*/segmenti->diagonal - querylength,
- /*segmentm_left*/segmentm->diagonal - querylength,
- /*segmentj_left*/segmentj->diagonal - querylength,
- segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength,
- segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentm_acceptor_knownpos,segmentm_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmentm_antiacceptor_knownpos,segmentm_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentm_acceptor_knowni,segmentm_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmentm_antiacceptor_knowni,segmentm_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentm_acceptor_nknown,segmentm_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmentm_antiacceptor_nknown,segmentm_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,max_mismatches_allowed,plusp,genestrand,
- subs_or_indels_p,/*sarrayp*/false);
- }
- }
- }
- }
-
- List_free(&potentialj);
- List_free(&potentiali);
-
- /* Process results for segmentm. */
- if (spliceends != NULL) {
- best_nmismatches = querylength;
- best_prob = 0.0;
- for (p = spliceends; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- debug7(printf("analyzing distance %d, nmismatches %d, probability %f\n",
- Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
- Stage3end_shortexon_prob(hit)));
- if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
- best_nmismatches = nmismatches;
- }
- if ((prob = Stage3end_shortexon_prob(hit)) > best_prob) {
- best_prob = prob;
- }
- }
-
- n_good_spliceends = 0;
- accepted_hits = rejected_hits = (List_T) NULL;
- for (p = spliceends; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
- (Stage3end_shortexon_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP)) {
- debug7(printf("accepting distance %d, nmismatches %d, probability %f\n",
- Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
- Stage3end_shortexon_prob(hit)));
- n_good_spliceends += 1;
- accepted_hits = List_push(accepted_hits,(void *) hit);
- } else {
- rejected_hits = List_push(rejected_hits,(void *) hit);
- }
- }
-
- if (n_good_spliceends == 0) {
- /* Conjunction is too strict. Allow for disjunction instead. */
- List_free(&rejected_hits);
- for (p = spliceends; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
- (Stage3end_shortexon_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP)) {
- debug7(printf("accepting distance %d, nmismatches %d, probability %f\n",
- Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
- Stage3end_shortexon_prob(hit)));
- n_good_spliceends += 1;
- accepted_hits = List_push(accepted_hits,(void *) hit);
- } else {
- rejected_hits = List_push(rejected_hits,(void *) hit);
- }
- }
- }
-
- for (p = rejected_hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- Stage3end_free(&hit);
- }
- List_free(&rejected_hits);
- List_free(&spliceends);
-
- if (n_good_spliceends == 1) {
- hits = List_push(hits,List_head(accepted_hits));
- List_free(&accepted_hits);
-
- } else {
- /* 5. Multiple hits, shortexon */
- debug7(printf("multiple splice hits, shortexon\n"));
-
- /* Process multiple double ambiguous first */
- hitarray = (Stage3end_T *) List_to_array_n(&n,accepted_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),substringD_match_length_cmp);
- List_free(&accepted_hits);
- single_ambig_hits = (List_T) NULL;
-
- i = 0;
- while (i < n) {
- hit = hitarray[i];
- donor = Stage3end_substringD(hit);
- donor_length = Substring_match_length_orig(donor);
- acceptor = Stage3end_substringA(hit);
- acceptor_length = Substring_match_length_orig(acceptor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substringD(hitarray[j])) == donor_length &&
- Substring_match_length_orig(Stage3end_substringA(hitarray[j])) == acceptor_length) {
- j++;
- }
- if (j == i + 1) {
- /* Save for later analysis */
- single_ambig_hits = List_push(single_ambig_hits,(void *) hit);
- } else {
- donor_ambcoords = acceptor_ambcoords = NULL;
- donor_amb_knowni = acceptor_amb_knowni = (Intlist_T) NULL;
- donor_amb_nmismatches = acceptor_amb_nmismatches = (Intlist_T) NULL;
- donor_amb_probs = acceptor_amb_probs = (Doublelist_T) NULL;
-
- qsort(&(hitarray[i]),j-i,sizeof(Stage3end_T),Stage3end_shortexon_substringD_cmp);
- donor = Stage3end_substringD(hitarray[i]);
-#ifdef LARGE_GENOMES
- donor_ambcoords = Uint8list_push(donor_ambcoords,Substring_splicecoord(donor));
-#else
- donor_ambcoords = Uintlist_push(donor_ambcoords,Substring_splicecoord(donor));
-#endif
- donor_amb_knowni = Intlist_push(donor_amb_knowni,-1);
- donor_amb_nmismatches = Intlist_push(donor_amb_nmismatches,Substring_nmismatches_whole(donor));
- donor_amb_probs = Doublelist_push(donor_amb_probs,Substring_chimera_prob(donor));
-
- lastpos = Substring_left_genomicseg(donor);
- for (k = i + 1; k < j; k++) {
- donor = Stage3end_substringD(hitarray[k]);
- if (Substring_left_genomicseg(donor) != lastpos) {
-#ifdef LARGE_GENOMES
- donor_ambcoords = Uint8list_push(donor_ambcoords,Substring_splicecoord(donor));
-#else
- donor_ambcoords = Uintlist_push(donor_ambcoords,Substring_splicecoord(donor));
-#endif
- donor_amb_knowni = Intlist_push(donor_amb_knowni,-1);
- donor_amb_nmismatches = Intlist_push(donor_amb_nmismatches,Substring_nmismatches_whole(donor));
- donor_amb_probs = Doublelist_push(donor_amb_probs,Substring_chimera_prob(donor));
- }
- }
-
- qsort(&(hitarray[i]),j-i,sizeof(Stage3end_T),Stage3end_shortexon_substringA_cmp);
- acceptor = Stage3end_substringA(hitarray[i]);
-#ifdef LARGE_GENOMES
- acceptor_ambcoords = Uint8list_push(acceptor_ambcoords,Substring_splicecoord(acceptor));
-#else
- acceptor_ambcoords = Uintlist_push(acceptor_ambcoords,Substring_splicecoord(acceptor));
-#endif
- acceptor_amb_knowni = Intlist_push(acceptor_amb_knowni,-1);
- acceptor_amb_nmismatches = Intlist_push(acceptor_amb_nmismatches,Substring_nmismatches_whole(acceptor));
- acceptor_amb_probs = Doublelist_push(acceptor_amb_probs,Substring_chimera_prob(acceptor));
-
- lastpos = Substring_left_genomicseg(acceptor);
- for (k = i + 1; k < j; k++) {
- acceptor = Stage3end_substringA(hitarray[k]);
- if (Substring_left_genomicseg(acceptor) != lastpos) {
-#ifdef LARGE_GENOMES
- acceptor_ambcoords = Uint8list_push(acceptor_ambcoords,Substring_splicecoord(acceptor));
-#else
- acceptor_ambcoords = Uintlist_push(acceptor_ambcoords,Substring_splicecoord(acceptor));
-#endif
- acceptor_amb_knowni = Intlist_push(acceptor_amb_knowni,-1);
- acceptor_amb_nmismatches = Intlist_push(acceptor_amb_nmismatches,Substring_nmismatches_whole(acceptor));
- acceptor_amb_probs = Doublelist_push(acceptor_amb_probs,Substring_chimera_prob(acceptor));
- }
- }
-
- shortexon = Stage3end_substringS(hitarray[i]);
- sensedir = Stage3end_sensedir(hitarray[i]);
- if (Intlist_length(donor_amb_nmismatches) > 1 && Intlist_length(acceptor_amb_nmismatches) > 1) {
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
- /*donor_prob*/Doublelist_max(donor_amb_probs),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),/*acceptor_prob*/Doublelist_max(acceptor_amb_probs),
- /*amb_length_donor*/donor_length,/*amb_length_acceptor*/acceptor_length,
- donor_ambcoords,acceptor_ambcoords,
- donor_amb_knowni,acceptor_amb_knowni,
- donor_amb_nmismatches,acceptor_amb_nmismatches,
- donor_amb_probs,acceptor_amb_probs,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
-
- } else if (Intlist_length(donor_amb_nmismatches) > 1) {
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- /*donor_prob*/Doublelist_max(donor_amb_probs),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),/*acceptor_prob*/Substring_chimera_prob(acceptor),
- /*amb_length_donor*/donor_length,/*amb_length_acceptor*/0,
- donor_ambcoords,/*acceptor_ambcoords*/NULL,
- donor_amb_knowni,/*amb_knowni_acceptor*/NULL,
- donor_amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- donor_amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
-
- } else if (Intlist_length(acceptor_amb_nmismatches) > 1) {
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- /*donor_prob*/Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),/*acceptor_prob*/Doublelist_max(acceptor_amb_probs),
- /*amb_length_donor*/0,/*amb_length_acceptor*/acceptor_length,
- /*ambcoords_donor*/NULL,acceptor_ambcoords,
- /*amb_knowni_donor*/NULL,acceptor_amb_knowni,
- /*amb_nmismatches_donor*/NULL,acceptor_amb_nmismatches,
- /*amb_probs_donor*/NULL,acceptor_amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
-
- } else {
- /* A singleton, apparently due to many duplicates. Is this possible? */
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*donor_prob*/Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),/*acceptor_prob*/Substring_chimera_prob(acceptor),
- /*amb_length_donor*/0,/*amb_length_acceptor*/0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
-
- }
-
- Doublelist_free(&donor_amb_probs);
- Intlist_free(&donor_amb_nmismatches);
- Intlist_free(&donor_amb_knowni);
- Doublelist_free(&acceptor_amb_probs);
- Intlist_free(&acceptor_amb_nmismatches);
- Intlist_free(&acceptor_amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&donor_ambcoords);
- Uint8list_free(&acceptor_ambcoords);
-#else
- Uintlist_free(&donor_ambcoords);
- Uintlist_free(&acceptor_ambcoords);
-#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
- }
- }
-
- i = j;
- }
- FREE(hitarray);
-
- /* Process single ambiguous on donor side */
- hitarray = (Stage3end_T *) List_to_array_n(&n,single_ambig_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),substringD_match_length_cmp);
- List_free(&single_ambig_hits);
- single_ambig_hits = (List_T) NULL;
-
- i = 0;
- while (i < n) {
- hit = hitarray[i];
- donor = Stage3end_substringD(hit);
- donor_length = Substring_match_length_orig(donor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substringD(hitarray[j])) == donor_length) {
- j++;
- }
- if (j == i + 1) {
- /* Save for later analysis */
- single_ambig_hits = List_push(single_ambig_hits,(void *) hit);
- } else {
- acceptor_ambcoords = NULL;
- acceptor_amb_knowni = (Intlist_T) NULL;
- acceptor_amb_nmismatches = (Intlist_T) NULL;
- acceptor_amb_probs = (Doublelist_T) NULL;
-
- for (k = i + 1; k < j; k++) {
- acceptor = Stage3end_substringA(hitarray[i]);
-#ifdef LARGE_GENOMES
- acceptor_ambcoords = Uint8list_push(acceptor_ambcoords,Substring_splicecoord(acceptor));
-#else
- acceptor_ambcoords = Uintlist_push(acceptor_ambcoords,Substring_splicecoord(acceptor));
-#endif
- acceptor_amb_knowni = Intlist_push(acceptor_amb_knowni,-1);
- acceptor_amb_nmismatches = Intlist_push(acceptor_amb_nmismatches,Substring_nmismatches_whole(acceptor));
- acceptor_amb_probs = Doublelist_push(acceptor_amb_probs,Substring_chimera_prob(acceptor));
- }
-
- shortexon = Stage3end_substringS(hitarray[i]);
- sensedir = Stage3end_sensedir(hitarray[i]);
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- /*donor_prob*/Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),/*acceptor_prob*/Doublelist_max(acceptor_amb_probs),
- /*amb_length_donor*/0,/*amb_length_acceptor*/Substring_match_length_orig(acceptor),
- /*ambcoords_donor*/NULL,acceptor_ambcoords,
- /*amb_knowni_donor*/NULL,acceptor_amb_knowni,
- /*amb_nmismatches_donor*/NULL,acceptor_amb_nmismatches,
- /*amb_probs_donor*/NULL,acceptor_amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- Doublelist_free(&acceptor_amb_probs);
- Intlist_free(&acceptor_amb_nmismatches);
- Intlist_free(&acceptor_amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&acceptor_ambcoords);
-#else
- Uintlist_free(&acceptor_ambcoords);
-#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
- }
- }
-
- i = j;
- }
- FREE(hitarray);
-
- /* Process single ambiguous on acceptor side */
- hitarray = (Stage3end_T *) List_to_array_n(&n,single_ambig_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),substringA_match_length_cmp);
- List_free(&single_ambig_hits);
-
- i = 0;
- while (i < n) {
- hit = hitarray[i];
- acceptor = Stage3end_substringA(hit);
- acceptor_length = Substring_match_length_orig(acceptor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substringA(hitarray[j])) == acceptor_length) {
- j++;
- }
- if (j == i + 1) {
- /* Finally, a confirmed unique */
- hits = List_push(hits,(void *) hit);
- } else {
- donor_ambcoords = NULL;
- donor_amb_knowni = (Intlist_T) NULL;
- donor_amb_nmismatches = (Intlist_T) NULL;
- donor_amb_probs = (Doublelist_T) NULL;
-
- for (k = i + 1; k < j; k++) {
- donor = Stage3end_substringD(hitarray[i]);
-#ifdef LARGE_GENOMES
- donor_ambcoords = Uint8list_push(donor_ambcoords,Substring_splicecoord(donor));
-#else
- donor_ambcoords = Uintlist_push(donor_ambcoords,Substring_splicecoord(donor));
-#endif
- donor_amb_knowni = Intlist_push(donor_amb_knowni,-1);
- donor_amb_nmismatches = Intlist_push(donor_amb_nmismatches,Substring_nmismatches_whole(donor));
- donor_amb_probs = Doublelist_push(donor_amb_probs,Substring_chimera_prob(donor));
- }
-
- shortexon = Stage3end_substringS(hitarray[i]);
- sensedir = Stage3end_sensedir(hitarray[i]);
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- /*donor_prob*/Doublelist_max(donor_amb_probs),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),/*acceptor_prob*/Substring_chimera_prob(acceptor),
- /*amb_length_donor*/Substring_match_length_orig(donor),/*amb_length_acceptor*/0,
- donor_ambcoords,/*acceptor_ambcoords*/NULL,
- donor_amb_knowni,/*amb_knowni_acceptor*/NULL,
- donor_amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- donor_amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- Doublelist_free(&donor_amb_probs);
- Intlist_free(&donor_amb_nmismatches);
- Intlist_free(&donor_amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&donor_ambcoords);
-#else
- Uintlist_free(&donor_ambcoords);
-#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
- }
- }
-
- i = j;
- }
- FREE(hitarray);
- }
- }
- }
-
-
- /* Short exon using known splicing, originally on plus strand */
- if (shortexon_orig_plusp == true) {
- debug4k(printf("Short exon candidate, orig_plusp. Saw short exon acceptor...donor on segment i\n"));
- sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
- assert(plusp == true);
- assert(sensedir == SENSE_FORWARD);
-
- for (j1 = joffset; j1 < j; j1++) {
- if (splicetypes[j1] == ACCEPTOR) {
- leftpos = splicesites[j1] - segmentm_left;
- debug4k(printf(" Doing Splicetrie_find_left from leftpos %d (plus)\n",leftpos));
- if ((splicesites_i_left =
- Splicetrie_find_left(&nmismatches_shortexon_left,&nmismatches_list_left,j1,
- /*origleft*/segmentm_left,/*pos5*/0,/*pos3*/leftpos,segmentm->chroffset,
- query_compress,queryptr,querylength,max_mismatches_allowed,/*plusp*/true,
- genestrand,first_read_p,
- /*collect_all_p*/pairedp == true && first_read_p != plusp)) != NULL) {
- ambp_left = (leftpos < min_shortend || Intlist_length(splicesites_i_left) > 1) ? true : false;
-
- for (j2 = j1 + 1; j2 < j; j2++) {
- if (splicetypes[j2] == DONOR && splicesites[j2] > splicesites[j1]) {
- rightpos = splicesites[j2] - segmentm_left;
- debug4k(printf(" Doing Splicetrie_find_right from rightpos %d (plus)\n",rightpos));
- if ((nmismatches_shortexon_middle =
- Genome_count_mismatches_substring(query_compress,segmentm_left,/*pos5*/leftpos,/*pos3*/rightpos,
- plusp,genestrand)) <= max_mismatches_allowed - nmismatches_shortexon_left &&
- (splicesites_i_right =
- Splicetrie_find_right(&nmismatches_shortexon_right,&nmismatches_list_right,j2,
- /*origleft*/segmentm_left,/*pos5*/rightpos,/*pos3*/querylength,segmentm->chrhigh,
- query_compress,queryptr,
- max_mismatches_allowed - nmismatches_shortexon_left - nmismatches_shortexon_middle,
- /*plusp*/true,genestrand,first_read_p,
- /*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) {
- ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false;
-
- debug4k(printf(" donor %s ... acceptor %d (%llu) ... donor %d (%llu) ... acceptor %s: %d + %d + %d mismatches\n",
- Intlist_to_string(splicesites_i_left),j1,(unsigned long long) splicesites[j1],
- j2,(unsigned long long) splicesites[j2],Intlist_to_string(splicesites_i_right),
- nmismatches_shortexon_left,nmismatches_shortexon_middle,nmismatches_shortexon_right));
-
- if (ambp_left == true && ambp_right == true) {
- shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1,
- /*donor_coord*/splicesites[j2],/*donor_knowni*/j2,
- /*acceptor_pos*/leftpos,/*donor_pos*/rightpos,
- nmismatches_shortexon_middle,
- /*acceptor_prob*/2.0,/*donor_prob*/2.0,
- /*left*/segmentm_left,query_compress,
- querylength,/*plusp*/true,genestrand,
- sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true,
- segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
- if (shortexon != NULL) {
- debug4k(printf("New one-third shortexon at left %llu\n",(unsigned long long) segmentm_left));
- ambcoords_donor = lookup_splicesites(&probs_donor,splicesites_i_left,splicesites);
- ambcoords_acceptor = lookup_splicesites(&probs_acceptor,splicesites_i_right,splicesites);
- amb_length_donor = leftpos /*- nmismatches_shortexon_left*/;
- amb_length_acceptor = querylength - rightpos /*- nmismatches_shortexon_right*/;
- segmentm->usedp = true;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
- Doublelist_max(probs_donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor),
- amb_length_donor,amb_length_acceptor,
- ambcoords_donor,ambcoords_acceptor,
- /*amb_knowni_donor*/splicesites_i_left,/*amb_knowni_acceptor*/splicesites_i_right,
- /*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/nmismatches_list_right,
- /*amb_probs_donor*/probs_donor,/*amb_nmismatches_acceptor*/probs_acceptor,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- Doublelist_free(&probs_donor);
- Doublelist_free(&probs_acceptor);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_donor);
- Uint8list_free(&ambcoords_acceptor);
-#else
- Uintlist_free(&ambcoords_donor);
- Uintlist_free(&ambcoords_acceptor);
-#endif
- }
-
- } else if (ambp_left == true && ambp_right == false) {
- debug4k(printf("ambp_left true, ambp_right false\n"));
- best_right_j = Intlist_head(splicesites_i_right);
-
- debug4k(printf("shortexon with amb_acceptor at %d (%llu) ... donor at %d (%llu)\n",
- j1,(unsigned long long) splicesites[j1],j2,(unsigned long long) splicesites[j2]));
- shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1,
- /*donor_coord*/splicesites[j2],/*donor_knowni*/j2,
- /*acceptor_pos*/leftpos,/*donor_pos*/rightpos,
- nmismatches_shortexon_middle,
- /*acceptor_prob*/2.0,/*donor_prob*/2.0,
- /*left*/segmentm_left,query_compress,
- querylength,/*plusp*/true,genestrand,
- sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false,
- segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- debug4k(printf("acceptor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
- acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j,
- /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
- nmismatches_shortexon_right,
- /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,/*plusp*/true,genestrand,
- /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
- segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- if (shortexon == NULL || acceptor == NULL) {
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- } else {
- debug4k(printf("ambp_left true, ambp_right false: New two-thirds shortexon at left %llu\n",
- (unsigned long long) segmentm_left));
- ambcoords_donor = lookup_splicesites(&probs_donor,splicesites_i_left,splicesites);
- amb_length_donor = leftpos /*- nmismatches_shortexon_left*/;
- segmentm->usedp = true;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- Doublelist_max(probs_donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor),
- amb_length_donor,/*amb_length_acceptor*/0,
- ambcoords_donor,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/splicesites_i_left,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/probs_donor,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- Doublelist_free(&probs_donor);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_donor);
-#else
- Uintlist_free(&ambcoords_donor);
-#endif
- }
-
- } else if (ambp_left == false && ambp_right == true) {
- debug4k(printf("ambp_left false, ambp_right true\n"));
- best_left_j = Intlist_head(splicesites_i_left);
-
- debug4k(printf("donor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j]));
- donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j,
- /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
- nmismatches_shortexon_left,
- /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,/*plusp*/true,genestrand,
- /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
- segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- debug4k(printf("shortexon with acceptor at %d (%llu) ... amb_donor %d (%llu)\n",
- j1,(unsigned long long) splicesites[j1],j2,(unsigned long long) splicesites[j2]));
- shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1,
- /*donor_coord*/splicesites[j2],/*donor_knowni*/j2,
- /*acceptor_pos*/leftpos,/*donor_pos*/rightpos,
- nmismatches_shortexon_middle,
- /*acceptor_prob*/2.0,/*donor_prob*/2.0,
- /*left*/segmentm_left,query_compress,
- querylength,/*plusp*/true,genestrand,
- /*sensedir*/SENSE_FORWARD,/*acceptor_ambp*/false,/*donor_ambp*/true,
- segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- if (donor == NULL || shortexon == NULL) {
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- } else {
- ambcoords_acceptor = lookup_splicesites(&probs_acceptor,splicesites_i_right,splicesites);
- amb_length_acceptor = querylength - rightpos /*- nmismatches_shortexon_right*/;
- segmentm->usedp = true;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor),
- /*amb_length_donor*/0,amb_length_acceptor,
- /*ambcoords_donor*/NULL,ambcoords_acceptor,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i_right,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_right,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_acceptor,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- Doublelist_free(&probs_acceptor);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_acceptor);
-#else
- Uintlist_free(&ambcoords_acceptor);
-#endif
- }
-
-
- } else { /* ambp_left == false && ambp_right == false */
- debug4k(printf("ambp_left false, ambp_right false\n"));
- best_left_j = Intlist_head(splicesites_i_left);
- best_right_j = Intlist_head(splicesites_i_right);
- donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j,
- /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
- nmismatches_shortexon_left,
- /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,/*plusp*/true,genestrand,
- /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
- segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1,
- /*donor_coord*/splicesites[j2],/*donor_knowni*/j2,
- /*acceptor_pos*/leftpos,/*donor_pos*/rightpos,
- nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0,
- /*left*/segmentm_left,query_compress,
- querylength,/*plusp*/true,genestrand,
- sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
- segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j,
- /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
- nmismatches_shortexon_right,
- /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,/*plusp*/true,genestrand,
- /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
- segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- if (donor == NULL || shortexon == NULL || acceptor == NULL) {
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- } else {
- debug4k(printf("New shortexon at left %llu\n",(unsigned long long) segmentm_left));
- segmentm->usedp = true;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor),
- /*amb_length_donor*/0,/*amb_length_acceptor*/0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- }
- }
- Intlist_free(&nmismatches_list_right);
- Intlist_free(&splicesites_i_right);
- }
- }
- }
- Intlist_free(&nmismatches_list_left);
- Intlist_free(&splicesites_i_left);
- }
- }
- }
- debug4k(printf("End of case 1\n"));
- }
-
- /* Short exon using known splicing, originally on minus strand */
- if (shortexon_orig_minusp == true) {
- debug4k(printf("Short exon candidate, orig_minusp. Saw short exon antidonor...antiacceptor on segment i\n"));
- sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
- assert(plusp == false);
- assert(sensedir == SENSE_ANTI);
-
- for (j1 = joffset; j1 < j; j1++) {
- if (splicetypes[j1] == ANTIDONOR) {
- leftpos = splicesites[j1] - segmentm_left;
- debug4k(printf(" Doing Splicetrie_find_left from leftpos %d (minus)\n",leftpos));
- if ((splicesites_i_left =
- Splicetrie_find_left(&nmismatches_shortexon_left,&nmismatches_list_left,j1,
- /*origleft*/segmentm_left,/*pos5*/0,/*pos3*/leftpos,segmentm->chroffset,
- query_compress,queryptr,querylength,max_mismatches_allowed,
- /*plusp*/false,genestrand,first_read_p,
- /*collect_all_p*/pairedp == true && first_read_p != plusp)) != NULL) {
- ambp_left = (leftpos < min_shortend || Intlist_length(splicesites_i_left) > 1) ? true : false;
-
- for (j2 = j1 + 1; j2 < j; j2++) {
- if (splicetypes[j2] == ANTIACCEPTOR && splicesites[j2] > splicesites[j1]) {
- rightpos = splicesites[j2] - segmentm_left;
- debug4k(printf(" Doing Splicetrie_find_right from rightpos %d (minus)\n",rightpos));
- if ((nmismatches_shortexon_middle =
- Genome_count_mismatches_substring(query_compress,segmentm_left,/*pos5*/leftpos,/*pos3*/rightpos,
- /*plusp*/false,genestrand)) <= max_mismatches_allowed - nmismatches_shortexon_left &&
- (splicesites_i_right =
- Splicetrie_find_right(&nmismatches_shortexon_right,&nmismatches_list_right,j2,
- /*origleft*/segmentm_left,/*pos5*/rightpos,/*pos3*/querylength,segmentm->chrhigh,
- query_compress,queryptr,
- max_mismatches_allowed - nmismatches_shortexon_left - nmismatches_shortexon_middle,
- /*plusp*/false,genestrand,first_read_p,
- /*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) {
- ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false;
-
- debug4k(printf(" antiacceptor %s ... antidonor %d (%llu) ... antiacceptor %d (%llu) ... antidonor %s: %d + %d + %d mismatches\n",
- Intlist_to_string(splicesites_i_left),j1,(unsigned long long) splicesites[j1],
- j2,(unsigned long long) splicesites[j2],Intlist_to_string(splicesites_i_right),
- nmismatches_shortexon_left,nmismatches_shortexon_middle,nmismatches_shortexon_right));
-
- if (ambp_left == true && ambp_right == true) {
- shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2,
- /*donor_coord*/splicesites[j1],/*donor_knowni*/j1,
- /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
- /*acceptor_prob*/2.0,/*donor_prob*/2.0,
- /*left*/segmentm_left,query_compress,
- querylength,/*plusp*/false,genestrand,
- sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true,
- segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
- if (shortexon != NULL) {
- debug4k(printf("New one-third shortexon at left %llu\n",(unsigned long long) segmentm_left));
- ambcoords_donor = lookup_splicesites(&probs_donor,splicesites_i_right,splicesites);
- ambcoords_acceptor = lookup_splicesites(&probs_acceptor,splicesites_i_left,splicesites);
- amb_length_donor = querylength - rightpos /*- nmismatches_shortexon_right*/;
- amb_length_acceptor = leftpos /*- nmismatches_shortexon_left*/;
- segmentm->usedp = true;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
- Doublelist_max(probs_donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor),
- amb_length_donor,amb_length_acceptor,
- ambcoords_donor,ambcoords_acceptor,
- /*amb_knowni_donor*/splicesites_i_right,/*amb_knowni_acceptor*/splicesites_i_left,
- /*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/nmismatches_list_left,
- /*amb_probs_donor*/probs_donor,/*amb_probs_acceptor*/probs_acceptor,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- Doublelist_free(&probs_donor);
- Doublelist_free(&probs_acceptor);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_donor);
- Uint8list_free(&ambcoords_acceptor);
-#else
- Uintlist_free(&ambcoords_donor);
- Uintlist_free(&ambcoords_acceptor);
-#endif
- }
-
- } else if (ambp_left == true && ambp_right == false) {
- debug4k(printf("ambp_left true, ambp_right false\n"));
- best_right_j = Intlist_head(splicesites_i_right);
-
- debug4k(printf("shortexon with amb_donor at %d (%llu) ... acceptor at %d (%llu)\n",
- j1,(unsigned long long) splicesites[j1],j2,(unsigned long long) splicesites[j2]));
- shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2,
- /*donor_coord*/splicesites[j1],/*donor_knowni*/j1,
- /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
- /*acceptor_prob*/2.0,/*donor_prob*/2.0,
- /*left*/segmentm_left,query_compress,
- querylength,/*plusp*/false,genestrand,
- sensedir,/*acceptor_ambp*/false,/*donor_ambp*/true,
- segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- debug4k(printf("donor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
- donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j,
- /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
- nmismatches_shortexon_right,
- /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,/*plusp*/false,genestrand,
- /*sensedir*/SENSE_ANTI,segmentm->chrnum,
- segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- if (donor == NULL || shortexon == NULL) {
- if (donor != NULL) Substring_free(&donor);
- if (shortexon != NULL) Substring_free(&shortexon);
- } else {
- ambcoords_acceptor = lookup_splicesites(&probs_acceptor,splicesites_i_left,splicesites);
- amb_length_acceptor = leftpos /*- nmismatches_shortexon_left*/;
- segmentm->usedp = true;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor),
- /*amb_length_donor*/0,amb_length_acceptor,
- /*ambcoords_donor*/NULL,ambcoords_acceptor,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i_left,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_left,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_acceptor,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- Doublelist_free(&probs_acceptor);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_acceptor);
-#else
- Uintlist_free(&ambcoords_acceptor);
-#endif
- }
-
- } else if (ambp_left == false && ambp_right == true) {
- debug4k(printf("ambp_left false, ambp_right true\n"));
- best_left_j = Intlist_head(splicesites_i_left);
-
- debug4k(printf("acceptor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j]));
- acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j,
- /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
- nmismatches_shortexon_left,
- /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,/*plusp*/false,genestrand,
- /*sensedir*/SENSE_ANTI,segmentm->chrnum,
- segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- debug4k(printf("shortexon with donor at %d (%llu) ... amb_acceptor at %d (%llu)\n",
- j2,(unsigned long long) splicesites[j2],j1,(unsigned long long) plicesites[j1]));
- shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2,
- /*donor_coord*/splicesites[j1],/*donor_knowni*/j1,
- /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
- /*acceptor_prob*/2.0,/*donor_prob*/2.0,
- /*left*/segmentm_left,query_compress,
- querylength,/*plusp*/false,genestrand,
- sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false,
- segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- if (shortexon == NULL || acceptor == NULL) {
- if (shortexon != NULL) Substring_free(&shortexon);
- if (acceptor != NULL) Substring_free(&acceptor);
- } else {
- debug4k(printf("ambp_left false, ambp_right true: New splice at left %llu\n",
- (unsigned long long) segmentm_left));
- ambcoords_donor = lookup_splicesites(&probs_donor,splicesites_i_right,splicesites);
- amb_length_donor = querylength - rightpos /*- nmismatches_shortexon_right*/;
- segmentm->usedp = true;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- Doublelist_max(probs_donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor),
- amb_length_donor,/*amb_length_acceptor*/0,
- ambcoords_donor,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/splicesites_i_right,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/probs_donor,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- Doublelist_free(&probs_donor);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_donor);
-#else
- Uintlist_free(&ambcoords_donor);
-#endif
- }
-
- } else { /* ambp_left == false && ambp_right == false */
- best_left_j = Intlist_head(splicesites_i_left);
- best_right_j = Intlist_head(splicesites_i_right);
- acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j,
- /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
- nmismatches_shortexon_left,
- /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,/*plusp*/false,genestrand,
- /*sensedir*/SENSE_ANTI,segmentm->chrnum,
- segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2,
- /*donor_coord*/splicesites[j1],/*donor_knowni*/j1,
- /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,
- nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0,
- /*left*/segmentm_left,query_compress,
- querylength,/*plusp*/false,genestrand,
- sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
- segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j,
- /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
- nmismatches_shortexon_right,
- /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,/*plusp*/false,genestrand,
- /*sensedir*/SENSE_ANTI,segmentm->chrnum,
- segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
-
- if (acceptor == NULL || shortexon == NULL || donor == NULL) {
- if (acceptor != NULL) Substring_free(&acceptor);
- if (shortexon != NULL) Substring_free(&shortexon);
- if (donor != NULL) Substring_free(&donor);
- } else {
- debug4k(printf("New shortexon at left %llu\n",(unsigned long long) segmentm_left));
- segmentm->usedp = true;
- hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
- Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor),
- /*amb_length_donor*/0,/*amb_length_acceptor*/0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
- }
- }
- Intlist_free(&nmismatches_list_right);
- Intlist_free(&splicesites_i_right);
- }
- }
- }
- Intlist_free(&nmismatches_list_left);
- Intlist_free(&splicesites_i_left);
- }
- }
- }
- debug4k(printf("End of case 2\n"));
- }
- /* End of known splicesites, segment i */
- }
- }
-
- debug4k(printf("Finished find_known_doublesplices with %d hits\n",List_length(hits)));
- return hits;
-}
-#endif
-
-
static void
find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors,
@@ -10580,12 +9568,6 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
int nmismatches, jstart, jend, j;
int splice_pos;
-#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int mismatch_positions[MAX_READLENGTH+1];
-#endif
-
int nmismatches_left, nmismatches_right;
int *floors_from_neg3, *floors_to_pos3;
int sensedir;
@@ -10595,6 +9577,18 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
int i;
#endif
+ int *mismatch_positions;
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
+#else
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+#endif
+
debug4e(printf("Entering find_spliceends_shortend with %d anchor segments\n",nanchors));
if (floors != NULL) {
@@ -10684,7 +9678,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s donor: known at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
+ plusp == true ? "plus" : "minus",Substring_siteD_pos(hit),nmismatches));
(*shortend_donors)[nmismatches] = List_push((*shortend_donors)[nmismatches],(void *) hit);
}
@@ -10701,7 +9695,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antiacceptor : known at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
+ plusp == true ? "plus" : "minus",Substring_siteA_pos(hit),nmismatches));
(*shortend_antiacceptors)[nmismatches] = List_push((*shortend_antiacceptors)[nmismatches],(void *) hit);
}
}
@@ -10773,7 +9767,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s acceptor: known at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
+ plusp == true ? "plus" : "minus",Substring_siteA_pos(hit),nmismatches));
(*shortend_acceptors)[nmismatches] = List_push((*shortend_acceptors)[nmismatches],(void *) hit);
}
@@ -10790,7 +9784,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antidonor: known at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
+ plusp == true ? "plus" : "minus",Substring_siteD_pos(hit),nmismatches));
(*shortend_antidonors)[nmismatches] = List_push((*shortend_antidonors)[nmismatches],(void *) hit);
}
}
@@ -10800,6 +9794,16 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions);
+ } else {
+ FREE(mismatch_positions);
+ }
+#else
+ FREE(mismatch_positions);
+#endif
+
return;
}
@@ -10827,14 +9831,18 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
int *floors_from_neg3, *floors_to_pos3;
int splice_pos_start, splice_pos_end;
+ int *mismatch_positions;
#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int mismatch_positions[MAX_READLENGTH+1];
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
-
debug4e(printf("Entering find_spliceends_distant_dna with %d anchor segments\n",nanchors));
if (floors != NULL) {
@@ -10901,7 +9909,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
querylength,/*plusp*/true,genestrand,
segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> plus startfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("=> plus startfrag: at %d (%d mismatches)\n",Substring_siteN_pos(hit),nmismatches));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_startfrags)[nmismatches] = List_push((*distant_startfrags)[nmismatches],(void *) hit);
}
@@ -10957,7 +9965,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
querylength,/*plusp*/true,genestrand,
segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> plus endfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("=> plus endfrag: at %d (%d mismatches)\n",Substring_siteN_pos(hit),nmismatches));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_endfrags)[nmismatches] = List_push((*distant_endfrags)[nmismatches],(void *) hit);
}
@@ -10973,6 +9981,16 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions);
+ } else {
+ FREE(mismatch_positions);
+ }
+#else
+ FREE(mismatch_positions);
+#endif
+
return;
}
@@ -11000,11 +10018,16 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
int *floors_from_neg3, *floors_to_pos3;
int splice_pos_start, splice_pos_end;
+ int *mismatch_positions;
#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
#else
- int mismatch_positions[MAX_READLENGTH+1];
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
#endif
@@ -11074,7 +10097,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
querylength,/*plusp*/false,genestrand,
segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> minus endfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("=> minus endfrag: at %d (%d mismatches)\n",Substring_siteN_pos(hit),nmismatches));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_endfrags)[nmismatches] = List_push((*distant_endfrags)[nmismatches],(void *) hit);
}
@@ -11130,7 +10153,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
querylength,/*plusp*/false,genestrand,
segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> minus startfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("=> minus startfrag: at %d (%d mismatches)\n",Substring_siteN_pos(hit),nmismatches));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_startfrags)[nmismatches] = List_push((*distant_startfrags)[nmismatches],(void *) hit);
}
@@ -11145,6 +10168,16 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions);
+ } else {
+ FREE(mismatch_positions);
+ }
+#else
+ FREE(mismatch_positions);
+#endif
+
return;
}
@@ -11180,34 +10213,55 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
int sensedir;
int splice_pos_start, splice_pos_end;
-
-#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int mismatch_positions[MAX_READLENGTH+1];
- int segment_donor_knownpos[MAX_READLENGTH+1], segment_acceptor_knownpos[MAX_READLENGTH+1];
- int segment_antidonor_knownpos[MAX_READLENGTH+1], segment_antiacceptor_knownpos[MAX_READLENGTH+1];
- int segment_donor_knowni[MAX_READLENGTH+1], segment_acceptor_knowni[MAX_READLENGTH+1];
- int segment_antidonor_knowni[MAX_READLENGTH+1], segment_antiacceptor_knowni[MAX_READLENGTH+1];
- int positions_alloc[MAX_READLENGTH+1];
- int knowni_alloc[MAX_READLENGTH+1];
-#endif
-
int segment_donor_nknown, segment_acceptor_nknown, segment_antidonor_nknown, segment_antiacceptor_nknown;
int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+ int *mismatch_positions;
+ int *segment_donor_knownpos, *segment_acceptor_knownpos, *segment_antidonor_knownpos, *segment_antiacceptor_knownpos,
+ *segment_donor_knowni, *segment_acceptor_knowni, *segment_antidonor_knowni, *segment_antiacceptor_knowni;
+ int *positions_alloc, *knowni_alloc;
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
+#else
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+#endif
debug4e(printf("Entering find_spliceends_distant_rna with %d anchor segments\n",nanchors));
@@ -11220,7 +10274,12 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
assert(segment->diagonal != (Univcoord_T) -1);
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- last_querypos = segment->querypos3 + index1part;
+ if ((first_querypos = segment->querypos5 - (index1interval - 1)) < 0) {
+ first_querypos = 0;
+ }
+ if ((last_querypos = segment->querypos3 + index1part + (index1interval - 1)) > querylength) {
+ last_querypos = querylength;
+ }
debug4e(printf("find_spliceends_distant_rna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d, plusp %d\n",
max_mismatches_allowed,(unsigned long long) segment->diagonal,
@@ -11239,7 +10298,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
/* Find splices on genomic right */
if (plusp) {
/* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
- if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+ if (first_querypos < index1part && last_querypos < query_lastpos) {
/* genomic left anchor */
debug4e(printf("Searching genomic right: plus genomic left anchor\n"));
nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
@@ -11247,7 +10306,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
#if 0
/*pos5*/0,/*pos3*/querylength,
#else
- /*pos5 (was 0)*/segment->querypos5,/*pos3*/querylength,
+ /*pos5 (was 0)*/first_querypos,/*pos3*/querylength,
#endif
plusp,genestrand);
debug4e(
@@ -11262,7 +10321,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
#if 0
splice_pos_start = index1part;
#else
- splice_pos_start = segment->querypos5;
+ splice_pos_start = first_querypos;
#endif
if (nmismatches_left <= max_mismatches_allowed) {
splice_pos_end = querylength - 1;
@@ -11270,19 +10329,19 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
splice_pos_end = querylength - 1;
}
- } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+ } else if (first_querypos > index1part && last_querypos > query_lastpos) {
/* genomic right anchor. No need to find splices on genomic right */
debug4e(printf("Searching genomic right: plus genomic right anchor\n"));
splice_pos_start = querylength;
splice_pos_end = 0;
- } else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+ } else if (first_querypos > index1part && last_querypos < query_lastpos &&
segment->spliceable_low_p == true) {
/* middle anchor */
debug4e(printf("Searching genomic right: plus middle anchor\n"));
nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
query_compress,/*left*/segment_left,
- /*pos5*/segment->querypos5,/*pos3*/querylength,
+ /*pos5*/first_querypos,/*pos3*/querylength,
plusp,genestrand);
debug4e(
printf("%d mismatches on left (%d allowed) at:",
@@ -11293,7 +10352,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
printf("\n");
);
- splice_pos_start = segment->querypos5;
+ splice_pos_start = first_querypos;
if (nmismatches_left <= max_mismatches_allowed) {
splice_pos_end = querylength - 1;
} else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
@@ -11309,13 +10368,13 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
} else {
/* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
- if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+ if (first_querypos < index1part && last_querypos < query_lastpos) {
/* genomic right anchor. No need to find splices on genomic right */
debug4e(printf("Searching genomic right: minus genomic right anchor\n"));
splice_pos_start = querylength;
splice_pos_end = 0;
- } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+ } else if (first_querypos > index1part && last_querypos > query_lastpos) {
/* genomic left anchor */
debug4e(printf("Searching genomic right: minus genomic left anchor\n"));
nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
@@ -11347,13 +10406,13 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
splice_pos_end = querylength - 1;
}
- } else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+ } else if (first_querypos > index1part && last_querypos < query_lastpos &&
segment->spliceable_low_p == true) {
/* middle anchor */
debug4e(printf("Searching genomic right: minus middle anchor\n"));
nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
query_compress,/*left*/segment_left,
- /*pos5*/querylength - segment->querypos3 - index1part,
+ /*pos5*/querylength - last_querypos,
/*pos3*/querylength,plusp,genestrand);
debug4e(
printf("%d mismatches on left (%d allowed) at:",
@@ -11442,7 +10501,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
@@ -11450,7 +10509,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ Substring_siteD_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
}
@@ -11463,14 +10522,14 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ plusp == true ? "plus" : "minus",prob,Substring_siteD_pos(hit),nmismatches,
Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
@@ -11523,7 +10582,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
@@ -11531,7 +10590,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ Substring_siteA_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
}
@@ -11544,14 +10603,14 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ plusp == true ? "plus" : "minus",prob,Substring_siteA_pos(hit),nmismatches,
Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
@@ -11568,13 +10627,13 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
/* Find splices on genomic left */
if (plusp) {
/* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
- if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+ if (first_querypos < index1part && last_querypos < query_lastpos) {
/* genomic left anchor. No need to find splices on genomic left. */
debug4e(printf("Searching genomic left: plus genomic left anchor\n"));
splice_pos_start = querylength;
splice_pos_end = 0;
- } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+ } else if (first_querypos > index1part && last_querypos > query_lastpos) {
/* genomic right anchor */
debug4e(printf("Searching genomic left: plus genomic right anchor\n"));
nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
@@ -11604,7 +10663,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
splice_pos_start = 1;
}
- } else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+ } else if (first_querypos > index1part && last_querypos < query_lastpos &&
segment->spliceable_high_p == true) {
/* middle anchor */
debug4e(printf("Searching genomic left: plus middle anchor\n"));
@@ -11636,7 +10695,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
} else {
/* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
- if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+ if (first_querypos < index1part && last_querypos < query_lastpos) {
/* genomic right anchor */
debug4e(printf("Searching genomic left: minus genomic right anchor\n"));
nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
@@ -11644,7 +10703,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
#if 0
/*pos5*/0,/*pos3*/querylength,
#else
- /*pos5*/0,/*pos3 (was querylength)*/querylength - segment->querypos5,
+ /*pos5*/0,/*pos3 (was querylength)*/querylength - first_querypos,
#endif
plusp,genestrand);
debug4e(
@@ -11658,7 +10717,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
#if 0
splice_pos_end = query_lastpos;
#else
- splice_pos_end = querylength - segment->querypos5;
+ splice_pos_end = querylength - first_querypos;
#endif
if (nmismatches_right <= max_mismatches_allowed) {
splice_pos_start = 1;
@@ -11666,19 +10725,19 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
splice_pos_start = 1;
}
- } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+ } else if (first_querypos > index1part && last_querypos > query_lastpos) {
/* genomic left anchor. No need to find splices on genomic left. */
debug4e(printf("Searching genomic left: minus genomic left anchor\n"));
splice_pos_start = querylength;
splice_pos_end = 0;
- } else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+ } else if (first_querypos > index1part && last_querypos < query_lastpos &&
segment->spliceable_high_p == true) {
/* middle anchor */
debug4e(printf("Searching genomic left: minus middle anchor\n"));
nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
query_compress,/*left*/segment_left,
- /*pos5*/0,/*pos3*/querylength - segment->querypos5,
+ /*pos5*/0,/*pos3*/querylength - first_querypos,
plusp,genestrand);
debug4e(
printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
@@ -11688,7 +10747,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
printf("\n");
);
- splice_pos_end = querylength - segment->querypos5;
+ splice_pos_end = querylength - first_querypos;
if (nmismatches_right <= max_mismatches_allowed) {
splice_pos_start = 1;
} else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
@@ -11774,7 +10833,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ Substring_siteA_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
}
@@ -11794,7 +10853,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ plusp == true ? "plus" : "minus",prob,Substring_siteA_pos(hit),nmismatches,
Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
@@ -11855,7 +10914,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ Substring_siteD_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
}
@@ -11875,7 +10934,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ plusp == true ? "plus" : "minus",prob,Substring_siteD_pos(hit),nmismatches,
Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
@@ -11890,6 +10949,45 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
}
}
+ /* Release the per-read scratch arrays. This must mirror the allocation at the
+ top of the function: ALLOCA was used when querylength <= MAX_STACK_READLENGTH,
+ MALLOC otherwise (and always MALLOC when HAVE_ALLOCA is not defined). */
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions);
+ FREEA(segment_donor_knownpos);
+ FREEA(segment_acceptor_knownpos);
+ FREEA(segment_antidonor_knownpos);
+ FREEA(segment_antiacceptor_knownpos);
+ FREEA(segment_donor_knowni);
+ FREEA(segment_acceptor_knowni);
+ FREEA(segment_antidonor_knowni);
+ FREEA(segment_antiacceptor_knowni);
+ FREEA(positions_alloc);
+ FREEA(knowni_alloc);
+ } else {
+ FREE(mismatch_positions);
+ FREE(segment_donor_knownpos);
+ FREE(segment_acceptor_knownpos);
+ FREE(segment_antidonor_knownpos);
+ FREE(segment_antiacceptor_knownpos);
+ FREE(segment_donor_knowni);
+ FREE(segment_acceptor_knowni);
+ FREE(segment_antidonor_knowni);
+ FREE(segment_antiacceptor_knowni);
+ FREE(positions_alloc);
+ FREE(knowni_alloc);
+ }
+#else
+ FREE(mismatch_positions);
+ FREE(segment_donor_knownpos);
+ FREE(segment_acceptor_knownpos);
+ FREE(segment_antidonor_knownpos);
+ FREE(segment_antiacceptor_knownpos);
+ FREE(segment_donor_knowni);
+ FREE(segment_acceptor_knowni);
+ FREE(segment_antidonor_knowni);
+ FREE(segment_antiacceptor_knowni);
+ FREE(positions_alloc);
+ FREE(knowni_alloc);
+#endif
+
return;
}
#endif
@@ -11922,7 +11020,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
Substring_T hit;
Univcoord_T segment_left;
int nmismatches, j, i;
- int splice_pos, last_querypos;
+ int splice_pos, first_querypos, last_querypos;
double prob;
int nmismatches_left, nmismatches_right;
@@ -11930,34 +11028,55 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
int sensedir;
int splice_pos_start, splice_pos_end;
-
-#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
- int *knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int mismatch_positions[MAX_READLENGTH+1];
- int segment_donor_knownpos[MAX_READLENGTH+1], segment_acceptor_knownpos[MAX_READLENGTH+1];
- int segment_antidonor_knownpos[MAX_READLENGTH+1], segment_antiacceptor_knownpos[MAX_READLENGTH+1];
- int segment_donor_knowni[MAX_READLENGTH+1], segment_acceptor_knowni[MAX_READLENGTH+1];
- int segment_antidonor_knowni[MAX_READLENGTH+1], segment_antiacceptor_knowni[MAX_READLENGTH+1];
- int positions_alloc[MAX_READLENGTH+1];
- int knowni_alloc[MAX_READLENGTH+1];
-#endif
-
int segment_donor_nknown, segment_acceptor_nknown, segment_antidonor_nknown, segment_antiacceptor_nknown;
int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+ int *mismatch_positions;
+ int *segment_donor_knownpos, *segment_acceptor_knownpos, *segment_antidonor_knownpos, *segment_antiacceptor_knownpos,
+ *segment_donor_knowni, *segment_acceptor_knowni, *segment_antidonor_knowni, *segment_antiacceptor_knowni;
+ int *positions_alloc, *knowni_alloc;
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
+#else
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ segment_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
+ positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+ knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int));
+#endif
debug4e(printf("Entering find_spliceends_distant_rna with %d anchor segments\n",nanchors));
@@ -11970,8 +11089,12 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
assert(segment->diagonal != (Univcoord_T) -1);
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- last_querypos = segment->querypos3 + index1part;
- assert(last_querypos <= querylength);
+ if ((first_querypos = segment->querypos5 - (index1interval - 1)) < 0) {
+ first_querypos = 0;
+ }
+ if ((last_querypos = segment->querypos3 + index1part + (index1interval - 1)) > querylength) {
+ last_querypos = querylength;
+ }
debug4e(printf("find_spliceends_distant_rna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d, plusp %d\n",
max_mismatches_allowed,(unsigned long long) segment->diagonal,
@@ -11991,9 +11114,10 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
if (plusp) {
/* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
if (last_querypos < query_lastpos &&
- (segment->querypos5 < index1part || segment->spliceable_low_p == true)) {
+ (first_querypos < index1part || segment->spliceable_low_p == true)) {
/* genomic left anchor or middle anchor */
- debug4e(printf("Searching genomic right: plus genomic left anchor or middle anchor\n"));
+ debug4e(printf("Searching genomic right: plus genomic left anchor or middle anchor: %d..%d\n",
+ segment->querypos5,querylength));
nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
query_compress,/*left*/segment_left,
/*pos5*/segment->querypos5,/*pos3*/querylength,
@@ -12007,7 +11131,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
printf("\n");
);
- splice_pos_start = segment->querypos5 + 1;
+ splice_pos_start = first_querypos + 1;
if (nmismatches_left <= max_mismatches_allowed) {
splice_pos_end = querylength - 1 - 1;
} else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1 - 1) {
@@ -12023,13 +11147,14 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
} else {
/* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
- if (segment->querypos5 > index1part &&
+ if (first_querypos > index1part &&
(last_querypos > query_lastpos || segment->spliceable_low_p == true)) {
/* genomic left anchor or middle anchor */
- debug4e(printf("Searching genomic right: minus genomic left anchor or middle anchor\n"));
+ debug4e(printf("Searching genomic right: minus genomic left anchor or middle anchor: %d..%d\n",
+ querylength - segment->querypos3 - index1part,querylength));
nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
query_compress,/*left*/segment_left,
- /*pos5*/querylength - last_querypos,
+ /*pos5*/querylength - segment->querypos3 - index1part,
/*pos3*/querylength,plusp,genestrand);
debug4e(
printf("%d mismatches on left (%d allowed) at:",
@@ -12120,7 +11245,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
@@ -12128,7 +11253,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ Substring_siteD_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
}
@@ -12141,14 +11266,14 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
- splice_pos,/*substring_querystart*/segment->querypos5,
- /*substring_queryend*/last_querypos,
+ splice_pos,/*substring_querystart*/first_querypos,
+ /*substring_queryend, as last_querypos*/querylength,
nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ plusp == true ? "plus" : "minus",prob,Substring_siteD_pos(hit),nmismatches,
Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
@@ -12203,7 +11328,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
@@ -12211,7 +11336,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ Substring_siteA_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
}
@@ -12224,14 +11349,14 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ plusp == true ? "plus" : "minus",prob,Substring_siteA_pos(hit),nmismatches,
Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
@@ -12248,13 +11373,14 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
/* Find splices on genomic left */
if (plusp) {
/* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
- if (segment->querypos5 > index1part &&
+ if (first_querypos > index1part &&
(last_querypos > query_lastpos || segment->spliceable_high_p == true)) {
/* genomic right anchor or middle anchor */
- debug4e(printf("Searching genomic left: plus genomic right anchor or middle anchor\n"));
+ debug4e(printf("Searching genomic left: plus genomic right anchor or middle anchor: %d..%d\n",
+ 0,segment->querypos3 + index1part));
nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
query_compress,/*left*/segment_left,
- /*pos5*/0,/*pos3*/last_querypos,
+ /*pos5*/0,/*pos3*/segment->querypos3 + index1part,
plusp,genestrand);
debug4e(
printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
@@ -12281,9 +11407,10 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
} else {
/* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
if (last_querypos < query_lastpos &&
- (segment->querypos5 < index1part || segment->spliceable_high_p == true)) {
+ (first_querypos < index1part || segment->spliceable_high_p == true)) {
/* genomic right anchor or middle anchor*/
- debug4e(printf("Searching genomic left: minus genomic right anchor or middle anchor\n"));
+ debug4e(printf("Searching genomic left: minus genomic right anchor or middle anchor: %d..%d\n",
+ 0,querylength - segment->querypos5));
nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
query_compress,/*left*/segment_left,
/*pos5*/0,/*pos3*/querylength - segment->querypos5,
@@ -12296,7 +11423,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
printf("\n");
);
- splice_pos_end = querylength - segment->querypos5 - 1 - 1;
+ splice_pos_end = querylength - first_querypos - 1 - 1;
if (nmismatches_right <= max_mismatches_allowed) {
splice_pos_start = 1;
} else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
@@ -12376,7 +11503,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
@@ -12384,7 +11511,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ Substring_siteA_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
}
@@ -12397,14 +11524,14 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ plusp == true ? "plus" : "minus",prob,Substring_siteA_pos(hit),nmismatches,
Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
@@ -12459,7 +11586,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
@@ -12467,7 +11594,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ Substring_siteD_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
}
@@ -12480,14 +11607,14 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
- splice_pos,/*substring_querystart*/segment->querypos5,
+ splice_pos,/*substring_querystart*/first_querypos,
/*substring_queryend*/last_querypos,
nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ plusp == true ? "plus" : "minus",prob,Substring_siteD_pos(hit),nmismatches,
Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
@@ -12502,6 +11629,46 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
}
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions);
+ FREEA(segment_donor_knownpos);
+ FREEA(segment_acceptor_knownpos);
+ FREEA(segment_antidonor_knownpos);
+ FREEA(segment_antiacceptor_knownpos);
+ FREEA(segment_donor_knowni);
+ FREEA(segment_acceptor_knowni);
+ FREEA(segment_antidonor_knowni);
+ FREEA(segment_antiacceptor_knowni);
+ FREEA(positions_alloc);
+ FREEA(knowni_alloc);
+ } else {
+ FREE(mismatch_positions);
+ FREE(segment_donor_knownpos);
+ FREE(segment_acceptor_knownpos);
+ FREE(segment_antidonor_knownpos);
+ FREE(segment_antiacceptor_knownpos);
+ FREE(segment_donor_knowni);
+ FREE(segment_acceptor_knowni);
+ FREE(segment_antidonor_knowni);
+ FREE(segment_antiacceptor_knowni);
+ FREE(positions_alloc);
+ FREE(knowni_alloc);
+ }
+#else
+ FREE(mismatch_positions);
+ FREE(segment_donor_knownpos);
+ FREE(segment_acceptor_knownpos);
+ FREE(segment_antidonor_knownpos);
+ FREE(segment_antiacceptor_knownpos);
+ FREE(segment_donor_knowni);
+ FREE(segment_acceptor_knowni);
+ FREE(segment_antidonor_knowni);
+ FREE(segment_antiacceptor_knowni);
+ FREE(positions_alloc);
+ FREE(knowni_alloc);
+#endif
+
return;
}
@@ -12525,13 +11692,6 @@ find_terminals (Segment_T *plus_anchor_segments, Segment_T *minus_anchor_segment
Univcoord_T segment_left;
int nmismatches_left, nmismatches_right;
Endtype_T start_endtype, end_endtype;
-
-#ifdef HAVE_ALLOCA
- int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
-#else
- int mismatch_positions[MAX_READLENGTH+1];
-#endif
-
/* int *floors_from_neg3, *floors_to_pos3; */
int max_terminal_length;
int nterminals_left, nterminals_right, nterminals_middle;
@@ -12540,6 +11700,19 @@ find_terminals (Segment_T *plus_anchor_segments, Segment_T *minus_anchor_segment
int i;
#endif
+ int *mismatch_positions;
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+ }
+#else
+ mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int));
+#endif
+
+
debug(printf("identify_terminals: Checking up to %d mismatches\n",max_mismatches_allowed));
/* floors_from_neg3 = floors->scorefrom[-index1interval]; */
@@ -12981,6 +12154,16 @@ find_terminals (Segment_T *plus_anchor_segments, Segment_T *minus_anchor_segment
minus_terminals_right = (List_T) NULL;
}
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(mismatch_positions);
+ } else {
+ FREE(mismatch_positions);
+ }
+#else
+ FREE(mismatch_positions);
+#endif
+
return List_append(plus_terminals_middle,
List_append(plus_terminals_left,
List_append(plus_terminals_right,
@@ -13162,13 +12345,13 @@ static bool
intragenic_splice_p (Chrpos_T splicedistance, Substring_T donor, Substring_T acceptor) {
int knowni;
- if ((knowni = Substring_splicesites_knowni(donor)) >= 0) {
+ if ((knowni = Substring_splicesitesD_knowni(donor)) >= 0) {
if (splicedists[knowni] >= splicedistance) {
return true;
}
}
- if ((knowni = Substring_splicesites_knowni(acceptor)) >= 0) {
+ if ((knowni = Substring_splicesitesA_knowni(acceptor)) >= 0) {
if (splicedists[knowni] >= splicedistance) {
return true;
}
@@ -13239,26 +12422,26 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(endfrag),
Substring_querystart(endfrag),Substring_queryend(endfrag)));
- if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
+ if ((pos = Substring_siteN_pos(startfrag)) < min_endlength_1) {
debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_2) {
debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(endfrag)) {
- debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+ } else if (pos < Substring_siteN_pos(endfrag)) {
+ debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(endfrag)) {
- debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+ } else if (pos > Substring_siteN_pos(endfrag)) {
+ debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag)));
q = q->rest;
} else {
/* Generate all pairs at this splice_pos */
qsave = q;
- while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteN_pos(((Substring_T) p->first)) == pos) {
startfrag = (Substring_T) p->first;
debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos));
q = qsave;
- while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteN_pos(((Substring_T) q->first)) == pos) {
endfrag = (Substring_T) q->first;
debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos));
if (Substring_genomicstart(endfrag) == Substring_genomicstart(startfrag)) {
@@ -13334,25 +12517,25 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(endfrag),
Substring_querystart(endfrag),Substring_queryend(endfrag)));
- if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
+ if ((pos = Substring_siteN_pos(startfrag)) < min_endlength_1) {
debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_2) {
debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(endfrag)) {
- debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+ } else if (pos < Substring_siteN_pos(endfrag)) {
+ debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(endfrag)) {
- debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+ } else if (pos > Substring_siteN_pos(endfrag)) {
+ debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteN_pos(((Substring_T) p->first)) == pos) {
startfrag = (Substring_T) p->first;
debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos));
q = qsave;
- while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteN_pos(((Substring_T) q->first)) == pos) {
endfrag = (Substring_T) q->first;
debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos));
if (Substring_genomicstart(endfrag) == Substring_genomicstart(startfrag)) {
@@ -13434,25 +12617,25 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(endfrag),
Substring_querystart(endfrag),Substring_queryend(endfrag)));
- if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
+ if ((pos = Substring_siteN_pos(startfrag)) < min_endlength_1) {
debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_2) {
debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(endfrag)) {
- debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+ } else if (pos < Substring_siteN_pos(endfrag)) {
+ debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(endfrag)) {
- debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+ } else if (pos > Substring_siteN_pos(endfrag)) {
+ debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteN_pos(((Substring_T) p->first)) == pos) {
startfrag = (Substring_T) p->first;
debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos));
q = qsave;
- while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteN_pos(((Substring_T) q->first)) == pos) {
endfrag = (Substring_T) q->first;
debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos));
if (Substring_chrnum(startfrag) != Substring_chrnum(endfrag)) {
@@ -13498,25 +12681,25 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(endfrag),
Substring_querystart(endfrag),Substring_queryend(endfrag)));
- if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
+ if ((pos = Substring_siteN_pos(startfrag)) < min_endlength_1) {
debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_2) {
debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(endfrag)) {
- debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+ } else if (pos < Substring_siteN_pos(endfrag)) {
+ debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(endfrag)) {
- debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+ } else if (pos > Substring_siteN_pos(endfrag)) {
+ debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteN_pos(((Substring_T) p->first)) == pos) {
startfrag = (Substring_T) p->first;
debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos));
q = qsave;
- while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteN_pos(((Substring_T) q->first)) == pos) {
endfrag = (Substring_T) q->first;
debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos));
if (Substring_chrnum(startfrag) != Substring_chrnum(endfrag)) {
@@ -13626,26 +12809,26 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor),
Substring_querystart(acceptor),Substring_queryend(acceptor)));
- if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
+ if ((pos = Substring_siteD_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_2) {
debug4ld(printf("chimera_pos of donor > querylength - min_endlength_2\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos < Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos > Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
q = q->rest;
} else {
/* Generate all pairs at this splice_pos */
qsave = q;
- while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteD_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
- while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteA_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) {
@@ -13676,7 +12859,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -13687,7 +12870,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -13721,25 +12904,25 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor),
Substring_querystart(acceptor),Substring_queryend(acceptor)));
- if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
+ if ((pos = Substring_siteD_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_2) {
debug4ld(printf("chimera_pos of donor > querylength - min_endlength_2\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos < Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos > Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteD_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
- while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteA_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) {
@@ -13769,7 +12952,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -13780,7 +12963,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -13813,25 +12996,25 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor),
Substring_querystart(acceptor),Substring_queryend(acceptor)));
- if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
+ if ((pos = Substring_siteD_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_1) {
debug4ld(printf("chimera_pos of donor > querylength - min_endlength_1\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos < Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos > Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteD_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
- while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteA_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) {
@@ -13862,7 +13045,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -13873,7 +13056,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -13906,26 +13089,26 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor),
Substring_querystart(acceptor),Substring_queryend(acceptor)));
- if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
+ if ((pos = Substring_siteD_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_1) {
debug4ld(printf("chimera_pos of donor > querylength - min_endlength_1\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos < Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos > Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteD_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
- while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteA_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) {
@@ -13955,7 +13138,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -13966,7 +13149,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14004,25 +13187,25 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor),
Substring_querystart(acceptor),Substring_queryend(acceptor)));
- if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
+ if ((pos = Substring_siteD_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_2) {
debug4ld(printf("chimera_pos of donor > querylength - min_endlength_2\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos < Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos > Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteD_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
- while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteA_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) {
@@ -14038,7 +13221,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14068,25 +13251,25 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor),
Substring_querystart(acceptor),Substring_queryend(acceptor)));
- if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
+ if ((pos = Substring_siteD_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_2) {
debug4ld(printf("chimera_pos of donor > querylength - min_endlength_2\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos < Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos > Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteD_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
- while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteA_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) {
@@ -14102,7 +13285,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14133,25 +13316,25 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor),
Substring_querystart(acceptor),Substring_queryend(acceptor)));
- if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
+ if ((pos = Substring_siteD_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_1) {
debug4ld(printf("chimera_pos of donor > querylength - min_endlength_1\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos < Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos > Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteD_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
- while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteA_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) {
@@ -14167,7 +13350,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14197,25 +13380,25 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor),
Substring_querystart(acceptor),Substring_queryend(acceptor)));
- if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
+ if ((pos = Substring_siteD_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
p = p->rest;
} else if (pos > querylength - min_endlength_1) {
debug4ld(printf("chimera_pos of donor > querylength - min_endlength_1\n"));
p = p->rest;
- } else if (pos < Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos < Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
p = p->rest;
- } else if (pos > Substring_chimera_pos(acceptor)) {
- debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor)));
+ } else if (pos > Substring_siteA_pos(acceptor)) {
+ debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor)));
q = q->rest;
} else {
qsave = q;
- while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+ while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteD_pos(((Substring_T) p->first)) == pos) {
donor = (Substring_T) p->first;
debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos));
q = qsave;
- while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+ while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteA_pos(((Substring_T) q->first)) == pos) {
acceptor = (Substring_T) q->first;
debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos));
if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) {
@@ -14231,7 +13414,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14320,7 +13503,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/* End 1 */
for (p = donors_plus[nmismatches]; p != NULL; p = p->rest) {
donor = (Substring_T) p->first;
- support = Substring_chimera_pos(donor);
+ support = Substring_siteD_pos(donor);
endlength = querylength - support;
chrhigh = Substring_chrhigh(donor);
@@ -14335,7 +13518,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 1: short-overlap donor_plus: #%d:%u (%d mismatches) => searching right\n",
Substring_chrnum(donor),(Chrpos_T) (leftbound-1-chroffset),Substring_nmismatches_whole(donor)));
- if ((i = Substring_splicesites_knowni(donor)) >= 0) {
+ if ((i = Substring_splicesitesD_knowni(donor)) >= 0) {
origleft = Substring_genomicstart(donor);
if ((splicesites_i =
Splicetrie_find_right(&nmismatches_shortend,&nmismatches_list,i,
@@ -14348,9 +13531,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites);
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 1: short-overlap donor_plus: Successful ambiguous from donor #%d with amb_length %d\n",
- Substring_splicesites_knowni(donor),amb_length));
+ Substring_splicesitesD_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
+ donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
/*ambcoords_donor*/NULL,ambcoords,
/*ambi_donor*/NULL,/*ambi_acceptor*/splicesites_i,
@@ -14368,15 +13551,15 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - support;
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
- Substring_chimera_pos(donor),/*substring_querystart*/0,/*substring_queryend*/querylength,
+ Substring_siteD_pos(donor),/*substring_querystart*/0,/*substring_queryend*/querylength,
nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 1: short-overlap donor_plus: Successful splice from donor #%d to acceptor #%d\n",
- Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
+ Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
+ donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14396,7 +13579,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/* End 2 */
for (p = acceptors_plus[nmismatches]; p != NULL; p = p->rest) {
acceptor = (Substring_T) p->first;
- endlength = Substring_chimera_pos(acceptor);
+ endlength = Substring_siteA_pos(acceptor);
support = querylength - endlength;
chroffset = Substring_chroffset(acceptor);
@@ -14411,7 +13594,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 2: short-overlap acceptor_plus: #%d:%u (%d mismatches) => searching left\n",
Substring_chrnum(acceptor),(Chrpos_T) (rightbound+1-chroffset),Substring_nmismatches_whole(acceptor)));
- if ((i = Substring_splicesites_knowni(acceptor)) >= 0) {
+ if ((i = Substring_splicesitesA_knowni(acceptor)) >= 0) {
origleft = Substring_genomicstart(acceptor);
if ((splicesites_i =
Splicetrie_find_left(&nmismatches_shortend,&nmismatches_list,i,
@@ -14424,9 +13607,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites);
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 2: short-overlap acceptor_plus: Successful ambiguous from acceptor #%d with amb_length %d\n",
- Substring_splicesites_knowni(acceptor),amb_length));
+ Substring_splicesitesA_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
@@ -14444,15 +13627,15 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - endlength;
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
- Substring_chimera_pos(acceptor),/*substring_querystart*/0,/*substring_queryend*/querylength,
+ Substring_siteA_pos(acceptor),/*substring_querystart*/0,/*substring_queryend*/querylength,
nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 2: short-overlap acceptor_plus: Successful splice from acceptor #%d to donor #%d\n",
- Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
+ Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/origleft-bestleft,
+ donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/origleft-bestleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14472,7 +13655,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/* End 3 */
for (p = donors_minus[nmismatches]; p != NULL; p = p->rest) {
donor = (Substring_T) p->first;
- support = Substring_chimera_pos(donor);
+ support = Substring_siteD_pos(donor);
endlength = querylength - support;
chroffset = Substring_chroffset(donor);
@@ -14487,7 +13670,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 3: short-overlap donor_minus: #%d:%u (%d mismatches) => searching left\n",
Substring_chrnum(donor),(Chrpos_T) (rightbound+1-chroffset),Substring_nmismatches_whole(donor)));
- if ((i = Substring_splicesites_knowni(donor)) >= 0) {
+ if ((i = Substring_splicesitesD_knowni(donor)) >= 0) {
origleft = Substring_genomicend(donor);
if ((splicesites_i =
Splicetrie_find_left(&nmismatches_shortend,&nmismatches_list,i,
@@ -14500,9 +13683,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites);
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 3: short-overlap donor_minus: Successful ambiguous from donor #%d with amb_length %d\n",
- Substring_splicesites_knowni(donor),amb_length));
+ Substring_splicesitesD_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
+ donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
@@ -14520,16 +13703,16 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - endlength;
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
- querylength-Substring_chimera_pos(donor),
+ querylength-Substring_siteD_pos(donor),
/*substring_querystart*/0,/*substring_queryend*/querylength,
nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 3: short-overlap donor_minus: Successful splice from donor #%d to acceptor #%d\n",
- Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
+ Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
+ donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14549,7 +13732,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/* End 4 */
for (p = acceptors_minus[nmismatches]; p != NULL; p = p->rest) {
acceptor = (Substring_T) p->first;
- endlength = Substring_chimera_pos(acceptor);
+ endlength = Substring_siteA_pos(acceptor);
support = querylength - endlength;
chrhigh = Substring_chrhigh(acceptor);
@@ -14565,7 +13748,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 4: short-overlap acceptor_minus: #%d:%u (%d mismatches) => searching right\n",
Substring_chrnum(acceptor),(Chrpos_T) (leftbound-1-chroffset),Substring_nmismatches_whole(acceptor)));
- if ((i = Substring_splicesites_knowni(acceptor)) >= 0) {
+ if ((i = Substring_splicesitesA_knowni(acceptor)) >= 0) {
origleft = Substring_genomicend(acceptor);
if ((splicesites_i =
Splicetrie_find_right(&nmismatches_shortend,&nmismatches_list,i,
@@ -14578,9 +13761,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites);
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 4: short-overlap acceptor_minus: Successful ambiguous from acceptor #%d with amb_length %d\n",
- Substring_splicesites_knowni(acceptor),amb_length));
+ Substring_splicesitesA_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
@@ -14598,16 +13781,16 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - support;
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
- querylength-Substring_chimera_pos(acceptor),
+ querylength-Substring_siteA_pos(acceptor),
/*substring_querystart*/0,/*substring_queryend*/querylength,
nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 4: short-overlap acceptor_minus: Successful splice from acceptor #%d to #%d\n",
- Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
+ Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/bestleft-origleft,
+ donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/bestleft-origleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14627,7 +13810,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/* End 5 */
for (p = antidonors_plus[nmismatches]; p != NULL; p = p->rest) {
donor = (Substring_T) p->first;
- endlength = Substring_chimera_pos(donor);
+ endlength = Substring_siteD_pos(donor);
support = querylength - endlength;
chroffset = Substring_chroffset(donor);
@@ -14642,7 +13825,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 5: short-overlap antidonor_plus: #%d:%u (%d mismatches) => searching left\n",
Substring_chrnum(donor),(Chrpos_T) (rightbound+1-chroffset),Substring_nmismatches_whole(donor)));
- if ((i = Substring_splicesites_knowni(donor)) >= 0) {
+ if ((i = Substring_splicesitesD_knowni(donor)) >= 0) {
origleft = Substring_genomicstart(donor);
if ((splicesites_i =
Splicetrie_find_left(&nmismatches_shortend,&nmismatches_list,i,
@@ -14655,9 +13838,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites);
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 5: short-overlap antidonor_plus: Successful ambiguous from antidonor #%d with amb_length %d\n",
- Substring_splicesites_knowni(donor),amb_length));
+ Substring_splicesitesD_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
+ donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
@@ -14675,16 +13858,16 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - endlength;
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
- Substring_chimera_pos(donor),
+ Substring_siteD_pos(donor),
/*substring_querystart*/0,/*substring_queryend*/querylength,
nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 5: short-overlap antidonor_plus: Successful splice from antidonor #%d to antiacceptor #%d\n",
- Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
+ Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
+ donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14704,7 +13887,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/* End 6 */
for (p = antiacceptors_plus[nmismatches]; p != NULL; p = p->rest) {
acceptor = (Substring_T) p->first;
- support = Substring_chimera_pos(acceptor);
+ support = Substring_siteA_pos(acceptor);
endlength = querylength - support;
chrhigh = Substring_chrhigh(acceptor);
@@ -14720,7 +13903,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 6: short-overlap antiacceptor_plus: #%d:%u (%d mismatches) => searching right\n",
Substring_chrnum(acceptor),(Chrpos_T) (leftbound-1-chroffset),Substring_nmismatches_whole(acceptor)));
- if ((i = Substring_splicesites_knowni(acceptor)) >= 0) {
+ if ((i = Substring_splicesitesA_knowni(acceptor)) >= 0) {
origleft = Substring_genomicstart(acceptor);
if ((splicesites_i =
Splicetrie_find_right(&nmismatches_shortend,&nmismatches_list,i,
@@ -14733,9 +13916,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites);
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful ambiguous from antiacceptor #%d with amb_length %d\n",
- Substring_splicesites_knowni(acceptor),amb_length));
+ Substring_splicesitesA_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
@@ -14753,16 +13936,16 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - support;
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
- Substring_chimera_pos(acceptor),
+ Substring_siteA_pos(acceptor),
/*substring_querystart*/0,/*substring_queryend*/querylength,
nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful splice from antiacceptor #%d to antidonor #%d\n",
- Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
+ Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/bestleft-origleft,
+ donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/bestleft-origleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14782,7 +13965,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/* End 7 */
for (p = antidonors_minus[nmismatches]; p != NULL; p = p->rest) {
donor = (Substring_T) p->first;
- endlength = Substring_chimera_pos(donor);
+ endlength = Substring_siteD_pos(donor);
support = querylength - endlength;
chrhigh = Substring_chrhigh(donor);
@@ -14798,7 +13981,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 7: short-overlap antidonor_minus: #%d:%u (%d mismatches) => searching right\n",
Substring_chrnum(donor),(Chrpos_T) (leftbound-1-chroffset),Substring_nmismatches_whole(donor)));
- if ((i = Substring_splicesites_knowni(donor)) >= 0) {
+ if ((i = Substring_splicesitesD_knowni(donor)) >= 0) {
origleft = Substring_genomicend(donor);
if ((splicesites_i =
Splicetrie_find_right(&nmismatches_shortend,&nmismatches_list,i,
@@ -14811,9 +13994,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites);
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 7: short-overlap antidonor_minus: Successful ambiguous from antidonor #%d with amb_length %d\n",
- Substring_splicesites_knowni(donor),amb_length));
+ Substring_splicesitesD_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
+ donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
@@ -14831,16 +14014,16 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - support;
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
- querylength-Substring_chimera_pos(donor),
+ querylength-Substring_siteD_pos(donor),
/*substring_querystart*/0,/*substring_queryend*/querylength,
nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 7: short-overlap antidonor_minus: Successful splice from antidonor #%d to antiacceptor #%d\n",
- Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
+ Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
+ donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -14860,7 +14043,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/* End 8 */
for (p = antiacceptors_minus[nmismatches]; p != NULL; p = p->rest) {
acceptor = (Substring_T) p->first;
- support = Substring_chimera_pos(acceptor);
+ support = Substring_siteA_pos(acceptor);
endlength = querylength - support;
chroffset = Substring_chroffset(acceptor);
@@ -14875,7 +14058,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 8: short-overlap antiacceptor_minus: #%d:%u (%d mismatches) => searching left\n",
Substring_chrnum(acceptor),(Chrpos_T) (rightbound+1-chroffset),Substring_nmismatches_whole(acceptor)));
- if ((i = Substring_splicesites_knowni(acceptor)) >= 0) {
+ if ((i = Substring_splicesitesA_knowni(acceptor)) >= 0) {
origleft = Substring_genomicend(acceptor);
if ((splicesites_i =
Splicetrie_find_left(&nmismatches_shortend,&nmismatches_list,i,
@@ -14888,9 +14071,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites);
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful ambiguous from antiacceptor #%d with amb_length %d\n",
- Substring_splicesites_knowni(acceptor),amb_length));
+ Substring_splicesitesA_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
@@ -14908,16 +14091,16 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - endlength;
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
- querylength-Substring_chimera_pos(acceptor),
+ querylength-Substring_siteA_pos(acceptor),
/*substring_querystart*/0,/*substring_queryend*/querylength,
nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful splice from antiacceptor #%d to antidonor #%d\n",
- Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
+ Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/origleft-bestleft,
+ donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/origleft-bestleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -15049,7 +14232,7 @@ compute_floors (bool *any_omitted_p, bool *alloc_floors_p, Floors_T *floors_arra
} else if (*any_omitted_p) {
floors = Floors_new_omitted(querylength,max_end_insertions,this->omitted);
*alloc_floors_p = true;
- } else if (querylength > MAX_READLENGTH) {
+ } else if (querylength > max_floors_readlength) {
floors = Floors_new_standard(querylength,max_end_insertions,/*keep_floors_p*/false);
*alloc_floors_p = true;
} else if (keep_floors_p == false) {
@@ -18311,12 +17494,16 @@ align_end (int *cutoff_level, T this,
/* Search 3: Subs/indels via complete set */
/* 4, 5. Complete set mismatches and indels, omitting frequent oligos */
- completesetp = false;
- for (q = subs; q != NULL; q = List_next(q)) {
- hit = (Stage3end_T) List_head(q);
- debug(printf("Hit has total score of %d\n",Stage3end_score(hit)));
- if (Stage3end_score(hit) > done_level) {
- completesetp = true;
+ if (subs == NULL) {
+ completesetp = true;
+ } else {
+ completesetp = false;
+ for (q = subs; q != NULL; q = List_next(q)) {
+ hit = (Stage3end_T) List_head(q);
+ debug(printf("Hit has total score of %d\n",Stage3end_score(hit)));
+ if (Stage3end_score(hit) > done_level) {
+ completesetp = true;
+ }
}
}
debug(printf("completesetp %d\n",completesetp));
@@ -18463,11 +17650,10 @@ align_end (int *cutoff_level, T this,
}
#endif
- if (knownsplicingp == true && done_level >= localsplicing_penalty) {
+ if (knownsplicingp == true && done_level >= localsplicing_penalty &&
+ (max_splice_mismatches = done_level - localsplicing_penalty) >= 0) {
/* Want >= and not > to give better results. Negligible effect on speed. */
/* 8. Shortend splicing */
-
- max_splice_mismatches = done_level - localsplicing_penalty;
debug(printf("*** Stage 8. Short-end splicing, allowing %d mismatches ***\n",max_splice_mismatches));
donors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
@@ -18575,9 +17761,9 @@ align_end (int *cutoff_level, T this,
debug(printf("Skipping distant splicing because done_level %d < distantsplicing_penalty %d and min_trim %d < %d\n",
done_level,distantsplicing_penalty,min_trim,min_distantsplicing_end_matches));
- } else if (find_dna_chimeras_p == true) {
+ } else if (find_dna_chimeras_p == true &&
+ (max_splice_mismatches = done_level - distantsplicing_penalty) >= 0) {
/* 9 (DNA). Find distant splicing for DNA */
- max_splice_mismatches = done_level - distantsplicing_penalty;
debug(printf("*** Stage 9 (DNA). Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches));
startfrags_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
@@ -18618,11 +17804,11 @@ align_end (int *cutoff_level, T this,
debug(printf("*** Stage 9 (DNA). Distant splicing, allowing %d mismatches ***\n",nmismatches));
debug4e(printf("Sorting splice ends\n"));
- startfrags_plus[nmismatches] = Substring_sort_chimera_halves(startfrags_plus[nmismatches],/*ascendingp*/true);
- endfrags_plus[nmismatches] = Substring_sort_chimera_halves(endfrags_plus[nmismatches],/*ascendingp*/true);
+ startfrags_plus[nmismatches] = Substring_sort_siteN_halves(startfrags_plus[nmismatches],/*ascendingp*/true);
+ endfrags_plus[nmismatches] = Substring_sort_siteN_halves(endfrags_plus[nmismatches],/*ascendingp*/true);
- startfrags_minus[nmismatches] = Substring_sort_chimera_halves(startfrags_minus[nmismatches],/*ascendingp*/false);
- endfrags_minus[nmismatches] = Substring_sort_chimera_halves(endfrags_minus[nmismatches],/*ascendingp*/false);
+ startfrags_minus[nmismatches] = Substring_sort_siteN_halves(startfrags_minus[nmismatches],/*ascendingp*/false);
+ endfrags_minus[nmismatches] = Substring_sort_siteN_halves(endfrags_minus[nmismatches],/*ascendingp*/false);
debug4e(printf("Splice ends at %d nmismatches: +startfrags/endfrags %d/%d, -startfrags/endfrags %d/%d\n",
nmismatches,
@@ -18683,9 +17869,9 @@ align_end (int *cutoff_level, T this,
FREEA(startfrags_minus);
FREEA(endfrags_minus);
- } else if (knownsplicingp || novelsplicingp) {
+ } else if ((knownsplicingp || novelsplicingp) &&
+ (max_splice_mismatches = done_level - distantsplicing_penalty) >= 0) {
/* 9 (RNA). Find distant splicing for RNA iteratively using both known and novel splice sites */
- max_splice_mismatches = done_level - distantsplicing_penalty;
debug(printf("*** Stage 9 (RNA). Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches));
donors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
@@ -18734,17 +17920,17 @@ align_end (int *cutoff_level, T this,
debug(printf("*** Stage 9 (RNA). Distant splicing, allowing %d mismatches ***\n",nmismatches));
debug4e(printf("Sorting splice ends\n"));
- donors_plus[nmismatches] = Substring_sort_chimera_halves(donors_plus[nmismatches],/*ascendingp*/true);
- acceptors_plus[nmismatches] = Substring_sort_chimera_halves(acceptors_plus[nmismatches],/*ascendingp*/true);
+ donors_plus[nmismatches] = Substring_sort_siteD_halves(donors_plus[nmismatches],/*ascendingp*/true);
+ acceptors_plus[nmismatches] = Substring_sort_siteA_halves(acceptors_plus[nmismatches],/*ascendingp*/true);
- antidonors_plus[nmismatches] = Substring_sort_chimera_halves(antidonors_plus[nmismatches],/*ascendingp*/false);
- antiacceptors_plus[nmismatches] = Substring_sort_chimera_halves(antiacceptors_plus[nmismatches],/*ascendingp*/false);
+ antidonors_plus[nmismatches] = Substring_sort_siteD_halves(antidonors_plus[nmismatches],/*ascendingp*/false);
+ antiacceptors_plus[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus[nmismatches],/*ascendingp*/false);
- donors_minus[nmismatches] = Substring_sort_chimera_halves(donors_minus[nmismatches],/*ascendingp*/false);
- acceptors_minus[nmismatches] = Substring_sort_chimera_halves(acceptors_minus[nmismatches],/*ascendingp*/false);
+ donors_minus[nmismatches] = Substring_sort_siteD_halves(donors_minus[nmismatches],/*ascendingp*/false);
+ acceptors_minus[nmismatches] = Substring_sort_siteA_halves(acceptors_minus[nmismatches],/*ascendingp*/false);
- antidonors_minus[nmismatches] = Substring_sort_chimera_halves(antidonors_minus[nmismatches],/*ascendingp*/true);
- antiacceptors_minus[nmismatches] = Substring_sort_chimera_halves(antiacceptors_minus[nmismatches],/*ascendingp*/true);
+ antidonors_minus[nmismatches] = Substring_sort_siteD_halves(antidonors_minus[nmismatches],/*ascendingp*/true);
+ antiacceptors_minus[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus[nmismatches],/*ascendingp*/true);
debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n",
nmismatches,
@@ -18991,22 +18177,18 @@ single_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
int querylength, query_lastpos, cutoff_level;
char *queryuc_ptr, *quality_string;
Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
-
-#ifdef HAVE_ALLOCA
char *queryrc;
-#else
- char queryrc[MAX_READLENGTH+1];
-#endif
querylength = Shortread_fulllength(queryseq);
-#ifndef HAVE_ALLOCA
- if (querylength > MAX_READLENGTH) {
- fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
- Shortread_accession(queryseq),querylength,MAX_READLENGTH);
- *npaths_primary = *npaths_altloc = 0;
- return (Stage3end_T *) NULL;
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ queryrc = (char *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ queryrc = (char *) MALLOC((querylength+1)*sizeof(int));
}
+#else
+ queryrc = (char *) MALLOC((querylength+1)*sizeof(int));
#endif
if (user_maxlevel_float < 0.0) {
@@ -19034,9 +18216,6 @@ single_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
-#ifdef HAVE_ALLOCA
- queryrc = (char *) ALLOCA((querylength+1)*sizeof(int));
-#endif
make_complement_buffered(queryrc,queryuc_ptr,querylength);
this = Stage1_new(querylength);
@@ -19070,6 +18249,17 @@ single_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
Compress_free(&query_compress_fwd);
Compress_free(&query_compress_rev);
Stage1_free(&this,querylength);
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(queryrc);
+ } else {
+ FREE(queryrc);
+ }
+#else
+ FREE(queryrc);
+#endif
+
return stage3array;
}
@@ -19095,22 +18285,18 @@ single_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
char *queryuc_ptr, *quality_string;
Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
bool allvalidp;
-
-#ifdef HAVE_ALLOCA
char *queryrc;
-#else
- char queryrc[MAX_READLENGTH+1];
-#endif
querylength = Shortread_fulllength(queryseq);
-#ifndef HAVE_ALLOCA
- if (querylength > MAX_READLENGTH) {
- fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
- Shortread_accession(queryseq),querylength,MAX_READLENGTH);
- *npaths_primary = *npaths_altloc = 0;
- return (Stage3end_T *) NULL;
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ queryrc = (char *) ALLOCA((querylength+1)*sizeof(int));
+ } else {
+ queryrc = (char *) MALLOC((querylength+1)*sizeof(int));
}
+#else
+ queryrc = (char *) MALLOC((querylength+1)*sizeof(int));
#endif
if (user_maxlevel_float < 0.0) {
@@ -19144,9 +18330,6 @@ single_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
gmap_history = History_new();
-#ifdef HAVE_ALLOCA
- queryrc = (char *) ALLOCA((querylength+1)*sizeof(char));
-#endif
make_complement_buffered(queryrc,queryuc_ptr,querylength);
if (read_oligos(&allvalidp,this_geneplus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+1) > 0) {
@@ -19199,6 +18382,17 @@ single_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
Compress_free(&query_compress_rev);
Stage1_free(&this_geneminus,querylength);
Stage1_free(&this_geneplus,querylength);
+
+#ifdef HAVE_ALLOCA
+ if (querylength <= MAX_STACK_READLENGTH) {
+ FREEA(queryrc);
+ } else {
+ FREE(queryrc);
+ }
+#else
+ FREE(queryrc);
+#endif
+
return stage3array;
}
@@ -21004,6 +20198,17 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
debug(printf("Test for completeset using better_free_end_exists_p: completeset5p %d, completeset3p %d\n",completeset5p,completeset3p));
#endif
+
+#if 0
+ } else {
+ /* This causes very slow running time */
+ if (subs5 == NULL) {
+ completeset5p = true;
+ }
+ if (subs3 == NULL) {
+ completeset3p = true;
+ }
+#endif
}
if (querylength5 < min_kmer_readlength) {
@@ -21541,17 +20746,17 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
nmismatches,max_splice_mismatches_5));
debug4e(printf("Sorting splice ends\n"));
- donors_plus_5[nmismatches] = Substring_sort_chimera_halves(donors_plus_5[nmismatches],/*ascendingp*/true);
- acceptors_plus_5[nmismatches] = Substring_sort_chimera_halves(acceptors_plus_5[nmismatches],/*ascendingp*/true);
+ donors_plus_5[nmismatches] = Substring_sort_siteD_halves(donors_plus_5[nmismatches],/*ascendingp*/true);
+ acceptors_plus_5[nmismatches] = Substring_sort_siteA_halves(acceptors_plus_5[nmismatches],/*ascendingp*/true);
- antidonors_plus_5[nmismatches] = Substring_sort_chimera_halves(antidonors_plus_5[nmismatches],/*ascendingp*/false);
- antiacceptors_plus_5[nmismatches] = Substring_sort_chimera_halves(antiacceptors_plus_5[nmismatches],/*ascendingp*/false);
+ antidonors_plus_5[nmismatches] = Substring_sort_siteD_halves(antidonors_plus_5[nmismatches],/*ascendingp*/false);
+ antiacceptors_plus_5[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus_5[nmismatches],/*ascendingp*/false);
- donors_minus_5[nmismatches] = Substring_sort_chimera_halves(donors_minus_5[nmismatches],/*ascendingp*/false);
- acceptors_minus_5[nmismatches] = Substring_sort_chimera_halves(acceptors_minus_5[nmismatches],/*ascendingp*/false);
+ donors_minus_5[nmismatches] = Substring_sort_siteD_halves(donors_minus_5[nmismatches],/*ascendingp*/false);
+ acceptors_minus_5[nmismatches] = Substring_sort_siteA_halves(acceptors_minus_5[nmismatches],/*ascendingp*/false);
- antidonors_minus_5[nmismatches] = Substring_sort_chimera_halves(antidonors_minus_5[nmismatches],/*ascendingp*/true);
- antiacceptors_minus_5[nmismatches] = Substring_sort_chimera_halves(antiacceptors_minus_5[nmismatches],/*ascendingp*/true);
+ antidonors_minus_5[nmismatches] = Substring_sort_siteD_halves(antidonors_minus_5[nmismatches],/*ascendingp*/true);
+ antiacceptors_minus_5[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus_5[nmismatches],/*ascendingp*/true);
debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n",
nmismatches,
@@ -21642,17 +20847,17 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
nmismatches,max_splice_mismatches_3));
debug4e(printf("Sorting splice ends\n"));
- donors_plus_3[nmismatches] = Substring_sort_chimera_halves(donors_plus_3[nmismatches],/*ascendingp*/true);
- acceptors_plus_3[nmismatches] = Substring_sort_chimera_halves(acceptors_plus_3[nmismatches],/*ascendingp*/true);
+ donors_plus_3[nmismatches] = Substring_sort_siteD_halves(donors_plus_3[nmismatches],/*ascendingp*/true);
+ acceptors_plus_3[nmismatches] = Substring_sort_siteA_halves(acceptors_plus_3[nmismatches],/*ascendingp*/true);
- antidonors_plus_3[nmismatches] = Substring_sort_chimera_halves(antidonors_plus_3[nmismatches],/*ascendingp*/false);
- antiacceptors_plus_3[nmismatches] = Substring_sort_chimera_halves(antiacceptors_plus_3[nmismatches],/*ascendingp*/false);
+ antidonors_plus_3[nmismatches] = Substring_sort_siteD_halves(antidonors_plus_3[nmismatches],/*ascendingp*/false);
+ antiacceptors_plus_3[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus_3[nmismatches],/*ascendingp*/false);
- donors_minus_3[nmismatches] = Substring_sort_chimera_halves(donors_minus_3[nmismatches],/*ascendingp*/false);
- acceptors_minus_3[nmismatches] = Substring_sort_chimera_halves(acceptors_minus_3[nmismatches],/*ascendingp*/false);
+ donors_minus_3[nmismatches] = Substring_sort_siteD_halves(donors_minus_3[nmismatches],/*ascendingp*/false);
+ acceptors_minus_3[nmismatches] = Substring_sort_siteA_halves(acceptors_minus_3[nmismatches],/*ascendingp*/false);
- antidonors_minus_3[nmismatches] = Substring_sort_chimera_halves(antidonors_minus_3[nmismatches],/*ascendingp*/true);
- antiacceptors_minus_3[nmismatches] = Substring_sort_chimera_halves(antiacceptors_minus_3[nmismatches],/*ascendingp*/true);
+ antidonors_minus_3[nmismatches] = Substring_sort_siteD_halves(antidonors_minus_3[nmismatches],/*ascendingp*/true);
+ antiacceptors_minus_3[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus_3[nmismatches],/*ascendingp*/true);
debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n",
nmismatches,
@@ -23153,30 +22358,25 @@ paired_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
int maxpairedpaths = maxpaths_search; /* 100000 */
#endif
bool abort_pairing_p;
-
-#ifdef HAVE_ALLOCA
char *queryrc5, *queryrc3;
-#else
- char queryrc5[MAX_READLENGTH+1], queryrc3[MAX_READLENGTH+1];
-#endif
-
querylength5 = Shortread_fulllength(queryseq5);
querylength3 = Shortread_fulllength(queryseq3);
-#ifndef HAVE_ALLOCA
- if (querylength5 > MAX_READLENGTH || querylength3 > MAX_READLENGTH) {
- fprintf(stderr,"Paired-read %s has lengths %d and %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
- Shortread_accession(queryseq5),querylength5,querylength3,MAX_READLENGTH);
- *npaths_primary = *npaths_altloc = 0;
- *nhits5_primary = *nhits5_altloc = 0;
- *nhits3_primary = *nhits3_altloc = 0;
- *stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
- return (Stage3pair_T *) NULL;
+#ifdef HAVE_ALLOCA
+ if (querylength5 <= MAX_STACK_READLENGTH) {
+ queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char));
+ } else {
+ queryrc5 = (char *) MALLOC((querylength5+1)*sizeof(char));
+ }
+ if (querylength3 <= MAX_STACK_READLENGTH) {
+ queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char));
+ } else {
+ queryrc3 = (char *) MALLOC((querylength3+1)*sizeof(char));
}
#else
- queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char));
- queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char));
+ queryrc5 = (char *) MALLOC((querylength5+1)*sizeof(char));
+ queryrc3 = (char *) MALLOC((querylength3+1)*sizeof(char));
#endif
if (user_maxlevel_float < 0.0) {
@@ -23274,7 +22474,8 @@ paired_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
Compress_free(&query3_compress_rev);
Stage1_free(&this5,querylength5);
Stage1_free(&this3,querylength3);
- return (Stage3pair_T *) NULL;
+
+ stage3pairarray = (Stage3pair_T *) NULL;
} else {
stage3pairarray =
@@ -23296,8 +22497,25 @@ paired_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
Compress_free(&query3_compress_rev);
Stage1_free(&this5,querylength5);
Stage1_free(&this3,querylength3);
- return stage3pairarray;
}
+
+#ifdef HAVE_ALLOCA
+ if (querylength5 <= MAX_STACK_READLENGTH) {
+ FREEA(queryrc5);
+ } else {
+ FREE(queryrc5);
+ }
+ if (querylength3 <= MAX_STACK_READLENGTH) {
+ FREEA(queryrc3);
+ } else {
+ FREE(queryrc3);
+ }
+#else
+ FREE(queryrc5);
+ FREE(queryrc3);
+#endif
+
+ return stage3pairarray;
}
@@ -23335,30 +22553,26 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
int maxpairedpaths = maxpaths_search; /* 100000 */
#endif
bool abort_pairing_p_geneplus, abort_pairing_p_geneminus;
-
-#ifdef HAVE_ALLOCA
char *queryrc5, *queryrc3;
-#else
- char queryrc5[MAX_READLENGTH+1], queryrc3[MAX_READLENGTH+1];
-#endif
querylength5 = Shortread_fulllength(queryseq5);
querylength3 = Shortread_fulllength(queryseq3);
-#ifndef HAVE_ALLOCA
- if (querylength5 > MAX_READLENGTH || querylength3 > MAX_READLENGTH) {
- fprintf(stderr,"Paired-read %s has lengths %d and %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
- Shortread_accession(queryseq5),querylength5,querylength3,MAX_READLENGTH);
- *npaths_primary = *npaths_altloc = 0;
- *nhits5_primary = *nhits5_altloc = 0;
- *nhits3_primary = *nhits3_altloc = 0;
- *stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
- return (Stage3pair_T *) NULL;
+#ifdef HAVE_ALLOCA
+ if (querylength5 <= MAX_STACK_READLENGTH) {
+ queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char));
+ } else {
+ queryrc5 = (char *) MALLOC((querylength5+1)*sizeof(char));
+ }
+ if (querylength3 <= MAX_STACK_READLENGTH) {
+ queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char));
+ } else {
+ queryrc3 = (char *) MALLOC((querylength3+1)*sizeof(char));
}
#else
- queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char));
- queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char));
+ queryrc5 = (char *) MALLOC((querylength5+1)*sizeof(char));
+ queryrc3 = (char *) MALLOC((querylength3+1)*sizeof(char));
#endif
if (user_maxlevel_float < 0.0) {
@@ -23458,120 +22672,122 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
terminals_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
if (abort_pairing_p_geneplus == true) {
- debug16(printf("abort_pairing_p_geneplus is true\n"));
- paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
- terminals_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
-
- this_geneplus_5 = Stage1_new(querylength5);
- this_geneplus_3 = Stage1_new(querylength3);
- realign_separately(stage3array5,&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
- stage3array3,&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
- this_geneplus_5,this_geneplus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
- indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- keep_floors_p,/*genestrand*/+1);
+ debug16(printf("abort_pairing_p_geneplus is true\n"));
+ paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
+ terminals_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
+
+ this_geneplus_5 = Stage1_new(querylength5);
+ this_geneplus_3 = Stage1_new(querylength3);
+ realign_separately(stage3array5,&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
+ stage3array3,&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
+ this_geneplus_5,this_geneplus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+ user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
+ indel_penalty_middle,indel_penalty_end,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ keep_floors_p,/*genestrand*/+1);
- *npaths_primary = *npaths_altloc = 0;
- *final_pairtype = UNPAIRED;
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneplus_5,querylength5);
- Stage1_free(&this_geneplus_3,querylength3);
- return (Stage3pair_T *) NULL;
+ *npaths_primary = *npaths_altloc = 0;
+ *final_pairtype = UNPAIRED;
+ History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this_geneplus_5,querylength5);
+ Stage1_free(&this_geneplus_3,querylength3);
+
+ stage3pairarray = (Stage3pair_T *) NULL;
- } else {
- stage3pairarray =
- consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
- &(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
- &(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
- hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,terminals_geneplus,
- hits_geneplus_5,hits_geneplus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryseq5,queryuc_ptr_5,quality_string_5,querylength5,
- queryseq3,queryuc_ptr_3,quality_string_3,querylength3,
- cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3,
- oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneplus_5,querylength5);
- Stage1_free(&this_geneplus_3,querylength3);
- return stage3pairarray;
- }
+ } else {
+ stage3pairarray =
+ consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
+ &(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
+ &(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
+ hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,terminals_geneplus,
+ hits_geneplus_5,hits_geneplus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryseq5,queryuc_ptr_5,quality_string_5,querylength5,
+ queryseq3,queryuc_ptr_3,quality_string_3,querylength3,
+ cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3,
+ oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
+ History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this_geneplus_5,querylength5);
+ Stage1_free(&this_geneplus_3,querylength3);
+ /* return stage3pairarray; */
+ }
} else if (found_score_geneminus < found_score_geneplus) {
paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
terminals_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
if (abort_pairing_p_geneminus == true) {
- debug16(printf("abort_pairing_p_geneminus is true\n"));
- paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
- terminals_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
-
- this_geneminus_5 = Stage1_new(querylength5);
- this_geneminus_3 = Stage1_new(querylength3);
- realign_separately(stage3array5,&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
- stage3array3,&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
- this_geneminus_5,this_geneminus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
- indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- keep_floors_p,/*genestrand*/+2);
+ debug16(printf("abort_pairing_p_geneminus is true\n"));
+ paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
+ terminals_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
+
+ this_geneminus_5 = Stage1_new(querylength5);
+ this_geneminus_3 = Stage1_new(querylength3);
+ realign_separately(stage3array5,&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
+ stage3array3,&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
+ this_geneminus_5,this_geneminus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+ user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
+ indel_penalty_middle,indel_penalty_end,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ keep_floors_p,/*genestrand*/+2);
- *npaths_primary = *npaths_altloc = 0;
- *final_pairtype = UNPAIRED;
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneminus_5,querylength5);
- Stage1_free(&this_geneminus_3,querylength3);
- return (Stage3pair_T *) NULL;
+ *npaths_primary = *npaths_altloc = 0;
+ *final_pairtype = UNPAIRED;
+ History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this_geneminus_5,querylength5);
+ Stage1_free(&this_geneminus_3,querylength3);
+
+ stage3pairarray = (Stage3pair_T *) NULL;
- } else {
- stage3pairarray =
- consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
- &(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
- &(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
- hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,terminals_geneminus,
- hits_geneminus_5,hits_geneminus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryseq5,queryuc_ptr_5,quality_string_5,querylength5,
- queryseq3,queryuc_ptr_3,quality_string_3,querylength3,
- cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3,
- oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneminus_5,querylength5);
- Stage1_free(&this_geneminus_3,querylength3);
- return stage3pairarray;
- }
+ } else {
+ stage3pairarray =
+ consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
+ &(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
+ &(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
+ hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,terminals_geneminus,
+ hits_geneminus_5,hits_geneminus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryseq5,queryuc_ptr_5,quality_string_5,querylength5,
+ queryseq3,queryuc_ptr_3,quality_string_3,querylength3,
+ cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3,
+ oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
+ History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this_geneminus_5,querylength5);
+ Stage1_free(&this_geneminus_3,querylength3);
+ /* return stage3pairarray; */
+ }
} else {
hitpairs = List_append(hitpairs_geneplus,hitpairs_geneminus);
@@ -23601,8 +22817,26 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
Stage1_free(&this_geneminus_3,querylength3);
Stage1_free(&this_geneplus_5,querylength5);
Stage1_free(&this_geneplus_3,querylength3);
- return stage3pairarray;
+ /* return stage3pairarray */
+ }
+
+#ifdef HAVE_ALLOCA
+ if (querylength5 <= MAX_STACK_READLENGTH) {
+ FREEA(queryrc5);
+ } else {
+ FREE(queryrc5);
+ }
+ if (querylength3 <= MAX_STACK_READLENGTH) {
+ FREEA(queryrc3);
+ } else {
+ FREE(queryrc3);
}
+#else
+ FREE(queryrc5);
+ FREE(queryrc3);
+#endif
+
+ return stage3pairarray;
}
@@ -23678,7 +22912,8 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
int extramaterial_end_in, int extramaterial_paired_in,
int gmap_mode, int trigger_score_for_gmap_in, int gmap_allowance_in,
int max_gmap_pairsearch_in, int max_gmap_segments_in,
- int max_gmap_improvement_in, int antistranded_penalty_in) {
+ int max_gmap_improvement_in, int antistranded_penalty_in,
+ int max_floors_readlength_in) {
bool gmapp = false;
use_sarray_p = use_sarray_p_in;
@@ -23819,5 +23054,7 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
snpp = false;
}
+ max_floors_readlength = max_floors_readlength_in;
+
return;
}
diff --git a/src/stage1hr.h b/src/stage1hr.h
index 72a45d4..144490c 100644
--- a/src/stage1hr.h
+++ b/src/stage1hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage1hr.h 186091 2016-03-17 22:23:16Z twu $ */
+/* $Id: stage1hr.h 196434 2016-08-16 20:21:03Z twu $ */
#ifndef STAGE1HR_INCLUDED
#define STAGE1HR_INCLUDED
@@ -107,7 +107,8 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
int extramaterial_end_in, int extramaterial_paired_in,
int gmap_mode, int trigger_score_for_gmap_in, int gmap_allowance_in,
int max_gmap_pairsearch_in, int max_gmap_terminal_in,
- int max_gmap_improvement_in, int antistranded_penalty_in);
+ int max_gmap_improvement_in, int antistranded_penalty_in,
+ int max_floors_readlength_in);
#undef T
diff --git a/src/stage3.c b/src/stage3.c
index ef171c2..1edc528 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 195963 2016-08-08 16:38:05Z twu $";
+static char rcsid[] = "$Id: stage3.c 196409 2016-08-16 15:42:27Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3623,6 +3623,11 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs,
debug3(Pair_dump_list(exon,true));
+ if (exon == NULL) {
+ *trim5p = false;
+ return pairs;
+ }
+
max_nmatches = max_nmismatches = 0;
nmatches = nmismatches = 0;
max_score = score = 0;
@@ -3911,6 +3916,11 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
debug3(Pair_dump_list(exon,true));
+ if (exon == NULL) {
+ *trim3p = false;
+ return path;
+ }
+
max_nmatches = max_nmismatches = 0;
nmatches = nmismatches = 0;
max_score = score = 0;
@@ -12404,7 +12414,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
int sense_try, int sense_filter,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
struct Pair_T *pairarray1;
- List_T pairs_fwd_copy, pairs_rev_copy, p;
+ List_T p;
Chrpos_T *last_genomedp5_fwd = NULL, *last_genomedp3_fwd = NULL, *last_genomedp5_rev = NULL, *last_genomedp3_rev = NULL;
List_T pairs_pretrim, pairs_fwd, pairs_rev, best_pairs, temp_pairs, path_fwd, path_rev, best_path, temp_path;
List_T copy;
@@ -12420,6 +12430,11 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
int fwd_ambig_end_length_5 = 0, fwd_ambig_end_length_3 = 0, rev_ambig_end_length_5 = 0, rev_ambig_end_length_3 = 0, temp_ambig_end_length;
Splicetype_T fwd_ambig_splicetype_5, fwd_ambig_splicetype_3, rev_ambig_splicetype_5, rev_ambig_splicetype_3, temp_ambig_splicetype;
double fwd_ambig_prob_5, fwd_ambig_prob_3, rev_ambig_prob_5, rev_ambig_prob_3, temp_ambig_prob;
+#ifdef GSNAP
+ List_T pairs_fwd_copy, pairs_rev_copy;
+#endif
+
+
#ifdef COMPLEX_DIRECTION
int indel_alignment_score_fwd, indel_alignment_score_rev;
diff --git a/src/stage3hr.c b/src/stage3hr.c
index d5fba8b..52b02cc 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 195760 2016-08-04 00:12:04Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 196429 2016-08-16 20:09:56Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1782,7 +1782,17 @@ Stage3end_free (T *old) {
Substring_T substring;
Junction_T junction;
- debug0(printf("Freeing Stage3end %p of type %s\n",*old,hittype_string((*old)->hittype)));
+#ifdef DEBUG0
+ printf("Freeing Stage3end %p of type %s",*old,hittype_string((*old)->hittype));
+ if ((*old)->hittype == SUBSTRINGS) {
+ if (Substring_list_ambiguous_p((*old)->substrings_1toN) == true) {
+ printf(" ambiguous");
+ } else {
+ printf(" not ambiguous");
+ }
+ }
+ printf("\n");
+#endif
#if 0
FREE_OUT((*old)->ambcoords_donor);
@@ -7863,13 +7873,12 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicstart = Substring_genomicstart(acceptor);
new->genomicend = Substring_genomicend(acceptor);
- donor = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(acceptor),
- /*splice_pos*/Substring_querystart(acceptor),querylength,
- new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
- new->plusp,new->genestrand,
- ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
- /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false,
- /*substring1p*/true);
+ donor = Substring_new_ambig_D(/*querystart*/0,/*queryend*/Substring_querystart(acceptor),
+ /*splice_pos*/Substring_querystart(acceptor),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ new->plusp,new->genestrand,
+ ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
+ /*amb_common_prob*/acceptor_prob,/*substring1p*/true);
debug0(printf("Making sense ambiguous donor at %d..%d with %d matches\n",
0,Substring_querystart(acceptor),Substring_nmatches(donor)));
donor_prob = Doublelist_max(amb_probs_donor);
@@ -7878,13 +7887,12 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicstart = Substring_genomicstart(donor);
new->genomicend = Substring_genomicend(donor);
- acceptor = Substring_new_ambig(/*querystart*/Substring_queryend(donor),/*queryend*/querylength,
- /*splice_pos*/Substring_queryend(donor),querylength,
- new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
- new->plusp,new->genestrand,
- ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
- /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true,
- /*substring1p*/false);
+ acceptor = Substring_new_ambig_A(/*querystart*/Substring_queryend(donor),/*queryend*/querylength,
+ /*splice_pos*/Substring_queryend(donor),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ new->plusp,new->genestrand,
+ ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
+ /*amb_common_prob*/donor_prob,/*substring1p*/false);
debug0(printf("Making sense ambiguous donor at %d..%d with %d matches\n",
Substring_queryend(donor),querylength,Substring_nmatches(acceptor)));
acceptor_prob = Doublelist_max(amb_probs_acceptor);
@@ -7900,13 +7908,12 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicstart = Substring_genomicstart(acceptor);
new->genomicend = Substring_genomicend(acceptor);
- donor = Substring_new_ambig(/*querystart*/Substring_queryend(acceptor),/*queryend*/querylength,
- /*splice_pos*/Substring_queryend(acceptor),querylength,
- new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
- new->plusp,new->genestrand,
- ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
- /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false,
- /*substring1p*/false);
+ donor = Substring_new_ambig_D(/*querystart*/Substring_queryend(acceptor),/*queryend*/querylength,
+ /*splice_pos*/Substring_queryend(acceptor),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ new->plusp,new->genestrand,
+ ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
+ /*amb_common_prob*/acceptor_prob,/*substring1p*/false);
debug0(printf("Making antisense ambiguous donor at %d..%d with %d matches\n",
Substring_queryend(acceptor),querylength,Substring_nmatches(donor)));
donor_prob = Doublelist_max(amb_probs_donor);
@@ -7915,13 +7922,12 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->genomicstart = Substring_genomicstart(donor);
new->genomicend = Substring_genomicend(donor);
- acceptor = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(donor),
- /*splice_pos*/Substring_querystart(donor),querylength,
- new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
- new->plusp,new->genestrand,
- ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
- /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true,
- /*substring1p*/true);
+ acceptor = Substring_new_ambig_A(/*querystart*/0,/*queryend*/Substring_querystart(donor),
+ /*splice_pos*/Substring_querystart(donor),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ new->plusp,new->genestrand,
+ ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
+ /*amb_common_prob*/donor_prob,/*substring1p*/true);
debug0(printf("Making antisense ambiguous acceptor at %d..%d with %d matches\n",
0,Substring_querystart(donor),Substring_nmatches(acceptor)));
acceptor_prob = Doublelist_max(amb_probs_acceptor);
@@ -7991,7 +7997,6 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->substrings_Nto1 = List_reverse(List_copy(new->substrings_LtoH));
assert(Substring_querystart(List_head(new->substrings_1toN)) < Substring_querystart(List_head(new->substrings_Nto1)));
-
if (first_read_p == true) {
substring_for_concordance = (Substring_T) List_head(new->substrings_Nto1);
substring_other = (Substring_T) List_head(new->substrings_1toN);
@@ -8154,9 +8159,9 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
debug0(printf("Returning new splice %p at genomic %u..%u, donor %p (%u => %u), acceptor %p (%u => %u), score %d\n",
new,new->genomicstart - new->chroffset,new->genomicend - new->chroffset,donor,
donor == NULL ? 0 : Substring_left_genomicseg(donor),
- donor == NULL ? 0 : Substring_splicecoord(donor),
+ donor == NULL ? 0 : Substring_splicecoord_D(donor),
acceptor,acceptor == NULL ? 0 : Substring_left_genomicseg(acceptor),
- acceptor == NULL ? 0 : Substring_splicecoord(acceptor),new->score));
+ acceptor == NULL ? 0 : Substring_splicecoord_A(acceptor),new->score));
debug0(printf("sensedir %d\n",new->sensedir));
return new;
}
@@ -8238,18 +8243,18 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
/* Compute distances */
if (donor == NULL) {
new->shortexonA_distance = 0;
- } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) {
- new->shortexonA_distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
+ } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord_D(donor)) {
+ new->shortexonA_distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord_D(donor);
} else {
- new->shortexonA_distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon);
+ new->shortexonA_distance = Substring_splicecoord_D(donor) - Substring_splicecoord_A(shortexon);
}
if (acceptor == NULL) {
new->shortexonD_distance = 0;
- } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) {
- new->shortexonD_distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
+ } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord_A(acceptor)) {
+ new->shortexonD_distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord_A(acceptor);
} else {
- new->shortexonD_distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon);
+ new->shortexonD_distance = Substring_splicecoord_A(acceptor) - Substring_splicecoord_D(shortexon);
}
new->distance = new->shortexonA_distance + new->shortexonD_distance;
@@ -8269,82 +8274,78 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
if (sensedir == SENSE_FORWARD) {
substring0 = copy_donor_p ? Substring_copy(donor) : donor;
if (donor == NULL) {
- donor = substring0 = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(shortexon),
- /*splice_pos*/Substring_querystart(shortexon),querylength,
- new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
- new->plusp,new->genestrand,
- ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
- /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false,
- /*substring1p*/true);
+ donor = substring0 = Substring_new_ambig_D(/*querystart*/0,/*queryend*/Substring_querystart(shortexon),
+ /*splice_pos*/Substring_querystart(shortexon),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ new->plusp,new->genestrand,
+ ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
+ /*amb_common_prob*/acceptor_prob,/*substring1p*/true);
/* new->start_amb_prob = Doublelist_max(amb_probs_donor); */
/* new->start_amb_length = amb_length_donor; */
junction0 = Junction_new_splice(/*distance*/0,sensedir,Doublelist_max(amb_probs_donor),shortexonA_prob);
- } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) {
- distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
+ } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord_D(donor)) {
+ distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord_D(donor);
junction0 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob);
} else {
- distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon);
+ distance = Substring_splicecoord_D(donor) - Substring_splicecoord_A(shortexon);
junction0 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob);
}
substring2 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
if (acceptor == NULL) {
- acceptor = substring2 = Substring_new_ambig(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength,
- /*splice_pos*/Substring_queryend(shortexon),querylength,
- new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
- new->plusp,new->genestrand,
- ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
- /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true,
- /*substring1p*/false);
+ acceptor = substring2 = Substring_new_ambig_A(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength,
+ /*splice_pos*/Substring_queryend(shortexon),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ new->plusp,new->genestrand,
+ ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
+ /*amb_common_prob*/donor_prob,/*substring1p*/false);
/* new->end_amb_prob = Doublelist_max(amb_probs_acceptor); */
/* new->end_amb_length = amb_length_acceptor; */
junction2 = Junction_new_splice(/*distance*/0,sensedir,shortexonD_prob,Doublelist_max(amb_probs_acceptor));
- } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) {
- distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
+ } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord_A(acceptor)) {
+ distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord_A(acceptor);
junction2 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob);
} else {
- distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon);
+ distance = Substring_splicecoord_A(acceptor) - Substring_splicecoord_D(shortexon);
junction2 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob);
}
} else if (sensedir == SENSE_ANTI) {
substring0 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
if (acceptor == NULL) {
- acceptor = substring0 = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(shortexon),
- /*splice_pos*/Substring_querystart(shortexon),querylength,
- new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
- new->plusp,new->genestrand,
- ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
- /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true,
- /*substring1p*/true);
+ acceptor = substring0 = Substring_new_ambig_A(/*querystart*/0,/*queryend*/Substring_querystart(shortexon),
+ /*splice_pos*/Substring_querystart(shortexon),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ new->plusp,new->genestrand,
+ ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
+ /*amb_common_prob*/donor_prob,/*substring1p*/true);
/* new->start_amb_prob = Doublelist_max(amb_probs_acceptor); */
/* new->start_amb_length = amb_length_acceptor; */
junction0 = Junction_new_splice(/*distance*/0,sensedir,shortexonD_prob,Doublelist_max(amb_probs_acceptor));
- } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) {
- distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
+ } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord_A(acceptor)) {
+ distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord_A(acceptor);
junction0 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob);
} else {
- distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon);
+ distance = Substring_splicecoord_A(acceptor) - Substring_splicecoord_D(shortexon);
junction0 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob);
}
substring2 = copy_donor_p ? Substring_copy(donor) : donor;
if (donor == NULL) {
- donor = substring2 = Substring_new_ambig(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength,
- /*splice_pos*/Substring_queryend(shortexon),querylength,
- new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
- new->plusp,new->genestrand,
- ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
- /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false,
- /*substring1p*/false);
+ donor = substring2 = Substring_new_ambig_D(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength,
+ /*splice_pos*/Substring_queryend(shortexon),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ new->plusp,new->genestrand,
+ ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
+ /*amb_common_prob*/acceptor_prob,/*substring1p*/false);
/* new->end_amb_prob = Doublelist_max(amb_probs_donor); */
/* new->end_amb_length = amb_length_donor; */
junction2 = Junction_new_splice(/*distance*/0,sensedir,Doublelist_max(amb_probs_donor),shortexonA_prob);
- } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) {
- distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
+ } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord_D(donor)) {
+ distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord_D(donor);
junction2 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob);
} else {
- distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon);
+ distance = Substring_splicecoord_D(donor) - Substring_splicecoord_A(shortexon);
junction2 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob);
}
@@ -10154,6 +10155,9 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
if (hit->hittype == TERMINAL) {
/* Don't allow terminals to set trims */
+ } else if (hit->hittype == INSERTION || hit->hittype == DELETION) {
+ /* Don't allow indels to set trims, since they artificially align at the end */
+
#if 0
} else if ((hit->hittype == INSERTION || hit->hittype == DELETION) &&
(hit->indel_pos < 15 || hit->indel_pos > hit->querylength - 15)) {
@@ -11390,8 +11394,7 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
}
#endif
- /* Favors definitive splices over ambiguous splices. So need to
- make sure we don't make definitive splices unnecessarily */
+ /* Favors ambiguous splices over definitive splices */
if (hit->nsegments > best_hit->nsegments) {
if (hit->nmatches_posttrim > best_hit->nmatches_posttrim) {
/* More segments and strictly more matches */
@@ -11433,16 +11436,14 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
debug7(printf(" => %d wins by hittype\n",k));
return +1;
-#if 0
- } else if (hit->start_amb_length + hit->end_amb_length == 0 &&
- best_hit->start_amb_length + best_hit->end_amb_length > 0) {
+ } else if (start_amb_length(hit) + end_amb_length(hit) > 0 &&
+ start_amb_length(best_hit) + end_amb_length(best_hit) == 0) {
debug7(printf(" => %d loses by ambiguity\n",k));
return -1;
- } else if (hit->start_amb_length + hit->end_amb_length > 0 &&
- best_hit->start_amb_length + best_hit->end_amb_length == 0) {
+ } else if (start_amb_length(hit) + end_amb_length(hit) == 0 &&
+ start_amb_length(best_hit) + end_amb_length(best_hit) > 0) {
debug7(printf(" => %d wins by ambiguity\n",k));
return +1;
-#endif
} else if (hit->nindels > best_hit->nindels) {
debug7(printf(" => %d loses by nindels\n",k));
@@ -15507,9 +15508,7 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
#endif
- /* Favors definitive splices over ambiguous splices. So need to
- make sure we don't make definitive splices unnecessarily */
-
+ /* Favors ambiguous splices over definitive splices */
if (hitpair->hit5->nsegments + hitpair->hit3->nsegments > best_hitpair->hit5->nsegments + best_hitpair->hit3->nsegments) {
if (hitpair->nmatches_posttrim > best_hitpair->nmatches_posttrim) {
/* More segments and strictly more matches */
@@ -15598,21 +15597,19 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
return +1;
#endif
-#if 0
- } else if (hitpair->hit5->start_amb_length + hitpair->hit5->end_amb_length +
- hitpair->hit3->start_amb_length + hitpair->hit3->end_amb_length > 0 &&
- best_hitpair->hit5->start_amb_length + best_hitpair->hit5->end_amb_length +
- best_hitpair->hit3->start_amb_length + best_hitpair->hit3->end_amb_length == 0) {
+ } else if (start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5) +
+ start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3) == 0 &&
+ start_amb_length(best_hitpair->hit5) + end_amb_length(best_hitpair->hit5) +
+ start_amb_length(best_hitpair->hit3) + end_amb_length(best_hitpair->hit3) > 0) {
debug8(printf(" => loses by ambiguity\n"));
return -1;
- } else if (hitpair->hit5->start_amb_length + hitpair->hit5->end_amb_length +
- hitpair->hit3->start_amb_length + hitpair->hit3->end_amb_length == 0 &&
- best_hitpair->hit5->start_amb_length + best_hitpair->hit5->end_amb_length +
- best_hitpair->hit3->start_amb_length + best_hitpair->hit3->end_amb_length > 0) {
+ } else if (start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5) +
+ start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3) > 0 &&
+ start_amb_length(best_hitpair->hit5) + end_amb_length(best_hitpair->hit5) +
+ start_amb_length(best_hitpair->hit3) + end_amb_length(best_hitpair->hit3) == 0) {
debug8(printf(" => wins by ambiguity\n"));
return +1;
-#endif
#if 0
} else if (hitpair->absdifflength < best_hitpair->absdifflength) {
@@ -16532,6 +16529,9 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
if (hit5->hittype == TERMINAL) {
/* Don't allow terminals to set trims */
+ } else if (hit5->hittype == INSERTION || hit5->hittype == DELETION) {
+ /* Don't allow indels to set trims, since they artificially align at the end */
+
#if 0
} else if ((hit5->hittype == INSERTION || hit5->hittype == DELETION) &&
(hit5->indel_pos < 15 || hit5->indel_pos > hit5->querylength - 15)) {
@@ -16554,6 +16554,9 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
if (hit3->hittype == TERMINAL) {
/* Don't allow terminals to set trims */
+ } else if (hit3->hittype == INSERTION || hit3->hittype == DELETION) {
+ /* Don't allow indels to set trims, since they artificially align at the end */
+
#if 0
} else if ((hit3->hittype == INSERTION || hit3->hittype == DELETION) &&
(hit3->indel_pos < 15 || hit3->indel_pos > hit3->querylength - 15)) {
@@ -17358,7 +17361,7 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
(*nconcordant)++;
}
- if (0 && *nconcordant > maxpairedpaths) {
+ if (*nconcordant > maxpairedpaths) {
debug(printf(" -- %d concordant paths exceeds %d",*nconcordant,maxpairedpaths));
*abort_pairing_p = true;
}
@@ -17478,7 +17481,7 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
(*nconcordant)++;
}
- if (0 && *nconcordant > maxpairedpaths) {
+ if (*nconcordant > maxpairedpaths) {
debug(printf(" -- %d concordant paths exceeds %d",*nconcordant,maxpairedpaths));
*abort_pairing_p = true;
}
diff --git a/src/stage3hr.h b/src/stage3hr.h
index 15cbc60..488e865 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 195760 2016-08-04 00:12:04Z twu $ */
+/* $Id: stage3hr.h 196273 2016-08-12 15:15:06Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
diff --git a/src/substring.c b/src/substring.c
index 5ec012f..c91c1b9 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 195961 2016-08-08 16:36:34Z twu $";
+static char rcsid[] = "$Id: substring.c 196404 2016-08-16 14:47:49Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -31,6 +31,7 @@ static char rcsid[] = "$Id: substring.c 195961 2016-08-08 16:36:34Z twu $";
#define SCRAMBLE_TEXT "scramble"
#define END_SPLICESITE_SEARCH 10
+#define MIN_EXON_LENGTH 9
#define END_SPLICESITE_PROB_MATCH 0.90
#define END_SPLICESITE_PROB_MISMATCH 0.95
@@ -191,6 +192,20 @@ Endtype_string (Endtype_T endtype) {
return "";
}
+char *
+Trimaction_string (Trimaction_T trimaction) {
+ switch (trimaction) {
+ case NO_TRIM: return "NO_TRIM";
+ case PRE_TRIMMED: return "PRE_TRIMMED";
+ case COMPUTE_TRIM: return "COMPUTE_TRIM";
+ default:
+ fprintf(stderr,"Unexpected trimaction %d\n",trimaction);
+ abort();
+ }
+ return "";
+}
+
+
static char complCode[128] = COMPLEMENT_LC;
@@ -297,28 +312,27 @@ struct T {
/* for splices */
int chimera_sensedir;
- Univcoord_T splicecoord;
- int splicesites_knowni; /* Needed for intragenic_splice_p in stage1hr.c */
+ Univcoord_T splicecoord_D;
+ int splicesitesD_knowni; /* Needed for intragenic_splice_p in stage1hr.c */
- bool chimera_knownp; /* Used for computing Substring_nchimera_known */
- bool chimera_novelp;
- Univcoord_T chimera_modelpos;
- int chimera_pos;
- double chimera_prob;
+ bool siteD_knownp; /* Used for computing Substring_nchimera_known */
+ bool siteD_novelp;
+ int siteD_pos;
+ double siteD_prob;
/* for shortexon (always use *_1 for acceptor and *_2 for donor) */
/* for donor/acceptor: the ambiguous position */
- Univcoord_T splicecoord_2;
- int splicesites_knowni_2;
-
- bool chimera_knownp_2;
- bool chimera_novelp_2;
- Univcoord_T chimera_modelpos_2;
- int chimera_pos_2;
- double chimera_prob_2;
+ Univcoord_T splicecoord_A;
+ int splicesitesA_knowni;
+ bool siteA_knownp;
+ bool siteA_novelp;
+ int siteA_pos;
double siteA_prob;
- double siteD_prob;
+
+ Univcoord_T splicecoord_N; /* For DNA fusions */
+ int siteN_pos;
+
bool ambiguous_p;
int nambcoords;
@@ -326,8 +340,7 @@ struct T {
int *amb_knowni;
int *amb_nmismatches;
double *amb_probs;
- double amb_common_prob;
- bool amb_donor_common_p;
+ Endtype_T amb_type; /* Ambiguous DONs or ACCs */
};
@@ -355,8 +368,9 @@ Substring_alias_circular (T this) {
this->alignend += chrlength;
this->alignstart_trim += chrlength;
this->alignend_trim += chrlength;
- this->chimera_modelpos += chrlength;
- this->chimera_modelpos_2 += chrlength;
+ this->splicecoord_D += chrlength;
+ this->splicecoord_A += chrlength;
+ this->splicecoord_N += chrlength;
}
return;
@@ -387,8 +401,9 @@ Substring_unalias_circular (T this) {
this->alignend -= chrlength;
this->alignstart_trim -= chrlength;
this->alignend_trim -= chrlength;
- this->chimera_modelpos -= chrlength;
- this->chimera_modelpos_2 -= chrlength;
+ this->splicecoord_D -= chrlength;
+ this->splicecoord_A -= chrlength;
+ this->splicecoord_N -= chrlength;
}
return;
@@ -629,7 +644,9 @@ trim_left_end (int *nmismatches_end, Compress_T query_compress, Univcoord_T left
int trim5, alignlength, pos, prevpos, i;
int nmismatches;
-#ifdef HAVE_ALLOCA
+#if defined(LONG_READLENGTHS)
+ int *mismatch_positions = (int *) MALLOC(querylength*sizeof(int));
+#elif defined(HAVE_ALLOCA)
int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int));
#else
int mismatch_positions[MAX_READLENGTH];
@@ -735,6 +752,13 @@ trim_left_end (int *nmismatches_end, Compress_T query_compress, Univcoord_T left
}
}
+#if defined(LONG_READLENGTHS)
+ FREE(mismatch_positions);
+#elif defined(HAVE_ALLOCA)
+ FREEA(mismatch_positions);
+#else
+ /* Hard-coded use of MAX_READLENGTH */
+#endif
debug8(printf("Trim left pos 0, score %d, trim5 %d, nmismatches_end %d\n",score,trim5,*nmismatches_end));
debug8(printf("\n"));
@@ -753,7 +777,9 @@ trim_right_end (int *nmismatches_end, Compress_T query_compress, Univcoord_T lef
int trim3, alignlength, pos, prevpos, i;
int nmismatches;
-#ifdef HAVE_ALLOCA
+#if defined(LONG_READLENGTHS)
+ int *mismatch_positions = (int *) MALLOC(querylength*sizeof(int));
+#elif defined(HAVE_ALLOCA)
int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int));
#else
int mismatch_positions[MAX_READLENGTH];
@@ -855,6 +881,13 @@ trim_right_end (int *nmismatches_end, Compress_T query_compress, Univcoord_T lef
}
}
+#if defined(LONG_READLENGTHS)
+ FREE(mismatch_positions);
+#elif defined(HAVE_ALLOCA)
+ FREEA(mismatch_positions);
+#else
+ /* Hard-coded use of MAX_READLENGTH */
+#endif
debug8(printf("Trim right pos %d, score %d, trim3 %d, nmismatches_end %d\n",queryend-1,score,trim3,*nmismatches_end));
debug8(printf("\n"));
@@ -1788,7 +1821,10 @@ embellish_genomic_sam (char *genomic_diff, char *query, int querystart, int quer
/* Modified from trim_novel_spliceends in stage3.c */
-void
+/* Note: If substring does not extend to ends of query, then region
+ beyond querystart and queryend might actually be matching, and not
+ mismatches. Could fix in the future. */
+static void
substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_length_5, int *ambig_end_length_3,
Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
double *ambig_prob_5, double *ambig_prob_3, int *sensedir,
@@ -1808,25 +1844,34 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
int splice_sensedir_5, splice_sensedir_3, splice_sensedir_5_mm, splice_sensedir_3_mm;
- debug13(printf("\nEntered Substring_trim_novel_spliceends with sensedir %d\n",*sensedir));
+ debug13(printf("\nEntered substring_trim_novel_spliceends with sensedir %d\n",*sensedir));
*ambig_end_length_5 = 0;
*ambig_end_length_3 = 0;
+ *ambig_prob_5 = 0.0;
+ *ambig_prob_3 = 0.0;
/* start is distal, end is medial */
if (substringN == NULL) {
/* Skip 3' end*/
} else if (substringN->plusp == true) {
- start = substringN->genomicend;
middle = substringN->alignend_trim + 1;
- if ((end = middle - END_SPLICESITE_SEARCH) < substringN->alignstart_trim) {
- end = substringN->alignstart_trim;
+ if ((start = middle + END_SPLICESITE_SEARCH) > substringN->genomicend) {
+ start = substringN->genomicend;
+ }
+ if ((end = middle - END_SPLICESITE_SEARCH) < substringN->alignstart_trim + MIN_EXON_LENGTH) {
+ end = substringN->alignstart_trim + MIN_EXON_LENGTH;
}
+ debug13(printf("\n1 Set end points for 3' trim to be %u..%u..%u\n",start,middle,end));
+
} else {
- start = substringN->genomicend;
middle = substringN->alignend_trim - 1;
- if ((end = middle + END_SPLICESITE_SEARCH) > substringN->alignstart_trim) {
- end = substringN->alignstart_trim;
+ if ((start = middle - END_SPLICESITE_SEARCH) < substringN->genomicend) {
+ start = substringN->genomicend;
+ }
+ if ((end = middle + END_SPLICESITE_SEARCH) > substringN->alignstart_trim - MIN_EXON_LENGTH) {
+ end = substringN->alignstart_trim - MIN_EXON_LENGTH;
}
+ debug13(printf("\n2 Set end points for 3' trim to be %u..%u..%u\n",start,middle,end));
}
if (substringN == NULL) {
@@ -1839,7 +1884,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos >= middle_genomicpos) {
donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
@@ -1868,7 +1913,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos <= middle_genomicpos) {
donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
@@ -1899,7 +1944,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos >= middle_genomicpos) {
acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */
@@ -1928,7 +1973,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos <= middle_genomicpos) {
acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */
@@ -1957,7 +2002,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos >= middle_genomicpos) {
donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
@@ -2018,7 +2063,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos <= middle_genomicpos) {
donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
@@ -2084,24 +2129,26 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
Splicetype_string(splicetype3),splice_genomepos_3-chroffset,max_prob_3));
if (substringN->plusp) {
*ambig_end_length_3 = substringN->genomicend - splice_genomepos_3;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,substringN->genomicend,splice_genomepos_3));
} else {
*ambig_end_length_3 = splice_genomepos_3 - substringN->genomicend;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3,substringN->genomicend));
}
*ambig_splicetype_3 = splicetype3;
*ambig_prob_3 = max_prob_3;
- debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
} else if (max_prob_3_mm > END_SPLICESITE_PROB_MISMATCH) {
debug13(printf("Found good mismatch splice %s on 3' end at %u with probability %f\n",
Splicetype_string(splicetype3_mm),splice_genomepos_3_mm-chroffset,max_prob_3_mm));
if (substringN->plusp) {
*ambig_end_length_3 = substringN->genomicend - splice_genomepos_3_mm;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,substringN->genomicend,splice_genomepos_3_mm));
} else {
*ambig_end_length_3 = splice_genomepos_3_mm - substringN->genomicend;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3_mm,substringN->genomicend));
}
*ambig_splicetype_3 = splicetype3_mm;
*ambig_prob_3 = max_prob_3_mm;
- debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
}
}
@@ -2110,17 +2157,24 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
if (substring1 == NULL) {
/* Skip 5' end */
} else if (substring1->plusp == true) {
- start = substring1->genomicstart;
middle = substring1->alignstart_trim - 1;
- if ((end = middle + END_SPLICESITE_SEARCH) > substring1->alignend_trim) {
- end = substring1->alignend_trim;
+ if ((start = middle - END_SPLICESITE_SEARCH) < substring1->genomicstart) {
+ start = substring1->genomicstart;
+ }
+ if ((end = middle + END_SPLICESITE_SEARCH) > substring1->alignend_trim - MIN_EXON_LENGTH) {
+ end = substring1->alignend_trim - MIN_EXON_LENGTH;
}
+ debug13(printf("\n1 Set end points for 5' trim to be %u..%u..%u\n",start,middle,end));
+
} else {
- start = substring1->genomicstart;
middle = substring1->alignstart_trim + 1;
- if ((end = middle - END_SPLICESITE_SEARCH) < substring1->alignend_trim) {
- end = substring1->alignend_trim;
+ if ((start = middle + END_SPLICESITE_SEARCH) > substring1->genomicstart) {
+ start = substring1->genomicstart;
+ }
+ if ((end = middle - END_SPLICESITE_SEARCH) < substring1->alignend_trim + MIN_EXON_LENGTH) {
+ end = substring1->alignend_trim + MIN_EXON_LENGTH;
}
+ debug13(printf("\n2 Set end points for 5' trim to be %u..%u..%u\n",start,middle,end));
}
if (substring1 == NULL) {
@@ -2133,7 +2187,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos <= middle_genomicpos) {
acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
@@ -2162,7 +2216,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos >= middle_genomicpos) {
acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
@@ -2193,7 +2247,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos <= middle_genomicpos) {
donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */
@@ -2222,7 +2276,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos >= middle_genomicpos) {
donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */
@@ -2251,7 +2305,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos <= middle_genomicpos) {
acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
@@ -2312,7 +2366,7 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
while (genomicpos >= middle_genomicpos) {
acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
@@ -2378,23 +2432,25 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
Splicetype_string(splicetype5),splice_genomepos_5-chroffset,max_prob_5));
if (substring1->plusp) {
*ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart;
+ debug13(printf("1 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5,substring1->genomicstart));
} else {
*ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5;
+ debug13(printf("2 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5));
}
*ambig_splicetype_5 = splicetype5;
*ambig_prob_5 = max_prob_5;
- debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
} else if (max_prob_5_mm > END_SPLICESITE_PROB_MISMATCH) {
debug13(printf("Found good mismatch splice %s on 5' end at %u with probability %f\n",
Splicetype_string(splicetype5_mm),splice_genomepos_5_mm-chroffset,max_prob_5_mm));
if (substring1->plusp) {
- *ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart;
+ *ambig_end_length_5 = splice_genomepos_5_mm - substring1->genomicstart;
+ debug13(printf("3 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5_mm,substring1->genomicstart));
} else {
- *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5;
+ *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5_mm;
+ debug13(printf("4 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5_mm));
}
*ambig_splicetype_5 = splicetype5_mm;
*ambig_prob_5 = max_prob_5_mm;
- debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
}
}
@@ -2467,13 +2523,14 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
Splicetype_string(splicetype5),splice_genomepos_5-chroffset,max_prob_5));
if (substring1->plusp) {
*ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart;
+ debug13(printf("5 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5,substring1->genomicstart));
} else {
*ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5;
+ debug13(printf("6 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5));
}
*ambig_splicetype_5 = splicetype5;
*ambig_prob_5 = max_prob_5;
/* *cdna_direction = splice_cdna_direction_5; */
- debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
if (max_prob_sense_forward_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH) {
*sensedir = splice_sensedir_5;
@@ -2491,13 +2548,14 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
Splicetype_string(splicetype3_mm),splice_genomepos_3_mm-chroffset,max_prob_3_mm));
if (substringN->plusp) {
*ambig_end_length_3 = substringN->genomicend - splice_genomepos_3_mm;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,substringN->genomicend,splice_genomepos_3_mm));
} else {
*ambig_end_length_3 = splice_genomepos_3_mm - substringN->genomicend;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3_mm,substringN->genomicend));
}
*ambig_splicetype_3 = splicetype3_mm;
*ambig_prob_3 = max_prob_3_mm;
/* *cdna_direction = splice_cdna_direction_3_mm; */
- debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
if (max_prob_sense_forward_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH) {
*sensedir = splice_sensedir_3_mm;
@@ -2512,13 +2570,14 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
Splicetype_string(splicetype5_mm),splice_genomepos_5_mm-chroffset,max_prob_5_mm));
if (substring1->plusp) {
*ambig_end_length_5 = splice_genomepos_5_mm - substring1->genomicstart;
+ debug13(printf("7 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5_mm,substring1->genomicstart));
} else {
*ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5_mm;
+ debug13(printf("8 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5_mm));
}
*ambig_splicetype_5 = splicetype5_mm;
*ambig_prob_5 = max_prob_5_mm;
/* *cdna_direction = splice_cdna_direction_5_mm; */
- debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
if (max_prob_sense_forward_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH) {
*sensedir = splice_sensedir_5_mm;
@@ -2532,7 +2591,8 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
}
}
- debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d\n",*ambig_end_length_5,*ambig_end_length_3));
+ debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d, probs %f and %f\n",
+ *ambig_end_length_5,*ambig_end_length_3,*ambig_prob_5,*ambig_prob_3));
return;
}
@@ -2550,12 +2610,13 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
int outofbounds_start, int outofbounds_end, int minlength, int sensedir) {
T new;
int nmatches;
- int nonterminal_trim = 0;
+ /* int nonterminal_trim = 0; */
int ambig_end_length_5, ambig_end_length_3;
Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
double ambig_prob_5, ambig_prob_3;
int nmismatches_end_left, nmismatches_end_right;
+ int trim;
/* General test for goodness over original region */
@@ -2600,11 +2661,12 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->plusp = plusp;
new->genestrand = genestrand;
- new->chimera_prob = 0.0;
- new->chimera_knownp = false;
- new->chimera_knownp_2 = false;
- new->chimera_novelp = false;
- new->chimera_novelp_2 = false;
+ new->splicecoord_D = new->splicecoord_A = new->splicecoord_N = 0;
+ new->siteD_pos = new->siteA_pos = new->siteN_pos = 0;
+
+ new->siteD_prob = new->siteA_prob = 0.0;
+ new->siteD_knownp = new->siteA_knownp = false;
+ new->siteD_novelp = new->siteA_novelp = false;
debug2(printf("\n***Entered Substring_new with query %d..%d, chrnum %d (chroffset %u, chrhigh %u), plusp %d, outofbounds start %d and end %d\n",
querystart,queryend,chrnum,chroffset,chrhigh,plusp,outofbounds_start,outofbounds_end));
@@ -2618,6 +2680,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->genomicend = new->left + querylength;
debug2(printf("left is %u\n",new->left));
+ debug2(printf("genomicstart is %u, genomicend is %u\n",new->genomicstart,new->genomicend));
debug2(printf("querylength is %d, alignstart is %u, alignend is %u\n",querylength,alignstart,alignend));
assert(alignstart + outofbounds_start >= chroffset);
assert(alignend - outofbounds_end <= chrhigh);
@@ -2628,6 +2691,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->genomicstart = new->left + querylength;
debug2(printf("left is %u\n",new->left));
+ debug2(printf("genomicstart is %u, genomicend is %u\n",new->genomicstart,new->genomicend));
debug2(printf("querylength is %d, alignstart is %u, alignend is %u\n",querylength,alignstart,alignend));
assert(alignstart - outofbounds_start <= chrhigh);
assert(alignend + outofbounds_end >= chroffset);
@@ -2655,7 +2719,8 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->genomic_refdiff = (char *) NULL;
/* Do trimming */
- debug8(printf("trim_left_action %d, trim_right_action %d\n",trim_left_action,trim_right_action));
+ debug8(printf("trim_left_action %s, trim_right_action %s\n",
+ Trimaction_string(trim_left_action),Trimaction_string(trim_right_action)));
new->mandatory_trim_left = 0;
new->mandatory_trim_right = 0;
@@ -2670,27 +2735,34 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
Substring_free(&new);
return (T) NULL;
} else {
- new->trim_left = 0;
+ new->trim_left = querystart;
}
} else if (new->start_endtype == TERM) {
/* Accept true terminals generated by GSNAP procedure */
- new->trim_left = trim_left_end(&nmismatches_end_left,query_compress,new->left,querystart,queryend,querylength,
- plusp,genestrand,/*trim_mismatch_score*/-3);
- if (outofbounds_start > new->trim_left) {
- new->trim_left = outofbounds_start;
+ trim = trim_left_end(&nmismatches_end_left,query_compress,new->left,querystart,queryend,querylength,
+ plusp,genestrand,/*trim_mismatch_score*/-3);
+ debug8(printf("trim_left_end: trim_left +%d from querystart %d, nmismatches_end_left = %d\n",
+ trim,querystart,nmismatches_end_left));
+ if (outofbounds_start > querystart + trim) {
+ trim = outofbounds_start - querystart;
}
- new->querystart += new->trim_left;
+ new->querystart += trim;
+ new->trim_left = new->querystart;
+ debug8(printf("querystart is now %d\n",new->querystart));
} else {
- new->trim_left = trim_left_end(&nmismatches_end_left,query_compress,new->left,querystart,queryend,querylength,
- plusp,genestrand,trim_mismatch_score);
- debug13(printf("trim_left %d, nmismatches_end_left = %d\n",new->trim_left,nmismatches_end_left));
- if (outofbounds_start > new->trim_left) {
- new->trim_left = outofbounds_start;
+ trim = trim_left_end(&nmismatches_end_left,query_compress,new->left,querystart,queryend,querylength,
+ plusp,genestrand,trim_mismatch_score);
+ debug8(printf("trim_left_end: trim_left +%d from querystart %d, nmismatches_end_left = %d\n",
+ trim,querystart,nmismatches_end_left));
+ if (outofbounds_start > querystart + trim) {
+ trim = outofbounds_start - querystart;
}
- nonterminal_trim += new->trim_left;
- new->querystart += new->trim_left;
+ /* nonterminal_trim += new->trim_left; */
+ new->querystart += trim;
+ new->trim_left = new->querystart;
+ debug8(printf("querystart is now %d\n",new->querystart));
}
if (trim_right_action == PRE_TRIMMED) {
@@ -2703,27 +2775,34 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
debug2(printf("outofbounds_end %d > 0, so returning NULL\n",outofbounds_end));
return (T) NULL;
} else {
- new->trim_right = 0;
+ new->trim_right = querylength - queryend;
}
} else if (new->end_endtype == TERM) {
/* Accept true terminals generated by GSNAP procedure */
- new->trim_right = trim_right_end(&nmismatches_end_right,query_compress,new->left,querystart,queryend,querylength,
- plusp,genestrand,/*trim_mismatch_score*/-3);
- if (outofbounds_end > new->trim_right) {
- new->trim_right = outofbounds_end;
+ trim = trim_right_end(&nmismatches_end_right,query_compress,new->left,querystart,queryend,querylength,
+ plusp,genestrand,/*trim_mismatch_score*/-3);
+ debug8(printf("trim_right_end: trim_right +%d from queryend %d, nmismatches_end_right = %d\n",
+ trim,queryend,nmismatches_end_right));
+ if (outofbounds_end > queryend - trim) {
+ trim = queryend - outofbounds_end;
}
- new->queryend -= new->trim_right;
+ new->queryend -= trim;
+ new->trim_right = querylength - new->queryend;
+ debug8(printf("queryend is now %d\n",new->queryend));
} else {
- new->trim_right = trim_right_end(&nmismatches_end_right,query_compress,new->left,querystart,queryend,querylength,
- plusp,genestrand,trim_mismatch_score);
- debug13(printf("trim_right %d, nmismatches_end_right = %d\n",new->trim_right,nmismatches_end_right));
- if (outofbounds_end > new->trim_right) {
- new->trim_right = outofbounds_end;
+ trim = trim_right_end(&nmismatches_end_right,query_compress,new->left,querystart,queryend,querylength,
+ plusp,genestrand,trim_mismatch_score);
+ debug8(printf("trim_right_end: trim_right +%d from queryend %d, nmismatches_end_right = %d\n",
+ trim,queryend,nmismatches_end_right));
+ if (outofbounds_end > queryend - trim) {
+ trim = queryend - outofbounds_end;
}
- nonterminal_trim += new->trim_right;
- new->queryend -= new->trim_right;
+ /* nonterminal_trim += new->trim_right; */
+ new->queryend -= trim;
+ new->trim_right = querylength - new->queryend;
+ debug8(printf("queryend is now %d\n",new->queryend));
}
#if 0
@@ -2758,22 +2837,25 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->nmatches = (new->alignend - new->alignstart) - new->nmismatches_whole;
if (trim_left_action == COMPUTE_TRIM) {
- if (nmismatches_end_left == 0) {
+ if (querystart == 0 && nmismatches_end_left == 0) {
+ debug8(printf("querystart is 0 and nmismatches_end_left is 0, so setting left_end_action to be NO_TRIM\n"));
trim_left_action = NO_TRIM;
} else {
- new->alignstart_trim += new->trim_left;
+ new->alignstart_trim = new->genomicstart + new->trim_left;
}
}
if (trim_right_action == COMPUTE_TRIM) {
- if (nmismatches_end_right == 0) {
+ if (queryend == querylength && nmismatches_end_right == 0) {
+ debug8(printf("queryend is querylength and nmismatches_end_right is 0, so setting right_end_action to be NO_TRIM\n"));
trim_right_action = NO_TRIM;
} else {
- new->alignend_trim -= new->trim_right;
+ new->alignend_trim = new->genomicend - new->trim_right;
}
}
debug2(printf("Got trims of %d and %d => Revised alignstart_trim and alignend_trim to be %u..%u (%u..%u)\n",
new->trim_left,new->trim_right,new->alignstart_trim,new->alignend_trim,
new->alignstart_trim - new->chroffset,new->alignend_trim - new->chroffset));
+ debug2(printf("genomicstart is %u, genomicend is %u\n",new->genomicstart,new->genomicend));
new->trim_left_splicep = new->trim_right_splicep = false;
if (novelsplicingp == true) {
@@ -2783,27 +2865,35 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&sensedir,chroffset);
if (ambig_end_length_5 > 0) {
new->trim_left_splicep = true;
- new->querystart += (ambig_end_length_5 - new->trim_left);
- new->alignstart_trim += (ambig_end_length_5 - new->trim_left);
+ /* new->querystart += (ambig_end_length_5 - new->trim_left); */
+ /* new->alignstart_trim += (ambig_end_length_5 - new->trim_left); */
+ new->querystart = ambig_end_length_5;
+ new->alignstart_trim = new->genomicstart + ambig_end_length_5;
+
new->trim_left = ambig_end_length_5;
if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) {
new->start_endtype = DON;
+ new->siteD_prob = ambig_prob_5;
} else {
new->start_endtype = ACC;
+ new->siteA_prob = ambig_prob_5;
}
- new->chimera_prob = ambig_prob_5;
}
if (ambig_end_length_3 > 0) {
new->trim_right_splicep = true;
- new->queryend -= (ambig_end_length_3 - new->trim_right);
- new->alignend_trim -= (ambig_end_length_3 - new->trim_right);
+ /* new->queryend -= (ambig_end_length_3 - new->trim_right); */
+ /* new->alignend_trim -= (ambig_end_length_3 - new->trim_right); */
+ new->queryend = querylength - ambig_end_length_3;
+ new->alignend_trim = new->genomicend - ambig_end_length_3;
+
new->trim_right = ambig_end_length_3;
if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) {
new->end_endtype = DON;
+ new->siteD_prob = ambig_prob_3;
} else {
new->end_endtype = ACC;
+ new->siteA_prob = ambig_prob_3;
}
- new->chimera_prob_2 = ambig_prob_3;
}
} else if (trim_left_action == COMPUTE_TRIM) {
@@ -2812,15 +2902,19 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&sensedir,chroffset);
if (ambig_end_length_5 > 0) {
new->trim_left_splicep = true;
- new->querystart += (ambig_end_length_5 - new->trim_left);
- new->alignstart_trim += (ambig_end_length_5 - new->trim_left);
+ /* new->querystart += (ambig_end_length_5 - new->trim_left); */
+ /* new->alignstart_trim += (ambig_end_length_5 - new->trim_left); */
+ new->querystart = ambig_end_length_5;
+ new->alignstart_trim = new->genomicstart + ambig_end_length_5;
+
new->trim_left = ambig_end_length_5;
if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) {
new->start_endtype = DON;
+ new->siteD_prob = ambig_prob_5;
} else {
new->start_endtype = ACC;
+ new->siteA_prob = ambig_prob_5;
}
- new->chimera_prob = ambig_prob_5;
}
} else if (trim_right_action == COMPUTE_TRIM) {
@@ -2829,15 +2923,19 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&sensedir,chroffset);
if (ambig_end_length_3 > 0) {
new->trim_right_splicep = true;
- new->queryend -= (ambig_end_length_3 - new->trim_right);
- new->alignend_trim -= (ambig_end_length_3 - new->trim_right);
+ /* new->queryend -= (ambig_end_length_3 - new->trim_right); */
+ /* new->alignend_trim -= (ambig_end_length_3 - new->trim_right); */
+ new->queryend = querylength - ambig_end_length_3;
+ new->alignend_trim = new->genomicend - ambig_end_length_3;
+
new->trim_right = ambig_end_length_3;
if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) {
new->end_endtype = DON;
+ new->siteD_prob = ambig_prob_3;
} else {
new->end_endtype = ACC;
+ new->siteA_prob = ambig_prob_3;
}
- new->chimera_prob_2 = ambig_prob_3;
}
}
}
@@ -2847,17 +2945,19 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->nmatches = (new->alignstart - new->alignend) - new->nmismatches_whole;
if (trim_left_action == COMPUTE_TRIM) {
- if (nmismatches_end_left == 0) {
+ if (querystart == 0 && nmismatches_end_left == 0) {
+ debug8(printf("querystart is 0 and nmismatches_end_left is 0, so setting left_end_action to be NO_TRIM\n"));
trim_left_action = NO_TRIM;
} else {
- new->alignstart_trim -= new->trim_left;
+ new->alignstart_trim = new->genomicstart - new->trim_left;
}
}
if (trim_right_action == COMPUTE_TRIM) {
- if (nmismatches_end_right == 0) {
+ if (queryend == querylength && nmismatches_end_right == 0) {
+ debug8(printf("queryend is querylength and nmismatches_end_right is 0, so setting right_end_action to be NO_TRIM\n"));
trim_right_action = NO_TRIM;
} else {
- new->alignend_trim += new->trim_right;
+ new->alignend_trim = new->genomicend + new->trim_right;
}
}
debug2(printf("Revised alignstart_trim and alignend_trim to be %u..%u (%u..%u)\n",
@@ -2872,27 +2972,35 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&sensedir,chroffset);
if (ambig_end_length_5 > 0) {
new->trim_left_splicep = true;
- new->querystart += (ambig_end_length_5 - new->trim_left);
- new->alignstart_trim -= (ambig_end_length_5 - new->trim_left);
+ /* new->querystart += (ambig_end_length_5 - new->trim_left); */
+ /* new->alignstart_trim -= (ambig_end_length_5 - new->trim_left); */
+ new->querystart = ambig_end_length_5;
+ new->alignstart_trim = new->genomicstart - ambig_end_length_5;
+
new->trim_left = ambig_end_length_5;
if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) {
new->start_endtype = DON;
+ new->siteD_prob = ambig_prob_5;
} else {
new->start_endtype = ACC;
+ new->siteA_prob = ambig_prob_5;
}
- new->chimera_prob = ambig_prob_5;
}
if (ambig_end_length_3 > 0) {
new->trim_right_splicep = true;
- new->queryend -= (ambig_end_length_3 - new->trim_right);
- new->alignend_trim += (ambig_end_length_3 - new->trim_right);
+ /* new->queryend -= (ambig_end_length_3 - new->trim_right); */
+ /* new->alignend_trim += (ambig_end_length_3 - new->trim_right); */
+ new->queryend = querylength - ambig_end_length_3;
+ new->alignend_trim = new->genomicend + ambig_end_length_3;
+
new->trim_right = ambig_end_length_3;
if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) {
new->end_endtype = DON;
+ new->siteD_prob = ambig_prob_3;
} else {
new->end_endtype = ACC;
+ new->siteA_prob = ambig_prob_3;
}
- new->chimera_prob_2 = ambig_prob_3;
}
} else if (trim_left_action == COMPUTE_TRIM) {
@@ -2901,15 +3009,19 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&sensedir,chroffset);
if (ambig_end_length_5 > 0) {
new->trim_left_splicep = true;
- new->querystart += (ambig_end_length_5 - new->trim_left);
- new->alignstart_trim -= (ambig_end_length_5 - new->trim_left);
+ /* new->querystart += (ambig_end_length_5 - new->trim_left); */
+ /* new->alignstart_trim -= (ambig_end_length_5 - new->trim_left); */
+ new->querystart = ambig_end_length_5;
+ new->alignstart_trim = new->genomicstart - ambig_end_length_5;
+
new->trim_left = ambig_end_length_5;
if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) {
new->start_endtype = DON;
+ new->siteD_prob = ambig_prob_5;
} else {
new->start_endtype = ACC;
+ new->siteA_prob = ambig_prob_5;
}
- new->chimera_prob = ambig_prob_5;
}
} else if (trim_right_action == COMPUTE_TRIM) {
@@ -2918,15 +3030,19 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&sensedir,chroffset);
if (ambig_end_length_3 > 0) {
new->trim_right_splicep = true;
- new->queryend -= (ambig_end_length_3 - new->trim_right);
- new->alignend_trim += (ambig_end_length_3 - new->trim_right);
+ /* new->queryend -= (ambig_end_length_3 - new->trim_right); */
+ /* new->alignend_trim += (ambig_end_length_3 - new->trim_right); */
+ new->queryend = querylength - ambig_end_length_3;
+ new->alignend_trim = new->genomicend + ambig_end_length_3;
+
new->trim_right = ambig_end_length_3;
if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) {
new->end_endtype = DON;
+ new->siteD_prob = ambig_prob_3;
} else {
new->end_endtype = ACC;
+ new->siteA_prob = ambig_prob_3;
}
- new->chimera_prob_2 = ambig_prob_3;
}
}
}
@@ -2964,30 +3080,30 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->amb_knowni = (int *) NULL;
new->amb_nmismatches = (int *) NULL;
new->amb_probs = (double *) NULL;
- new->amb_common_prob = 0.0;
- new->amb_donor_common_p = false;
+ new->amb_type = END;
- debug2(printf("Returning substring %p\n",new));
+ debug2(printf("Returning substring %p, query %d..%d, trim %d..%d\n",
+ new,new->querystart,new->queryend,new->trim_left,new->trim_right));
return new;
}
T
-Substring_new_ambig (int querystart, int queryend, int splice_pos, int querylength,
- Chrnum_T chrnum, Univcoord_T chroffset,
- Univcoord_T chrhigh, Chrpos_T chrlength,
- bool plusp, int genestrand,
+Substring_new_ambig_D (int querystart, int queryend, int splice_pos, int querylength,
+ Chrnum_T chrnum, Univcoord_T chroffset,
+ Univcoord_T chrhigh, Chrpos_T chrlength,
+ bool plusp, int genestrand,
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords,
+ Uint8list_T ambcoords,
#else
- Uintlist_T ambcoords,
+ Uintlist_T ambcoords,
#endif
- Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs,
- double amb_common_prob, bool amb_donor_common_p, bool substring1p) {
+ Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs,
+ double amb_common_prob, bool substring1p) {
int ignore;
T new = (T) MALLOC_OUT(sizeof(*new));
- debug2(printf("Entered Substring_new_ambig with chrnum %d (chroffset %u, chrhigh %u), %d..%d, querylength %d, plusp %d\n",
+ debug2(printf("Entered Substring_new_ambig_D with chrnum %d (chroffset %u, chrhigh %u), %d..%d, querylength %d, plusp %d\n",
chrnum,chroffset,chrhigh,querystart,queryend,querylength,plusp));
new->exactp = false;
@@ -3020,7 +3136,114 @@ Substring_new_ambig (int querystart, int queryend, int splice_pos, int queryleng
new->querystart_orig = new->querystart = querystart;
new->queryend_orig = new->queryend = queryend;
+ new->querylength = querylength;
+
+ new->alignstart = new->alignstart_trim = 0;
+ new->alignend = new->alignend_trim = 0;
+
+ new->plusp = plusp;
+ new->genestrand = genestrand;
+
+ new->siteD_knownp = new->siteA_knownp = false;
+ new->siteD_novelp = new->siteA_novelp = false;
+
+ new->siteD_prob = 0.0;
+ new->siteA_prob = amb_common_prob;
+
+ new->nmismatches_bothdiff = new->nmismatches_whole = Intlist_min(amb_nmismatches);
+
+#if 0
+ if (plusp == true) {
+ /* Fails because alignstart and alignend are not known */
+ new->nmatches = (new->alignend_trim - new->alignstart_trim) - new->nmismatches_whole;
+ } else {
+ new->alignoffset = querylength - queryend;
+ /* Fails because alignstart and alignend are not known */
+ new->nmatches = (new->alignstart_trim - new->alignend_trim) - new->nmismatches_whole;
+ }
+#endif
+ new->nmatches = (queryend - querystart) - new->nmismatches_whole;
+
+ new->genomic_bothdiff = (char *) NULL;
+ new->genomic_refdiff = (char *) NULL;
+ if (substring1p == true) {
+ debug2(printf("substring1p is true, so setting trims to be %d and %d\n",querystart,0));
+ new->trim_left = querystart;
+ new->trim_right = 0;
+ } else {
+ debug2(printf("substring1p is false, so setting trims to be %d and %d\n",0,querylength - queryend));
+ new->trim_left = 0;
+ new->trim_right = querylength - queryend;
+ }
+ new->mandatory_trim_left = 0;
+ new->mandatory_trim_right = 0;
+ new->trim_left_splicep = new->trim_right_splicep = false;
+
+
+ new->ambiguous_p = true;
+#ifdef LARGE_GENOMES
+ new->ambcoords = Uint8list_to_array_out(&new->nambcoords,ambcoords);
+#else
+ new->ambcoords = Uintlist_to_array_out(&new->nambcoords,ambcoords);
+ debug2(printf("ambcoords: %s\n",Uintlist_to_string(ambcoords)));
+#endif
+ new->amb_knowni = Intlist_to_array_out(&ignore,amb_knowni);
+ new->amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches);
+ new->amb_probs = Doublelist_to_array_out(&ignore,amb_probs);
new->amb_splice_pos = splice_pos;
+ new->amb_type = DON;
+
+ return new;
+}
+
+T
+Substring_new_ambig_A (int querystart, int queryend, int splice_pos, int querylength,
+ Chrnum_T chrnum, Univcoord_T chroffset,
+ Univcoord_T chrhigh, Chrpos_T chrlength,
+ bool plusp, int genestrand,
+#ifdef LARGE_GENOMES
+ Uint8list_T ambcoords,
+#else
+ Uintlist_T ambcoords,
+#endif
+ Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs,
+ double amb_common_prob, bool substring1p) {
+ int ignore;
+ T new = (T) MALLOC_OUT(sizeof(*new));
+
+ debug2(printf("Entered Substring_new_ambig_A with chrnum %d (chroffset %u, chrhigh %u), %d..%d, querylength %d, plusp %d\n",
+ chrnum,chroffset,chrhigh,querystart,queryend,querylength,plusp));
+
+ new->exactp = false;
+
+ new->chrnum = chrnum;
+ new->chroffset = chroffset;
+ new->chrhigh = chrhigh;
+ new->chrlength = chrlength;
+
+ new->left = 0;
+#ifdef LARGE_GENOMES
+ if (plusp == true) {
+ new->genomicstart = Uint8list_max(ambcoords);
+ new->genomicend = Uint8list_min(ambcoords);
+ } else {
+ new->genomicstart = Uint8list_min(ambcoords);
+ new->genomicend = Uint8list_max(ambcoords);
+ }
+#else
+ if (plusp == true) {
+ new->genomicstart = Uintlist_max(ambcoords);
+ new->genomicend = Uintlist_min(ambcoords);
+ } else {
+ new->genomicstart = Uintlist_min(ambcoords);
+ new->genomicend = Uintlist_max(ambcoords);
+ }
+#endif
+ new->start_endtype = END;
+ new->end_endtype = END;
+
+ new->querystart_orig = new->querystart = querystart;
+ new->queryend_orig = new->queryend = queryend;
new->querylength = querylength;
new->alignstart = new->alignstart_trim = 0;
@@ -3029,11 +3252,11 @@ Substring_new_ambig (int querystart, int queryend, int splice_pos, int queryleng
new->plusp = plusp;
new->genestrand = genestrand;
- new->chimera_prob = 0.0;
- new->chimera_knownp = false;
- new->chimera_knownp_2 = false;
- new->chimera_novelp = false;
- new->chimera_novelp_2 = false;
+ new->siteD_knownp = new->siteA_knownp = false;
+ new->siteD_novelp = new->siteA_novelp = false;
+
+ new->siteA_prob = 0.0;
+ new->siteD_prob = amb_common_prob;
new->nmismatches_bothdiff = new->nmismatches_whole = Intlist_min(amb_nmismatches);
@@ -3075,8 +3298,8 @@ Substring_new_ambig (int querystart, int queryend, int splice_pos, int queryleng
new->amb_knowni = Intlist_to_array_out(&ignore,amb_knowni);
new->amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches);
new->amb_probs = Doublelist_to_array_out(&ignore,amb_probs);
- new->amb_common_prob = amb_common_prob;
- new->amb_donor_common_p = amb_donor_common_p;
+ new->amb_splice_pos = splice_pos;
+ new->amb_type = ACC;
return new;
}
@@ -3109,15 +3332,32 @@ Univcoord_T
Substring_set_unambiguous (double *donor_prob, double *acceptor_prob, Univcoord_T *genomicstart, Univcoord_T *genomicend,
T this, int bingoi) {
- debug2(printf("Entered Substring_set_unambiguous\n"));
+#ifdef DEBUG2
+ printf("Entered Substring_set_unambiguous. plusp %d",this->plusp);
+ if (this->amb_type == DON) {
+ printf("type DON\n");
+ } else {
+ printf("type ACC\n");
+ }
+#endif
- this->splicecoord = this->ambcoords[bingoi];
- this->splicesites_knowni = this->amb_knowni[bingoi];
this->nmismatches_whole = this->amb_nmismatches[bingoi];
- this->chimera_prob = this->amb_probs[bingoi];
if (this->plusp == true) {
- this->left = this->splicecoord - this->amb_splice_pos;
+ if (this->amb_type == DON) {
+ *acceptor_prob = this->siteA_prob;
+ *donor_prob = this->siteD_prob = this->amb_probs[bingoi];
+ this->splicecoord_D = this->ambcoords[bingoi];
+ this->splicesitesD_knowni = this->amb_knowni[bingoi];
+ this->left = this->splicecoord_D - this->amb_splice_pos;
+ } else {
+ *donor_prob = this->siteD_prob;
+ *acceptor_prob = this->siteA_prob = this->amb_probs[bingoi];
+ this->splicecoord_A = this->ambcoords[bingoi];
+ this->splicesitesA_knowni = this->amb_knowni[bingoi];
+ this->left = this->splicecoord_A - this->amb_splice_pos;
+ }
+
debug2(printf("left %u\n",this->left));
*genomicstart = this->genomicstart = this->left;
*genomicend = this->genomicend = this->left + this->querylength;
@@ -3130,7 +3370,20 @@ Substring_set_unambiguous (double *donor_prob, double *acceptor_prob, Univcoord_
this->alignend,this->alignend - this->chroffset,this->genomicstart,this->genomicend));
} else {
- this->left = this->splicecoord - (this->querylength - this->amb_splice_pos);
+ if (this->amb_type == DON) {
+ *acceptor_prob = this->siteA_prob;
+ *donor_prob = this->siteD_prob = this->amb_probs[bingoi];
+ this->splicecoord_D = this->ambcoords[bingoi];
+ this->splicesitesD_knowni = this->amb_knowni[bingoi];
+ this->left = this->splicecoord_D - (this->querylength - this->amb_splice_pos);
+ } else {
+ *donor_prob = this->siteD_prob;
+ *acceptor_prob = this->siteA_prob = this->amb_probs[bingoi];
+ this->splicecoord_A = this->ambcoords[bingoi];
+ this->splicesitesA_knowni = this->amb_knowni[bingoi];
+ this->left = this->splicecoord_A - (this->querylength - this->amb_splice_pos);
+ }
+
debug2(printf("left %u\n",this->left));
*genomicend = this->genomicend = this->left;
*genomicstart = this->genomicstart = this->left + this->querylength;
@@ -3143,14 +3396,6 @@ Substring_set_unambiguous (double *donor_prob, double *acceptor_prob, Univcoord_
this->alignend,this->alignend - this->chroffset,this->genomicstart,this->genomicend));
}
- if (this->amb_donor_common_p == true) {
- *donor_prob = this->amb_common_prob;
- *acceptor_prob = this->amb_probs[bingoi];
- } else {
- *acceptor_prob = this->amb_common_prob;
- *donor_prob = this->amb_probs[bingoi];
- }
-
this->ambiguous_p = false;
return this->left;
@@ -3237,9 +3482,13 @@ Substring_display_prep (T this, char *queryuc_ptr, int querylength,
int extraleft, int extraright,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
Genome_T genome) {
- char *genomic_diff;
- char *gbuffer;
-#ifndef HAVE_ALLOCA
+
+#if defined(LONG_READLENGTHS)
+ char *genomic_diff, *gbuffer;
+#elif defined(HAVE_ALLOCA)
+ char *genomic_diff, *gbuffer;
+#else
+ char *genomic_diff, *gbuffer;
char gbuffer_alloc[MAX_READLENGTH/*+MAX_END_DELETIONS*/+1];
bool allocp;
#endif
@@ -3259,7 +3508,9 @@ Substring_display_prep (T this, char *queryuc_ptr, int querylength,
} else {
/* Used to be this->genomiclength, but doesn't work for large insertions */
-#ifdef HAVE_ALLOCA
+#if defined(LONG_READLENGTHS)
+ gbuffer = (char *) MALLOC((querylength+1) * sizeof(char));
+#elif defined(HAVE_ALLOCA)
gbuffer = (char *) ALLOCA((querylength+1) * sizeof(char));
#else
if (querylength < MAX_READLENGTH) {
@@ -3312,7 +3563,9 @@ Substring_display_prep (T this, char *queryuc_ptr, int querylength,
if (0 && this->exactp == true && extraleft == 0 && extraright == 0) {
} else {
-#ifdef HAVE_ALLOCA
+#if defined(LONG_READLENGTHS)
+ FREE(gbuffer);
+#elif defined(HAVE_ALLOCA)
FREEA(gbuffer);
#else
if (allocp == true) {
@@ -3330,7 +3583,9 @@ Substring_display_prep (T this, char *queryuc_ptr, int querylength,
} else {
/* Used to be this->genomiclength, but doesn't work for large insertions */
-#ifdef HAVE_ALLOCA
+#if defined(LONG_READLENGTHS)
+ gbuffer = (char *) MALLOC((querylength+1) * sizeof(char));
+#elif defined(HAVE_ALLOCA)
gbuffer = (char *) ALLOCA((querylength+1) * sizeof(char));
#else
if (querylength < MAX_READLENGTH) {
@@ -3386,7 +3641,9 @@ Substring_display_prep (T this, char *queryuc_ptr, int querylength,
if (0 && this->exactp == true && extraleft == 0 && extraright == 0) {
} else {
-#ifdef HAVE_ALLOCA
+#if defined(LONG_READLENGTHS)
+ FREE(gbuffer);
+#elif defined(HAVE_ALLOCA)
FREEA(gbuffer);
#else
if (allocp == true) {
@@ -3405,29 +3662,35 @@ Substring_left (T this) {
return this->left;
}
+
+Univcoord_T
+Substring_splicecoord_D (T this) {
+ return this->splicecoord_D;
+}
+
Univcoord_T
-Substring_splicecoord (T this) {
- return this->splicecoord;
+Substring_splicecoord_A (T this) {
+ return this->splicecoord_A;
}
Chrpos_T
-Substring_chr_splicecoord (T this) {
- return (Chrpos_T) (this->splicecoord - this->chroffset);
+Substring_chr_splicecoord_D (T this) {
+ return (Chrpos_T) (this->splicecoord_D - this->chroffset);
}
-int
-Substring_splicesites_knowni (T this) {
- return this->splicesites_knowni;
+Chrpos_T
+Substring_chr_splicecoord_A (T this) {
+ return (Chrpos_T) (this->splicecoord_A - this->chroffset);
}
-Univcoord_T
-Substring_splicecoord_A (T this) {
- return this->splicecoord;
+int
+Substring_splicesitesD_knowni (T this) {
+ return this->splicesitesD_knowni;
}
-Univcoord_T
-Substring_splicecoord_D (T this) {
- return this->splicecoord_2;
+int
+Substring_splicesitesA_knowni (T this) {
+ return this->splicesitesA_knowni;
}
bool
@@ -3744,8 +4007,8 @@ Substring_amb_donor_prob (T this) {
double max;
int i;
- if (this->amb_donor_common_p == true) {
- return this->amb_common_prob;
+ if (this->amb_type == DON) {
+ return this->siteD_prob;
} else {
max = this->amb_probs[0];
for (i = 1; i < this->nambcoords; i++) {
@@ -3762,7 +4025,9 @@ Substring_amb_acceptor_prob (T this) {
double max;
int i;
- if (this->amb_donor_common_p == true) {
+ if (this->amb_type == ACC) {
+ return this->siteA_prob;
+ } else {
max = this->amb_probs[0];
for (i = 1; i < this->nambcoords; i++) {
if (this->amb_probs[i] > max) {
@@ -3770,62 +4035,44 @@ Substring_amb_acceptor_prob (T this) {
}
}
return max;
- } else {
- return this->amb_common_prob;
}
}
double
-Substring_siteA_prob (T this) {
- return this->siteA_prob;
-}
-
-double
Substring_siteD_prob (T this) {
return this->siteD_prob;
}
-
double
-Substring_chimera_prob (T this) {
- return this->chimera_prob;
-}
+Substring_siteA_prob (T this) {
+ return this->siteA_prob;
+}
-double
-Substring_chimera_prob_2 (T this) {
- return this->chimera_prob_2;
-}
int
-Substring_chimera_pos (T this) {
- return this->chimera_pos;
+Substring_siteD_pos (T this) {
+ return this->siteD_pos;
}
-/* For shortexon */
int
-Substring_chimera_pos_A (T this) {
- return this->chimera_pos;
+Substring_siteA_pos (T this) {
+ return this->siteA_pos;
}
-/* For shortexon */
int
-Substring_chimera_pos_D (T this) {
- return this->chimera_pos_2;
+Substring_siteN_pos (T this) {
+ return this->siteN_pos;
}
-bool
-Substring_chimera_knownp (T this) {
- return this->chimera_knownp;
-}
int
Substring_nchimera_known (T this) {
if (this == NULL) {
return 0;
} else {
- return (int) this->chimera_knownp + (int) this->chimera_knownp_2;
+ return (int) this->siteD_knownp + (int) this->siteA_knownp;
}
}
@@ -3834,7 +4081,7 @@ Substring_nchimera_novel (T this) {
if (this == NULL) {
return 0;
} else {
- return (int) this->chimera_novelp + (int) this->chimera_novelp_2;
+ return (int) this->siteD_novelp + (int) this->siteA_novelp;
}
}
@@ -3851,6 +4098,20 @@ Substring_ambiguous_p (T this) {
return this->ambiguous_p;
}
+bool
+Substring_list_ambiguous_p (List_T list) {
+ T this;
+ List_T p;
+
+ for (p = list; p != NULL; p = List_next(p)) {
+ this = (T) List_head(p);
+ if (this->ambiguous_p == true) {
+ return true;
+ }
+ }
+ return false;
+}
+
int
Substring_nambcoords (T this) {
return this->nambcoords;
@@ -3879,8 +4140,6 @@ Substring_amb_probs (T this) {
-
-
/* circularpos measures query distance from SAM chrlow to origin */
int
Substring_circularpos (T this) {
@@ -3980,21 +4239,22 @@ Substring_copy (T old) {
new->chimera_sensedir = old->chimera_sensedir;
- new->splicecoord = old->splicecoord;
- new->splicesites_knowni = old->splicesites_knowni;
- new->chimera_knownp = old->chimera_knownp;
- new->chimera_novelp = old->chimera_novelp;
- new->chimera_modelpos = old->chimera_modelpos;
- new->chimera_pos = old->chimera_pos;
- new->chimera_prob = old->chimera_prob;
-
- new->splicecoord_2 = old->splicecoord_2;
- new->splicesites_knowni_2 = old->splicesites_knowni_2;
- new->chimera_knownp_2 = old->chimera_knownp_2;
- new->chimera_novelp_2 = old->chimera_novelp_2;
- new->chimera_modelpos_2 = old->chimera_modelpos_2;
- new->chimera_pos_2 = old->chimera_pos_2;
- new->chimera_prob_2 = old->chimera_prob_2;
+ new->splicecoord_D = old->splicecoord_D;
+ new->splicesitesD_knowni = old->splicesitesD_knowni;
+ new->siteD_knownp = old->siteD_knownp;
+ new->siteD_novelp = old->siteD_novelp;
+ new->siteD_pos = old->siteD_pos;
+ new->siteD_prob = old->siteD_prob;
+
+ new->splicecoord_A = old->splicecoord_A;
+ new->splicesitesA_knowni = old->splicesitesA_knowni;
+ new->siteA_knownp = old->siteA_knownp;
+ new->siteA_novelp = old->siteA_novelp;
+ new->siteA_pos = old->siteA_pos;
+ new->siteA_prob = old->siteA_prob;
+
+ new->splicecoord_N = old->splicecoord_N;
+ new->siteN_pos = old->siteN_pos;
new->ambiguous_p = old->ambiguous_p;
if (old->nambcoords == 0) {
@@ -4003,22 +4263,19 @@ Substring_copy (T old) {
new->amb_knowni = (int *) NULL;
new->amb_nmismatches = (int *) NULL;
new->amb_probs = (double *) NULL;
- new->amb_common_prob = 0.0;
- new->amb_donor_common_p = false;
} else {
new->nambcoords = old->nambcoords;
new->ambcoords = (Univcoord_T *) MALLOC_OUT(old->nambcoords * sizeof(Univcoord_T));
new->amb_knowni = (int *) MALLOC_OUT(old->nambcoords * sizeof(int));
new->amb_nmismatches = (int *) MALLOC_OUT(old->nambcoords * sizeof(int));
new->amb_probs = (double *) MALLOC_OUT(old->nambcoords * sizeof(double));
- new->amb_common_prob = old->amb_common_prob;
- new->amb_donor_common_p = old->amb_donor_common_p;
memcpy(new->ambcoords,old->ambcoords,old->nambcoords * sizeof(Univcoord_T));
memcpy(new->amb_knowni,old->amb_knowni,old->nambcoords * sizeof(int));
memcpy(new->amb_nmismatches,old->amb_nmismatches,old->nambcoords * sizeof(int));
memcpy(new->amb_probs,old->amb_probs,old->nambcoords * sizeof(double));
}
+ new->amb_type = old->amb_type;
return new;
}
@@ -4084,24 +4341,16 @@ Substring_new_startfrag (Univcoord_T startfrag_coord, int splice_pos, int nmisma
debug2(printf("Making new startfrag with coord %u and left %u, plusp %d, query %d..%d, genome %u..%u\n",
startfrag_coord,left,plusp,querystart,queryend,alignstart - chroffset,alignend - chroffset));
- new->splicecoord = startfrag_coord;
- new->splicesites_knowni = -1;
+ new->splicecoord_N = startfrag_coord;
+ assert(startfrag_coord == left + splice_pos);
- new->chimera_modelpos = left + splice_pos;
- assert(new->splicecoord == new->chimera_modelpos);
new->chimera_sensedir = SENSE_NULL;
- /* new->chimera_knownp = false; */
- new->chimera_novelp = true;
if (plusp == true) {
- new->chimera_pos = splice_pos;
+ new->siteN_pos = splice_pos;
} else {
- new->chimera_pos = querylength - splice_pos;
+ new->siteN_pos = querylength - splice_pos;
}
- new->chimera_prob = 0.0;
-
- new->siteA_prob = 0.0;
- new->siteD_prob = 0.0;
return new;
}
@@ -4166,24 +4415,16 @@ Substring_new_endfrag (Univcoord_T endfrag_coord, int splice_pos, int nmismatche
debug2(printf("Making new endfrag with coord %u and left %u, plusp %d, query %d..%d, genome %u..%u\n",
endfrag_coord,left,plusp,querystart,queryend,alignstart - chroffset,alignend - chroffset));
- new->splicecoord = endfrag_coord;
- new->splicesites_knowni = -1;
+ new->splicecoord_N = endfrag_coord;
+ assert(endfrag_coord == left + splice_pos);
- new->chimera_modelpos = left + splice_pos;
- assert(new->splicecoord == new->chimera_modelpos);
new->chimera_sensedir = SENSE_NULL;
- /* new->chimera_knownp = false; */
- new->chimera_novelp = true;
if (plusp == true) {
- new->chimera_pos = splice_pos;
+ new->siteN_pos = splice_pos;
} else {
- new->chimera_pos = querylength - splice_pos;
+ new->siteN_pos = querylength - splice_pos;
}
- new->chimera_prob = 0.0;
-
- new->siteA_prob = 0.0;
- new->siteD_prob = 0.0;
return new;
}
@@ -4220,11 +4461,15 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
querystart = substring_querystart; /* 0, for an end piece */
queryend = donor_pos;
+#if 0
if (querystart == 0) {
trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
} else {
trim_left_action = PRE_TRIMMED;
}
+#else
+ trim_left_action = COMPUTE_TRIM;
+#endif
trim_right_action = NO_TRIM;
} else if (sensedir == SENSE_ANTI) {
@@ -4234,11 +4479,15 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
querystart = donor_pos;
queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
+#if 0
if (queryend == querylength) {
trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
} else {
trim_right_action = PRE_TRIMMED;
}
+#else
+ trim_right_action = COMPUTE_TRIM;
+#endif
} else {
abort();
@@ -4256,11 +4505,15 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
querystart = substring_querystart; /* 0, for an end piece */
queryend = querylength - donor_pos;
+#if 0
if (querystart == 0) {
trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
} else {
trim_left_action = PRE_TRIMMED;
}
+#else
+ trim_left_action = COMPUTE_TRIM;
+#endif
trim_right_action = NO_TRIM;
} else if (sensedir == SENSE_ANTI) {
@@ -4270,11 +4523,15 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
querystart = querylength - donor_pos;
queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
+#if 0
if (queryend == querylength) {
trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
} else {
trim_right_action = PRE_TRIMMED;
}
+#else
+ trim_right_action = COMPUTE_TRIM;
+#endif
} else {
abort();
@@ -4292,30 +4549,30 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
return (T) NULL;
}
- debug2(printf("Making new donor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, genome %u..%u\n",
- donor_knowni,donor_coord,left,plusp,sensedir,querystart,queryend,alignstart - chroffset,alignend - chroffset));
- new->splicecoord = donor_coord;
- new->splicesites_knowni = donor_knowni;
+ debug2(printf("Making new donor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, trim %d..%d, genome %u..%u\n",
+ donor_knowni,donor_coord,left,plusp,sensedir,new->querystart,new->queryend,
+ new->trim_left,new->trim_right,alignstart - chroffset,alignend - chroffset));
+ debug2(printf("Original bounds were %d..%d\n",substring_querystart,substring_queryend));
+ debug2(printf("Setting siteD_prob to be %f\n",donor_prob));
+
+ new->splicecoord_D = donor_coord;
+ new->splicesitesD_knowni = donor_knowni;
+ assert(donor_coord == left + donor_pos);
- new->chimera_modelpos = left + donor_pos;
- assert(new->splicecoord == new->chimera_modelpos);
new->chimera_sensedir = sensedir;
if (donor_knowni >= 0) {
- new->chimera_knownp = true;
+ new->siteD_knownp = true;
/* new->chimera_novelp = false */
} else {
- /* new->chimera_knownp = false; */
- new->chimera_novelp = true;
+ /* new->siteD_knownp = false; */
+ new->siteD_novelp = true;
}
if (plusp == true) {
- new->chimera_pos = donor_pos;
+ new->siteD_pos = donor_pos;
} else {
- new->chimera_pos = querylength - donor_pos;
+ new->siteD_pos = querylength - donor_pos;
}
- new->chimera_prob = donor_prob;
-
- new->siteA_prob = 0.0;
new->siteD_prob = donor_prob;
return new;
@@ -4353,11 +4610,15 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
querystart = acceptor_pos;
queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
+#if 0
if (queryend == querylength) {
trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
} else {
trim_right_action = PRE_TRIMMED;
}
+#else
+ trim_right_action = COMPUTE_TRIM;
+#endif
} else if (sensedir == SENSE_ANTI) {
start_endtype = END;
@@ -4365,11 +4626,15 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
querystart = substring_querystart; /* 0, for an end piece */
queryend = acceptor_pos;
+#if 0
if (querystart == 0) {
trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
} else {
trim_left_action = PRE_TRIMMED;
}
+#else
+ trim_left_action = COMPUTE_TRIM;
+#endif
trim_right_action = NO_TRIM;
} else {
@@ -4389,11 +4654,15 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
querystart = querylength - acceptor_pos;
queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
+#if 0
if (queryend == querylength) {
trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
} else {
trim_right_action = PRE_TRIMMED;
}
+#else
+ trim_right_action = COMPUTE_TRIM;
+#endif
} else if (sensedir == SENSE_ANTI) {
start_endtype = END;
@@ -4401,11 +4670,15 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
querystart = substring_querystart; /* 0, for an end piece */
queryend = querylength - acceptor_pos;
+#if 0
if (querystart == 0) {
trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
} else {
trim_left_action = PRE_TRIMMED;
}
+#else
+ trim_left_action = COMPUTE_TRIM;
+#endif
trim_right_action = NO_TRIM;
} else {
@@ -4424,33 +4697,31 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
return (T) NULL;
}
- debug2(printf("Making new acceptor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, genome %u..%u\n",
- acceptor_knowni,acceptor_coord,left,plusp,sensedir,querystart,queryend,alignstart - chroffset,alignend - chroffset));
+ debug2(printf("Making new acceptor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, trim %d..%d, genome %u..%u\n",
+ acceptor_knowni,acceptor_coord,left,plusp,sensedir,new->querystart,new->queryend,
+ new->trim_left,new->trim_right,alignstart - chroffset,alignend - chroffset));
debug2(printf("Original bounds were %d..%d\n",substring_querystart,substring_queryend));
+ debug2(printf("Setting siteA_prob to be %f\n",acceptor_prob));
- new->splicecoord = acceptor_coord;
- new->splicesites_knowni = acceptor_knowni;
+ new->splicecoord_A = acceptor_coord;
+ new->splicesitesA_knowni = acceptor_knowni;
+ assert(acceptor_coord == left + acceptor_pos);
- new->chimera_modelpos = left + acceptor_pos;
- assert(new->splicecoord == new->chimera_modelpos);
new->chimera_sensedir = sensedir;
if (acceptor_knowni >= 0) {
- new->chimera_knownp = true;
+ new->siteA_knownp = true;
/* new->chimera_novelp = false */
} else {
/* new->chimera_knownp = false; */
- new->chimera_novelp = true;
+ new->siteA_novelp = true;
}
if (plusp == true) {
- new->chimera_pos = acceptor_pos;
+ new->siteA_pos = acceptor_pos;
} else {
- new->chimera_pos = querylength - acceptor_pos;
+ new->siteA_pos = querylength - acceptor_pos;
}
- new->chimera_prob = acceptor_prob;
-
new->siteA_prob = acceptor_prob;
- new->siteD_prob = 0.0;
return new;
}
@@ -4526,42 +4797,37 @@ Substring_new_shortexon (Univcoord_T acceptor_coord, int acceptor_knowni, Univco
}
debug2(printf("Making new middle with left %u, plusp %d\n",left,plusp));
- new->splicecoord = acceptor_coord;
- new->splicesites_knowni = acceptor_knowni;
- new->splicecoord_2 = donor_coord;
- new->splicesites_knowni_2 = donor_knowni;
+ new->splicecoord_A = acceptor_coord;
+ new->splicesitesA_knowni = acceptor_knowni;
+ new->splicecoord_D = donor_coord;
+ new->splicesitesD_knowni = donor_knowni;
- new->chimera_modelpos = left + acceptor_pos;
- new->chimera_modelpos_2 = left + donor_pos;
new->chimera_sensedir = sensedir;
if (acceptor_knowni >= 0) {
- new->chimera_knownp = true;
+ new->siteA_knownp = true;
/* new->chimera_novelp = false; */
} else {
/* new->chimera_knownp = false; */
- new->chimera_novelp = true;
+ new->siteA_novelp = true;
}
if (donor_knowni >= 0) {
- new->chimera_knownp_2 = true;
+ new->siteD_knownp = true;
/* new->chimera_novelp_2 = false; */
} else {
- /* new->chimera_knownp_2 = false; */
- new->chimera_novelp_2 = true;
+ /* new->siteD_knownp_2 = false; */
+ new->siteD_novelp = true;
}
if (plusp == true) {
- new->chimera_pos = acceptor_pos;
- new->chimera_pos_2 = donor_pos;
+ new->siteA_pos = acceptor_pos;
+ new->siteD_pos = donor_pos;
} else {
- new->chimera_pos = querylength - acceptor_pos;
- new->chimera_pos_2 = querylength - donor_pos;
+ new->siteA_pos = querylength - acceptor_pos;
+ new->siteD_pos = querylength - donor_pos;
}
- new->chimera_prob = acceptor_prob;
- new->chimera_prob_2 = donor_prob;
-
new->siteA_prob = acceptor_prob;
new->siteD_prob = donor_prob;
@@ -4576,26 +4842,26 @@ Substring_assign_donor_prob (T donor) {
if (donor == NULL) {
return;
- } else if (donor->chimera_knownp == false) {
+ } else if (donor->siteD_knownp == false) {
/* Prob already assigned */
} else if (donor->chimera_sensedir == SENSE_FORWARD) {
if (donor->plusp == true) {
- donor->chimera_prob = Maxent_hr_donor_prob(donor->splicecoord,donor->chroffset);
+ donor->siteD_prob = Maxent_hr_donor_prob(donor->splicecoord_D,donor->chroffset);
} else {
- donor->chimera_prob = Maxent_hr_antidonor_prob(donor->splicecoord,donor->chroffset);
+ donor->siteD_prob = Maxent_hr_antidonor_prob(donor->splicecoord_D,donor->chroffset);
}
} else if (donor->chimera_sensedir == SENSE_ANTI) {
if (donor->plusp == true) {
- donor->chimera_prob = Maxent_hr_antidonor_prob(donor->splicecoord,donor->chroffset);
+ donor->siteD_prob = Maxent_hr_antidonor_prob(donor->splicecoord_D,donor->chroffset);
} else {
- donor->chimera_prob = Maxent_hr_donor_prob(donor->splicecoord,donor->chroffset);
+ donor->siteD_prob = Maxent_hr_donor_prob(donor->splicecoord_D,donor->chroffset);
}
} else {
/* SENSE_NULL */
- donor->chimera_prob = 0.0;
+ donor->siteD_prob = 0.0;
}
return;
@@ -4607,26 +4873,26 @@ Substring_assign_acceptor_prob (T acceptor) {
if (acceptor == NULL) {
return;
- } else if (acceptor->chimera_knownp == false) {
+ } else if (acceptor->siteA_knownp == false) {
/* Prob already assigned */
} else if (acceptor->chimera_sensedir == SENSE_FORWARD) {
if (acceptor->plusp == true) {
- acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord,acceptor->chroffset);
+ acceptor->siteA_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord_A,acceptor->chroffset);
} else {
- acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord,acceptor->chroffset);
+ acceptor->siteA_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord_A,acceptor->chroffset);
}
} else if (acceptor->chimera_sensedir == SENSE_ANTI) {
if (acceptor->plusp == true) {
- acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord,acceptor->chroffset);
+ acceptor->siteA_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord_A,acceptor->chroffset);
} else {
- acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord,acceptor->chroffset);
+ acceptor->siteA_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord_A,acceptor->chroffset);
}
} else {
/* SENSE_NULL */
- acceptor->chimera_prob = 0.0;
+ acceptor->siteA_prob = 0.0;
}
return;
@@ -4636,42 +4902,42 @@ Substring_assign_acceptor_prob (T acceptor) {
void
Substring_assign_shortexon_prob (T shortexon) {
- if (shortexon->chimera_knownp == false) {
+ if (shortexon->siteA_knownp == false) {
/* Prob1 already assigned */
} else if (shortexon->chimera_sensedir == SENSE_FORWARD) {
if (shortexon->plusp == true) {
- shortexon->chimera_prob = Maxent_hr_acceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+ shortexon->siteA_prob = Maxent_hr_acceptor_prob(shortexon->splicecoord_A,shortexon->chroffset);
} else {
- shortexon->chimera_prob = Maxent_hr_antiacceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+ shortexon->siteA_prob = Maxent_hr_antiacceptor_prob(shortexon->splicecoord_A,shortexon->chroffset);
}
} else if (shortexon->chimera_sensedir == SENSE_ANTI) {
if (shortexon->plusp == true) {
- shortexon->chimera_prob = Maxent_hr_antiacceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+ shortexon->siteA_prob = Maxent_hr_antiacceptor_prob(shortexon->splicecoord_A,shortexon->chroffset);
} else {
- shortexon->chimera_prob = Maxent_hr_acceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+ shortexon->siteA_prob = Maxent_hr_acceptor_prob(shortexon->splicecoord_A,shortexon->chroffset);
}
} else {
abort();
}
- if (shortexon->chimera_knownp_2 == false) {
+ if (shortexon->siteD_knownp == false) {
/* Prob2 already assigned */
} else if (shortexon->chimera_sensedir == SENSE_FORWARD) {
if (shortexon->plusp == true) {
- shortexon->chimera_prob_2 = Maxent_hr_donor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+ shortexon->siteD_prob = Maxent_hr_donor_prob(shortexon->splicecoord_D,shortexon->chroffset);
} else {
- shortexon->chimera_prob_2 = Maxent_hr_antidonor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+ shortexon->siteD_prob = Maxent_hr_antidonor_prob(shortexon->splicecoord_D,shortexon->chroffset);
}
} else if (shortexon->chimera_sensedir == SENSE_ANTI) {
if (shortexon->plusp == true) {
- shortexon->chimera_prob_2 = Maxent_hr_antidonor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+ shortexon->siteD_prob = Maxent_hr_antidonor_prob(shortexon->splicecoord_D,shortexon->chroffset);
} else {
- shortexon->chimera_prob_2 = Maxent_hr_donor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+ shortexon->siteD_prob = Maxent_hr_donor_prob(shortexon->splicecoord_D,shortexon->chroffset);
}
} else {
@@ -4684,21 +4950,21 @@ Substring_assign_shortexon_prob (T shortexon) {
static int
-ascending_pos_cmp (const void *a, const void *b) {
+ascending_siteD_pos_cmp (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
- if (x->chimera_pos < y->chimera_pos) {
+ if (x->siteD_pos < y->siteD_pos) {
return -1;
- } else if (x->chimera_pos > y->chimera_pos) {
+ } else if (x->siteD_pos > y->siteD_pos) {
return +1;
} else if (x->genomicstart < y->genomicstart) {
return -1;
} else if (x->genomicstart > y->genomicstart) {
return +1;
- } else if (x->chimera_knownp == true && y->chimera_knownp == false) {
+ } else if (x->siteD_knownp == true && y->siteD_knownp == false) {
return -1;
- } else if (y->chimera_knownp == true && x->chimera_knownp == false) {
+ } else if (y->siteD_knownp == true && x->siteD_knownp == false) {
return +1;
} else {
return 0;
@@ -4706,21 +4972,101 @@ ascending_pos_cmp (const void *a, const void *b) {
}
static int
-descending_pos_cmp (const void *a, const void *b) {
+ascending_siteA_pos_cmp (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
- if (x->chimera_pos < y->chimera_pos) {
+ if (x->siteA_pos < y->siteA_pos) {
return -1;
- } else if (x->chimera_pos > y->chimera_pos) {
+ } else if (x->siteA_pos > y->siteA_pos) {
return +1;
+ } else if (x->genomicstart < y->genomicstart) {
+ return -1;
} else if (x->genomicstart > y->genomicstart) {
+ return +1;
+ } else if (x->siteA_knownp == true && y->siteA_knownp == false) {
return -1;
+ } else if (y->siteA_knownp == true && x->siteA_knownp == false) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+static int
+ascending_siteN_pos_cmp (const void *a, const void *b) {
+ T x = * (T *) a;
+ T y = * (T *) b;
+
+ if (x->siteN_pos < y->siteN_pos) {
+ return -1;
+ } else if (x->siteN_pos > y->siteN_pos) {
+ return +1;
} else if (x->genomicstart < y->genomicstart) {
+ return -1;
+ } else if (x->genomicstart > y->genomicstart) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+static int
+descending_siteD_pos_cmp (const void *a, const void *b) {
+ T x = * (T *) a;
+ T y = * (T *) b;
+
+ if (x->siteD_pos < y->siteD_pos) {
+ return -1;
+ } else if (x->siteD_pos > y->siteD_pos) {
+ return +1;
+ } else if (x->genomicstart > y->genomicstart) {
+ return -1;
+ } else if (x->genomicstart < y->genomicstart) {
+ return +1;
+ } else if (x->siteD_knownp == true && y->siteD_knownp == false) {
+ return -1;
+ } else if (y->siteD_knownp == true && x->siteD_knownp == false) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+static int
+descending_siteA_pos_cmp (const void *a, const void *b) {
+ T x = * (T *) a;
+ T y = * (T *) b;
+
+ if (x->siteA_pos < y->siteA_pos) {
+ return -1;
+ } else if (x->siteA_pos > y->siteA_pos) {
+ return +1;
+ } else if (x->genomicstart > y->genomicstart) {
+ return -1;
+ } else if (x->genomicstart < y->genomicstart) {
+ return +1;
+ } else if (x->siteA_knownp == true && y->siteA_knownp == false) {
+ return -1;
+ } else if (y->siteA_knownp == true && x->siteA_knownp == false) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+static int
+descending_siteN_pos_cmp (const void *a, const void *b) {
+ T x = * (T *) a;
+ T y = * (T *) b;
+
+ if (x->siteN_pos < y->siteN_pos) {
+ return -1;
+ } else if (x->siteN_pos > y->siteN_pos) {
return +1;
- } else if (x->chimera_knownp == true && y->chimera_knownp == false) {
+ } else if (x->genomicstart > y->genomicstart) {
return -1;
- } else if (y->chimera_knownp == true && x->chimera_knownp == false) {
+ } else if (x->genomicstart < y->genomicstart) {
return +1;
} else {
return 0;
@@ -4728,7 +5074,111 @@ descending_pos_cmp (const void *a, const void *b) {
}
List_T
-Substring_sort_chimera_halves (List_T hitlist, bool ascendingp) {
+Substring_sort_siteD_halves (List_T hitlist, bool ascendingp) {
+ List_T sorted = NULL;
+ T x, *hits;
+ int n, i, j;
+ bool *eliminate;
+
+ n = List_length(hitlist);
+ debug(printf("Checking %d spliceends for duplicates...",n));
+ if (n == 0) {
+ debug(printf("\n"));
+ return NULL;
+ }
+
+ hits = (T *) MALLOCA(n * sizeof(T));
+ List_fill_array_and_free((void **) hits,&hitlist);
+
+ if (ascendingp == true) {
+ qsort(hits,n,sizeof(T),ascending_siteD_pos_cmp);
+ } else {
+ qsort(hits,n,sizeof(T),descending_siteD_pos_cmp);
+ }
+
+ /* Check for duplicates */
+ eliminate = (bool *) CALLOCA(n,sizeof(bool));
+ for (i = 0; i < n; i++) {
+ x = hits[i];
+ j = i+1;
+ while (j < n && hits[j]->siteD_pos == x->siteD_pos && hits[j]->genomicstart == x->genomicstart) {
+ eliminate[j] = true;
+ j++;
+ }
+ }
+
+ debug(j = 0);
+ for (i = n-1; i >= 0; i--) {
+ x = hits[i];
+ if (eliminate[i] == false) {
+ sorted = List_push(sorted,x);
+ } else {
+ Substring_free(&x);
+ debug(j++);
+ }
+ }
+ debug(printf("%d eliminated\n",j));
+
+ FREEA(hits);
+ FREEA(eliminate);
+
+ return sorted;
+}
+
+List_T
+Substring_sort_siteA_halves (List_T hitlist, bool ascendingp) {
+ List_T sorted = NULL;
+ T x, *hits;
+ int n, i, j;
+ bool *eliminate;
+
+ n = List_length(hitlist);
+ debug(printf("Checking %d spliceends for duplicates...",n));
+ if (n == 0) {
+ debug(printf("\n"));
+ return NULL;
+ }
+
+ hits = (T *) MALLOCA(n * sizeof(T));
+ List_fill_array_and_free((void **) hits,&hitlist);
+
+ if (ascendingp == true) {
+ qsort(hits,n,sizeof(T),ascending_siteA_pos_cmp);
+ } else {
+ qsort(hits,n,sizeof(T),descending_siteA_pos_cmp);
+ }
+
+ /* Check for duplicates */
+ eliminate = (bool *) CALLOCA(n,sizeof(bool));
+ for (i = 0; i < n; i++) {
+ x = hits[i];
+ j = i+1;
+ while (j < n && hits[j]->siteA_pos == x->siteA_pos && hits[j]->genomicstart == x->genomicstart) {
+ eliminate[j] = true;
+ j++;
+ }
+ }
+
+ debug(j = 0);
+ for (i = n-1; i >= 0; i--) {
+ x = hits[i];
+ if (eliminate[i] == false) {
+ sorted = List_push(sorted,x);
+ } else {
+ Substring_free(&x);
+ debug(j++);
+ }
+ }
+ debug(printf("%d eliminated\n",j));
+
+ FREEA(hits);
+ FREEA(eliminate);
+
+ return sorted;
+}
+
+List_T
+Substring_sort_siteN_halves (List_T hitlist, bool ascendingp) {
List_T sorted = NULL;
T x, *hits;
int n, i, j;
@@ -4745,9 +5195,9 @@ Substring_sort_chimera_halves (List_T hitlist, bool ascendingp) {
List_fill_array_and_free((void **) hits,&hitlist);
if (ascendingp == true) {
- qsort(hits,n,sizeof(T),ascending_pos_cmp);
+ qsort(hits,n,sizeof(T),ascending_siteN_pos_cmp);
} else {
- qsort(hits,n,sizeof(T),descending_pos_cmp);
+ qsort(hits,n,sizeof(T),descending_siteN_pos_cmp);
}
/* Check for duplicates */
@@ -4755,7 +5205,7 @@ Substring_sort_chimera_halves (List_T hitlist, bool ascendingp) {
for (i = 0; i < n; i++) {
x = hits[i];
j = i+1;
- while (j < n && hits[j]->chimera_pos == x->chimera_pos && hits[j]->genomicstart == x->genomicstart) {
+ while (j < n && hits[j]->siteN_pos == x->siteN_pos && hits[j]->genomicstart == x->genomicstart) {
eliminate[j] = true;
j++;
}
@@ -4946,7 +5396,7 @@ print_splicesite_labels (Filestring_T fp, T this, int typeint, int chimera_pos,
}
/* Note: this->chimera_knownp might not be set for GMAP alignments */
- if (this->chimera_knownp == true) {
+ if (this->siteD_knownp == true) {
/* Note: IIT_get_typed_signed_with_divno does not work here */
splicesites = IIT_get_exact_multiple_with_divno(&nsplicesites,splicesites_iit,
splicesites_divint_crosstable[this->chrnum],
@@ -5328,17 +5778,17 @@ Substring_print_alignment (Filestring_T fp, Junction_T pre_junction, T substring
/* Handle result of substring_trim_novel_spliceends */
if (invertp == false) {
if (substring->start_endtype == DON) {
- FPRINTF(fp,"donor:%.2f",substring->trim_left,substring->chimera_prob);
+ FPRINTF(fp,"donor:%.2f",substring->siteD_prob);
} else if (substring->start_endtype == ACC) {
- FPRINTF(fp,"acceptor:%.2f",substring->trim_left,substring->chimera_prob);
+ FPRINTF(fp,"acceptor:%.2f",substring->siteA_prob);
} else {
FPRINTF(fp,"start:%d",substring->trim_left);
}
} else {
if (substring->end_endtype == DON) {
- FPRINTF(fp,"donor:%.2f",substring->trim_right,substring->chimera_prob_2);
+ FPRINTF(fp,"donor:%.2f",substring->siteD_prob);
} else if (substring->end_endtype == ACC) {
- FPRINTF(fp,"acceptor:%.2f",substring->trim_right,substring->chimera_prob_2);
+ FPRINTF(fp,"acceptor:%.2f",substring->siteA_prob);
} else {
FPRINTF(fp,"start:%d",substring->trim_right);
}
@@ -5374,17 +5824,17 @@ Substring_print_alignment (Filestring_T fp, Junction_T pre_junction, T substring
/* Handle result of substring_trim_novel_spliceends */
if (invertp == false) {
if (substring->end_endtype == DON) {
- FPRINTF(fp,"donor:%.2f",substring->trim_right,substring->chimera_prob_2);
+ FPRINTF(fp,"donor:%.2f",substring->siteD_prob);
} else if (substring->end_endtype == ACC) {
- FPRINTF(fp,"acceptor:%.2f",substring->trim_right,substring->chimera_prob_2);
+ FPRINTF(fp,"acceptor:%.2f",substring->siteA_prob);
} else {
FPRINTF(fp,"end:%d",substring->trim_right);
}
} else {
if (substring->start_endtype == DON) {
- FPRINTF(fp,"donor:%.2f",substring->trim_left,substring->chimera_prob);
+ FPRINTF(fp,"donor:%.2f",substring->siteD_prob);
} else if (substring->start_endtype == ACC) {
- FPRINTF(fp,"acceptor:%.2f",substring->trim_left,substring->chimera_prob);
+ FPRINTF(fp,"acceptor:%.2f",substring->siteA_prob);
} else {
FPRINTF(fp,"end:%d",substring->trim_left);
}
@@ -5810,32 +6260,32 @@ Substring_print_donor (Filestring_T fp, T donor, int sensedir, bool invertp, Sho
FPRINTF(fp,"\t");
if (sensedir == SENSE_FORWARD) {
if (invertp == false) {
- FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_left,donor->chimera_prob);
+ FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_left,donor->siteD_prob);
label_tag = "label_2";
splice_dist_tag = "splice_dist_2";
} else {
- FPRINTF(fp,"donor:%.2f..end:%d",donor->chimera_prob,donor->trim_left);
+ FPRINTF(fp,"donor:%.2f..end:%d",donor->siteD_prob,donor->trim_left);
label_tag = "label_1";
splice_dist_tag = "splice_dist_1";
}
} else if (sensedir == SENSE_ANTI) {
if (invertp == false) {
- FPRINTF(fp,"donor:%.2f..end:%d",donor->chimera_prob,donor->trim_right);
+ FPRINTF(fp,"donor:%.2f..end:%d",donor->siteD_prob,donor->trim_right);
label_tag = "label_1";
splice_dist_tag = "splice_dist_1";
} else {
- FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_right,donor->chimera_prob);
+ FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_right,donor->siteD_prob);
label_tag = "label_2";
splice_dist_tag = "splice_dist_2";
}
} else {
/* SENSE_NULL */
if (invertp == false) {
- FPRINTF(fp,"start:%d..splice:%.2f",donor->trim_left,donor->chimera_prob);
+ FPRINTF(fp,"start:%d..splice:%.2f",donor->trim_left,donor->siteD_prob);
label_tag = "label_2";
splice_dist_tag = "splice_dist_2";
} else {
- FPRINTF(fp,"splice:%.2f..end:%d",donor->chimera_prob,donor->trim_left);
+ FPRINTF(fp,"splice:%.2f..end:%d",donor->siteD_prob,donor->trim_left);
label_tag = "label_1";
splice_dist_tag = "splice_dist_1";
}
@@ -5871,7 +6321,7 @@ Substring_print_donor (Filestring_T fp, T donor, int sensedir, bool invertp, Sho
}
#ifdef CHECK_KNOWNI
- if (donor->chimera_knownp == false && splicesites_iit) {
+ if (donor->siteD_knownp == false && splicesites_iit) {
if (donor->plusp == true) {
splicesitepos = donor->genomicstart - donor->chroffset + donor->chimera_pos;
} else {
@@ -5884,8 +6334,8 @@ Substring_print_donor (Filestring_T fp, T donor, int sensedir, bool invertp, Sho
}
#endif
- if (donor->chimera_knownp && splicesites_iit) {
- print_splicesite_labels(fp,donor,donor_typeint,donor->chimera_pos,label_tag);
+ if (donor->siteD_knownp && splicesites_iit) {
+ print_splicesite_labels(fp,donor,donor_typeint,donor->siteD_pos,label_tag);
}
if (allocp == true) {
@@ -5917,32 +6367,32 @@ Substring_print_acceptor (Filestring_T fp, T acceptor, int sensedir, bool invert
FPRINTF(fp,"\t");
if (sensedir == SENSE_FORWARD) {
if (invertp == false) {
- FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_right);
+ FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->siteA_prob,acceptor->trim_right);
label_tag = "label_1";
splice_dist_tag = "splice_dist_1";
} else {
- FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_right,acceptor->chimera_prob);
+ FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_right,acceptor->siteA_prob);
label_tag = "label_2";
splice_dist_tag = "splice_dist_2";
}
} else if (sensedir == SENSE_ANTI) {
if (invertp == false) {
- FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_left,acceptor->chimera_prob);
+ FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_left,acceptor->siteA_prob);
label_tag = "label_2";
splice_dist_tag = "splice_dist_2";
} else {
- FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_left);
+ FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->siteA_prob,acceptor->trim_left);
label_tag = "label_1";
splice_dist_tag = "splice_dist_1";
}
} else {
/* SENSE_NULL */
if (invertp == false) {
- FPRINTF(fp,"splice:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_right);
+ FPRINTF(fp,"splice:%.2f..end:%d",acceptor->siteA_prob,acceptor->trim_right);
label_tag = "label_1";
splice_dist_tag = "splice_dist_1";
} else {
- FPRINTF(fp,"start:%d..splice:%.2f",acceptor->trim_right,acceptor->chimera_prob);
+ FPRINTF(fp,"start:%d..splice:%.2f",acceptor->trim_right,acceptor->siteA_prob);
label_tag = "label_2";
splice_dist_tag = "splice_dist_2";
}
@@ -5980,9 +6430,9 @@ Substring_print_acceptor (Filestring_T fp, T acceptor, int sensedir, bool invert
#ifdef CHECK_KNOWNI
if (acceptor->chimera_knownp == false && splicesites_iit) {
if (acceptor->plusp == true) {
- splicesitepos = acceptor->genomicstart - acceptor->chroffset + acceptor->chimera_pos;
+ splicesitepos = acceptor->genomicstart - acceptor->chroffset + acceptor->siteA_pos;
} else {
- splicesitepos = acceptor->genomicstart - acceptor->chroffset - acceptor->chimera_pos;
+ splicesitepos = acceptor->genomicstart - acceptor->chroffset - acceptor->siteA_pos;
}
splicesites = IIT_get_exact_multiple_with_divno(&nsplicesites,splicesites_iit,
splicesites_divint_crosstable[acceptor->chrnum],
@@ -5992,8 +6442,8 @@ Substring_print_acceptor (Filestring_T fp, T acceptor, int sensedir, bool invert
#endif
- if (acceptor->chimera_knownp && splicesites_iit) {
- print_splicesite_labels(fp,acceptor,acceptor_typeint,acceptor->chimera_pos,label_tag);
+ if (acceptor->siteA_knownp && splicesites_iit) {
+ print_splicesite_labels(fp,acceptor,acceptor_typeint,acceptor->siteA_pos,label_tag);
}
@@ -6052,13 +6502,13 @@ Substring_print_shortexon (Filestring_T fp, T shortexon, int sensedir, bool inve
FPRINTF(fp,"\t");
if (sensedir == SENSE_FORWARD && invertp == false) {
- FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->chimera_prob,shortexon->chimera_prob_2);
+ FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->siteA_prob,shortexon->siteD_prob);
} else if (sensedir == SENSE_FORWARD && invertp == true) {
- FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->chimera_prob_2,shortexon->chimera_prob);
+ FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->siteD_prob,shortexon->siteA_prob);
} else if (sensedir == SENSE_ANTI && invertp == false) {
- FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->chimera_prob_2,shortexon->chimera_prob);
+ FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->siteD_prob,shortexon->siteA_prob);
} else if (sensedir == SENSE_ANTI && invertp == true) {
- FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->chimera_prob,shortexon->chimera_prob_2);
+ FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->siteA_prob,shortexon->siteD_prob);
}
FPRINTF(fp,",matches:%d,sub:%d",shortexon->nmatches,shortexon->nmismatches_bothdiff);
@@ -6073,26 +6523,26 @@ Substring_print_shortexon (Filestring_T fp, T shortexon, int sensedir, bool inve
FPRINTF(fp,",dir:sense");
print_shortexon_splice_distances(fp,distance1,distance2);
- if (shortexon->chimera_knownp && splicesites_iit) {
+ if (shortexon->siteA_knownp && splicesites_iit) {
print_splicesite_labels(fp,shortexon,acceptor_typeint,
- shortexon->chimera_pos,/*tag*/"label_1");
+ shortexon->siteA_pos,/*tag*/"label_1");
}
- if (shortexon->chimera_knownp_2 && splicesites_iit) {
+ if (shortexon->siteD_knownp && splicesites_iit) {
print_splicesite_labels(fp,shortexon,donor_typeint,
- shortexon->chimera_pos_2,/*tag*/"label_2");
+ shortexon->siteD_pos,/*tag*/"label_2");
}
} else if (sensedir == SENSE_FORWARD && invertp == true) {
FPRINTF(fp,",dir:antisense");
print_shortexon_splice_distances(fp,distance1,distance2);
- if (shortexon->chimera_knownp_2 && splicesites_iit) {
+ if (shortexon->siteD_knownp && splicesites_iit) {
print_splicesite_labels(fp,shortexon,donor_typeint,
- shortexon->chimera_pos_2,/*tag*/"label_1");
+ shortexon->siteD_pos,/*tag*/"label_1");
}
- if (shortexon->chimera_knownp && splicesites_iit) {
+ if (shortexon->siteA_knownp && splicesites_iit) {
print_splicesite_labels(fp,shortexon,acceptor_typeint,
- shortexon->chimera_pos,/*tag*/"label_2");
+ shortexon->siteA_pos,/*tag*/"label_2");
}
} else if (sensedir == SENSE_ANTI && invertp == false) {
@@ -6101,26 +6551,26 @@ Substring_print_shortexon (Filestring_T fp, T shortexon, int sensedir, bool inve
- if (shortexon->chimera_knownp_2 && splicesites_iit) {
+ if (shortexon->siteD_knownp && splicesites_iit) {
print_splicesite_labels(fp,shortexon,donor_typeint,
- shortexon->chimera_pos_2,/*tag*/"label_1");
+ shortexon->siteD_pos,/*tag*/"label_1");
}
- if (shortexon->chimera_knownp && splicesites_iit) {
+ if (shortexon->siteA_knownp && splicesites_iit) {
print_splicesite_labels(fp,shortexon,acceptor_typeint,
- shortexon->chimera_pos,/*tag*/"label_2");
+ shortexon->siteA_pos,/*tag*/"label_2");
}
} else if (sensedir == SENSE_ANTI && invertp == true) {
FPRINTF(fp,",dir:sense");
print_shortexon_splice_distances(fp,distance1,distance2);
- if (shortexon->chimera_knownp && splicesites_iit) {
+ if (shortexon->siteA_knownp && splicesites_iit) {
print_splicesite_labels(fp,shortexon,acceptor_typeint,
- shortexon->chimera_pos,/*tag*/"label_1");
+ shortexon->siteA_pos,/*tag*/"label_1");
}
- if (shortexon->chimera_knownp_2 && splicesites_iit) {
+ if (shortexon->siteD_knownp && splicesites_iit) {
print_splicesite_labels(fp,shortexon,donor_typeint,
- shortexon->chimera_pos_2,/*tag*/"label_2");
+ shortexon->siteD_pos,/*tag*/"label_2");
}
}
diff --git a/src/substring.h b/src/substring.h
index 56936c6..85356dd 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 195961 2016-08-08 16:36:34Z twu $ */
+/* $Id: substring.h 196273 2016-08-12 15:15:06Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -18,6 +18,7 @@
#include "junction.h"
#include "intlist.h"
#include "doublelist.h"
+#include "list.h"
#ifdef LARGE_GENOMES
#include "uint8list.h"
#else
@@ -33,6 +34,9 @@ typedef enum {NO_TRIM, PRE_TRIMMED, COMPUTE_TRIM} Trimaction_T;
extern char *
Endtype_string (Endtype_T endtype);
+extern char *
+Trimaction_string (Trimaction_T trimaction);
+
extern void
Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in,
bool show_refdiff_p_in, IIT_T snps_iit_in, int *snps_divint_crosstable_in,
@@ -61,17 +65,30 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
int minlength, int sensedir);
extern T
-Substring_new_ambig (int querystart, int queryend, int splice_pos, int querylength,
- Chrnum_T chrnum, Univcoord_T chroffset,
- Univcoord_T chrhigh, Chrpos_T chrlength,
- bool plusp, int genestrand,
+Substring_new_ambig_D (int querystart, int queryend, int splice_pos, int querylength,
+ Chrnum_T chrnum, Univcoord_T chroffset,
+ Univcoord_T chrhigh, Chrpos_T chrlength,
+ bool plusp, int genestrand,
+#ifdef LARGE_GENOMES
+ Uint8list_T ambcoords,
+#else
+ Uintlist_T ambcoords,
+#endif
+ Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs,
+ double amb_common_prob, bool substring1p);
+
+extern T
+Substring_new_ambig_A (int querystart, int queryend, int splice_pos, int querylength,
+ Chrnum_T chrnum, Univcoord_T chroffset,
+ Univcoord_T chrhigh, Chrpos_T chrlength,
+ bool plusp, int genestrand,
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords,
+ Uint8list_T ambcoords,
#else
- Uintlist_T ambcoords,
+ Uintlist_T ambcoords,
#endif
- Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs,
- double amb_common_prob, bool amb_donor_common_p, bool substring1p);
+ Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs,
+ double amb_common_prob, bool substring1p);
extern Univcoord_T
Substring_set_unambiguous (double *donor_prob, double *acceptor_prob, Univcoord_T *genomicstart, Univcoord_T *genomicend,
@@ -107,15 +124,17 @@ Substring_overlap_segment_trimmed (T substring1, T substring2);
extern Univcoord_T
Substring_left (T this);
extern Univcoord_T
-Substring_splicecoord (T this);
-extern Chrpos_T
-Substring_chr_splicecoord (T this);
-extern int
-Substring_splicesites_knowni (T this);
-extern Univcoord_T
Substring_splicecoord_A (T this);
extern Univcoord_T
Substring_splicecoord_D (T this);
+extern Chrpos_T
+Substring_chr_splicecoord_D (T this);
+extern Chrpos_T
+Substring_chr_splicecoord_A (T this);
+extern int
+Substring_splicesitesD_knowni (T this);
+extern int
+Substring_splicesitesA_knowni (T this);
extern bool
Substring_plusp (T this);
@@ -226,25 +245,23 @@ extern double
Substring_amb_acceptor_prob (T this);
extern double
-Substring_siteA_prob (T this);
-extern double
Substring_siteD_prob (T this);
-
-extern double
-Substring_chimera_prob (T this);
extern double
-Substring_chimera_prob_2 (T this);
+Substring_siteA_prob (T this);
+
extern int
-Substring_chimera_pos (T this);
+Substring_siteD_pos (T this);
extern int
-Substring_chimera_pos_A (T this);
+Substring_siteA_pos (T this);
extern int
-Substring_chimera_pos_D (T this);
+Substring_siteN_pos (T this);
extern int
Substring_chimera_sensedir (T this);
extern bool
Substring_ambiguous_p (T this);
+extern bool
+Substring_list_ambiguous_p (List_T list);
extern int
Substring_nambcoords (T this);
extern Univcoord_T *
@@ -296,7 +313,11 @@ Substring_new_shortexon (Univcoord_T acceptor_coord, int acceptor_knowni, Univco
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
extern List_T
-Substring_sort_chimera_halves (List_T hitlist, bool ascendingp);
+Substring_sort_siteD_halves (List_T hitlist, bool ascendingp);
+extern List_T
+Substring_sort_siteA_halves (List_T hitlist, bool ascendingp);
+extern List_T
+Substring_sort_siteN_halves (List_T hitlist, bool ascendingp);
extern Chrpos_T
@@ -349,11 +370,6 @@ extern List_T
Substring_add_intron (List_T pairs, T substringA, T substringB, int querylength,
int hardclip_low, int hardclip_high, int queryseq_offset);
-extern void
-Substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_length_5, int *ambig_end_length_3,
- Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
- double *ambig_prob_5, double *ambig_prob_3, int *sensedir);
-
#undef T
#endif
diff --git a/src/uniqscan.c b/src/uniqscan.c
index 67e63ef..fc13f9a 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 193877 2016-07-12 02:46:33Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 196438 2016-08-16 20:23:27Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -59,6 +59,7 @@ static char rcsid[] = "$Id: uniqscan.c 193877 2016-07-12 02:46:33Z twu $";
#include "getopt.h"
+#define MAX_FLOORS_READLENGTH 300
#define MAX_QUERYLENGTH_FOR_ALLOC 100000
#define MAX_GENOMICLENGTH_FOR_ALLOC 1000000
@@ -393,7 +394,7 @@ print_program_version () {
fprintf(stdout,"Sizes: off_t (%d), size_t (%d), unsigned int (%d), long int (%d), long long int (%d)\n",
(int) sizeof(off_t),(int) sizeof(size_t),(int) sizeof(unsigned int),(int) sizeof(long int),(int) sizeof(long long int));
fprintf(stdout,"Default gmap directory: %s\n",GMAPDB);
- fprintf(stdout,"Maximum read length: %d\n",MAX_READLENGTH);
+ fprintf(stdout,"Maximum stack read length: %d\n",MAX_STACK_READLENGTH);
fprintf(stdout,"Thomas D. Wu, Genentech, Inc.\n");
 fprintf(stdout,"Contact: twu@gene.com\n");
fprintf(stdout,"\n");
@@ -448,7 +449,7 @@ uniqueness_scan (bool from_right_p) {
diagpool = Diagpool_new();
cellpool = Cellpool_new();
- floors_array = (Floors_T *) CALLOC(MAX_READLENGTH+1,sizeof(Floors_T));
+ floors_array = (Floors_T *) CALLOC(MAX_FLOORS_READLENGTH+1,sizeof(Floors_T));
/* Except_stack_create(); -- requires pthreads */
for (i = 0; i < 10; i++) {
@@ -553,7 +554,7 @@ uniqueness_scan (bool from_right_p) {
}
- for (i = 0; i <= MAX_READLENGTH; i++) {
+ for (i = 0; i <= MAX_FLOORS_READLENGTH; i++) {
if (floors_array[i] != NULL) {
Floors_free_keep(&(floors_array[i]));
}
@@ -1302,7 +1303,8 @@ main (int argc, char *argv[]) {
nullgap,maxpeelback,maxpeelback_distalmedial,
extramaterial_end,extramaterial_paired,gmap_mode,
trigger_score_for_gmap,gmap_allowance,max_gmap_pairsearch,
- max_gmap_terminal,max_gmap_improvement,antistranded_penalty);
+ max_gmap_terminal,max_gmap_improvement,antistranded_penalty,
+ MAX_FLOORS_READLENGTH);
Substring_setup(/*print_nsnpdiffs_p*/false,/*print_snplabels_p*/false,
/*show_refdiff_p*/false,snps_iit,snps_divint_crosstable,
genes_iit,genes_divint_crosstable,
@@ -1322,7 +1324,8 @@ main (int argc, char *argv[]) {
Pair_setup(trim_mismatch_score,trim_indel_score,/*gff3_separators_p*/false,/*sam_insert_0M_p*/false,
/*force_xs_direction_p*/false,/*md_lowercase_variant_p*/false,
/*snps_p*/snps_iit ? true : false,/*print_nsnpdiffs_p*/snps_iit ? true : false,
- Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false));
+ Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
+ /*gff3_phase_swap_p*/false);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
/*require_splicedir_p*/false,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
diff --git a/src/univdiag.h b/src/univdiag.h
index b10fab2..ff46d37 100644
--- a/src/univdiag.h
+++ b/src/univdiag.h
@@ -1,4 +1,4 @@
-/* $Id: univdiag.h 195760 2016-08-04 00:12:04Z twu $ */
+/* $Id: univdiag.h 196273 2016-08-12 15:15:06Z twu $ */
#ifndef UNIVDIAG_INCLUDED
#define UNIVDIAG_INCLUDED
diff --git a/tests/Makefile.in b/tests/Makefile.in
index d66cdd2..35e66ea 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -390,9 +390,10 @@ LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
-MAX_READLENGTH = @MAX_READLENGTH@
+MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
@@ -507,7 +508,7 @@ all: all-am
.SUFFIXES:
.SUFFIXES: .log .test .test$(EXEEXT) .trs
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -531,9 +532,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: $(am__configure_deps)
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
align.test: $(top_builddir)/config.status $(srcdir)/align.test.in
diff --git a/util/Makefile.in b/util/Makefile.in
index baf4515..fbee2bd 100644
--- a/util/Makefile.in
+++ b/util/Makefile.in
@@ -231,9 +231,10 @@ LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
-MAX_READLENGTH = @MAX_READLENGTH@
+MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
@@ -357,7 +358,7 @@ top_srcdir = @top_srcdir@
all: all-am
.SUFFIXES:
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -381,9 +382,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: $(am__configure_deps)
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
gmap_compress.pl: $(top_builddir)/config.status $(srcdir)/gmap_compress.pl.in
diff --git a/util/gtf_genes.pl.in b/util/gtf_genes.pl.in
index 3ec4f0b..d5eb745 100644
--- a/util/gtf_genes.pl.in
+++ b/util/gtf_genes.pl.in
@@ -38,7 +38,7 @@ while (defined($line = <>)) {
}
@exons = ();
$sortp = 0;
- $gene_name = get_info(\@info,"gene_name","gene_id");
+ $gene_name = cat_info(\@info,"gene_id","gene_name");
$last_transcript_id = $transcript_id;
$chr = $fields[0];
$strand = $fields[6];
@@ -107,6 +107,28 @@ sub get_info {
return "NA";
}
+sub cat_info {
+ my $info = shift @_;
+ my @desired_keys = @_;
+ my @result = ();
+
+ foreach $desired_key (@desired_keys) {
+ foreach $item (@ {$info}) {
+ ($key,$value) = $item =~ /(\S+) (.+)/;
+ if ($key eq $desired_key) {
+ push @result,$value;
+ }
+ }
+ }
+
+ if ($#result < 0) {
+ print STDERR "Cannot find " . join(" or ",@desired_keys) . " in " . join("; ",@ {$info}) . "\n";
+ return "NA";
+ } else {
+ return join(" ",@result);
+ }
+}
+
sub get_info_optional {
my $info = shift @_;
diff --git a/util/gtf_introns.pl.in b/util/gtf_introns.pl.in
index 232dc40..546bba5 100755
--- a/util/gtf_introns.pl.in
+++ b/util/gtf_introns.pl.in
@@ -108,7 +108,7 @@ while (defined($line = get_line())) {
}
@exons = ();
$sortp = 0;
- $gene_name = get_info(\@info,"gene_name","gene_id");
+ $gene_name = get_info(\@info,"gene_id","gene_name");
$last_transcript_id = $transcript_id;
$chr = $fields[0];
$strand = $fields[6];
diff --git a/util/gtf_splicesites.pl.in b/util/gtf_splicesites.pl.in
index d296a9c..ea8e82a 100755
--- a/util/gtf_splicesites.pl.in
+++ b/util/gtf_splicesites.pl.in
@@ -108,7 +108,7 @@ while (defined($line = get_line())) {
}
@exons = ();
$sortp = 0;
- $gene_name = get_info(\@info,"gene_name","gene_id");
+ $gene_name = get_info(\@info,"gene_id","gene_name");
$last_transcript_id = $transcript_id;
$chr = $fields[0];
$strand = $fields[6];
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list