[med-svn] [gmap] 01/03: New upstream version 2016-09-14
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Mon Sep 19 14:41:17 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 5ad053663467892f2b7218f0067645a372ece020
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Mon Sep 19 16:06:30 2016 +0200
New upstream version 2016-09-14
---
ChangeLog | 90 +++++
VERSION | 2 +-
configure | 24 +-
src/bitpack64-incr.c | 6 +-
src/dynprog_genome.c | 225 ++++++-------
src/gmap.c | 34 +-
src/gsnap.c | 32 +-
src/indel.c | 8 +-
src/indexdb.c | 14 +-
src/indexdb.h | 4 +-
src/sarray-read.c | 266 +++++++++------
src/splice.c | 805 +++++++++++++++++++++++++++++++++++++++++++-
src/splice.h | 12 +-
src/stage1hr.c | 932 +++++++++++++++++++++++++++------------------------
src/stage3.c | 304 ++++++++++++++++-
src/stage3hr.c | 408 +++++++++++++++-------
src/stage3hr.h | 8 +-
src/substring.c | 925 ++++++++++----------------------------------------
src/substring.h | 14 +-
src/uniqscan.c | 30 +-
20 files changed, 2508 insertions(+), 1635 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index da5ae31..07fca4c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,93 @@
+2016-09-14 twu
+
+ * stage3hr.h: Removed obsolete functions
+
+ * stage3hr.c: Fixed cases where trim was added to amb_length. Removed
+ specific amb_length fields for GMAP alignments, and calculating instead
+ using trim_left_splicep and trim_right_splicep
+
+ * stage1hr.c: Modified debugging statements
+
+ * substring.h: Removed an include statement
+
+ * substring.c: Removed an include statement
+
+ * splice.c, splice.h: Moved splice site probability calculations from
+ substring_trim_novel_spliceends to here
+
+ * stage3.c: Fixed gmap_trim_novel_spliceends to initialize mismatchp to be
+ true if the alignment does not extend to the end
+
+2016-09-13 twu
+
+ * dynprog_genome.c: Turned off debugging
+
+ * substring.c: Added comments
+
+ * dynprog_genome.c: Fixed bug in decision-making for using bestscore when it
+ has a good probability. Previously, this switched to the
+ probability-based algorithm. Renamed variables to clarify the algorithms.
+
+2016-09-12 twu
+
+ * stage3hr.c: For overlap calculation, using just trim, not trim plus amb
+ length
+
+2016-09-10 twu
+
+ * stage3hr.h: Defining Stage3end_nmatches
+
+ * stage3hr.c: Defining nmatches to be nmatches_posttrim plus amb length.
+ Requiring minimum number of matches to allow a transloc splice. Favoring
+ definite ambig results, plus insertlength, over definite splices or
+ trimmed ambig, and then favoring definite splices over trimmed ambig.
+
+ * stage1hr.c: Using Stage3end_nmatches instead of
+ Stage3end_nmatches_posttrim to decide whether to run GMAP
+
+ * substring.h: Defining procedures for returning nmatches and amb lengths
+
+ * substring.c: Defining nmatches to be nmatches_posttrim plus ambiguous
+ length. Computing MAPQ over trimmed region to be consistent with
+ pair-based method. For new donor and acceptor substrings, extending the
+ trim calculation to 0 or querylength.
+
+2016-09-08 twu
+
+ * stage1hr.c: Checking whether result of Stage3end_new_splice is NULL
+
+ * stage3hr.c: Using number of matches and nmatches_posttrim in
+ hit_goodness_cmp and hitpair_goodness_cmp. Requiring a minimum number of
+ matches in donor and acceptor before creating a transloc splice. Added
+ code for checking suffix array mismatches.
+
+ * sarray-read.c: After finding an insertion, modifying querystart of current
+ diagonal, so next substring operation starts from that position
+
+ * indel.c: Improved debugging statements
+
+ * bitpack64-incr.c: Fixed errors in code for transferring from bitpack sizes
+ 22 to 24, and from 26 to 28
+
+2016-09-03 twu
+
+ * gmap.c, gsnap.c, uniqscan.c: Using new interface to Indexdb_new_genome
+
+ * splice.c: When splice is not found, return -1 as values for nmismatches
+
+ * sarray-read.c: Allowing initial value of nmismatches to be used if it is
+ 0. Fixed case involving ambiguous substrings.
+
+ * sarray-read.c: Setting nmismatches correctly in various cases, so we do
+ not have to recompute them. Looking at endpoints to determine if the
+ nmismatches value is correct.
+
+2016-09-01 twu
+
+ * indexdb.c, indexdb.h: For the option --unload-shared-memory, use
+ allocation and not memory mapping to make sure we deallocate any shared
+ memory
+
2016-08-24 twu
* genome.c: Not accessing beyond end of blocks when enddiscard is 0
diff --git a/VERSION b/VERSION
index f41d1f1..f7edb0e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-08-24
\ No newline at end of file
+2016-09-14
\ No newline at end of file
diff --git a/configure b/configure
index 304c0bc..d08e66d 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-08-24.
+# Generated by GNU Autoconf 2.69 for gmap 2016-09-14.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-08-24'
-PACKAGE_STRING='gmap 2016-08-24'
+PACKAGE_VERSION='2016-09-14'
+PACKAGE_STRING='gmap 2016-09-14'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
PACKAGE_URL=''
@@ -1372,7 +1372,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2016-08-24 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-09-14 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1443,7 +1443,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2016-08-24:";;
+ short | recursive ) echo "Configuration of gmap 2016-09-14:";;
esac
cat <<\_ACEOF
@@ -1582,7 +1582,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2016-08-24
+gmap configure 2016-09-14
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2188,7 +2188,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2016-08-24, which was
+It was created by gmap $as_me 2016-09-14, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2538,8 +2538,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-08-24" >&5
-$as_echo "2016-08-24" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-09-14" >&5
+$as_echo "2016-09-14" >&6; }
### Read defaults
@@ -4404,7 +4404,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2016-08-24'
+ VERSION='2016-09-14'
cat >>confdefs.h <<_ACEOF
@@ -20109,7 +20109,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2016-08-24, which was
+This file was extended by gmap $as_me 2016-09-14, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20175,7 +20175,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-gmap config.status 2016-08-24
+gmap config.status 2016-09-14
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/src/bitpack64-incr.c b/src/bitpack64-incr.c
index 1146aee..ab5276e 100644
--- a/src/bitpack64-incr.c
+++ b/src/bitpack64-incr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-incr.c 180341 2015-12-07 18:29:40Z twu $";
+static char rcsid[] = "$Id: bitpack64-incr.c 197549 2016-09-08 01:14:55Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -9531,7 +9531,7 @@ transfer_22_24 (UINT4 *out, const UINT4 *in) {
value = ( CONVERT(*in) >> 18 ) % (1U << 22 ) ;
in += WORD_INCR;
value |= (CONVERT(*in) % (1U<< 8 ))<<( 22 - 8 );
- *out |= (value << 16);
+ *out |= (value << 8); /* was 16 */
/* 12 */
value = ( CONVERT(*in) >> 8 ) % (1U << 22 ) ;
@@ -9713,7 +9713,7 @@ transfer_26_28 (UINT4 *out, const UINT4 *in) {
value |= (CONVERT(*in) % (1U<< 2 ))<<( 26 - 2 );
*out |= (value << 16);
out += WORD_INCR;
- *out |= (value >> (28 - 8));
+ *out |= (value >> (28 - 12)); /* was (28 - 8) */
/* 05 */
value = ( CONVERT(*in) >> 2 ) % (1U << 26 ) ;
diff --git a/src/dynprog_genome.c b/src/dynprog_genome.c
index 236b633..b385c33 100644
--- a/src/dynprog_genome.c
+++ b/src/dynprog_genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_genome.c 188752 2016-05-01 17:28:22Z twu $";
+static char rcsid[] = "$Id: dynprog_genome.c 197738 2016-09-13 17:53:52Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -816,8 +816,8 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
int cloR, chighR;
int introntype;
int bestscore = NEG_INFINITY_8, score, scoreL, scoreR, scoreI;
- int bestscore_with_prob = NEG_INFINITY_8;
- double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob, bestprob_trunc;
+ int bestscore_with_suffprob = NEG_INFINITY_8;
+ double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob_with_score, bestprob_trunc;
Univcoord_T splicesitepos;
char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
int *leftdi, *rightdi;
@@ -972,7 +972,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
/* Search using probs and without simultaneously */
bestscore = NEG_INFINITY_8;
- bestprob = bestprob_trunc = 0.0;
+ bestprob_with_score = bestprob_trunc = 0.0;
for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
if ((cloL = rL - lbandL) < 1) {
@@ -1028,8 +1028,8 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1037,7 +1037,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -1051,14 +1051,14 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
+ debug3(printf(" (bestscore_with_sufficient_prob %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -1066,13 +1066,13 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ debug3(printf(" (bestscore_with_sufficient_prob %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
@@ -1124,8 +1124,8 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1133,7 +1133,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -1147,14 +1147,14 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -1168,7 +1168,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
@@ -1203,8 +1203,8 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1212,7 +1212,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -1226,14 +1226,14 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
+ debug3(printf(" (bestscore_with_sufficient_prob %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -1241,13 +1241,13 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ debug3(printf(" (bestscore_with_sufficient_prob %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
@@ -1299,8 +1299,8 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1308,7 +1308,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -1322,14 +1322,14 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -1343,7 +1343,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
@@ -1378,8 +1378,8 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1387,7 +1387,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -1401,14 +1401,14 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -1422,22 +1422,22 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
}
- if (bestprob > 2*PROB_CEILING) {
+ if (bestprob_with_score > 2*PROB_CEILING) {
/* Probability is good with best alignment, so take that */
- debug3(printf("Best alignment has good probability\n"));
- use_prob_p = true;
+ debug3(printf("Best alignment based on score alone has good probability\n"));
+ use_prob_p = false; /* was previously true (bug) */
} else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
/* Probability-based solution is bad, so use alignment */
- debug3(printf("Probability-based solution is bad\n"));
+ debug3(printf("Probability-based solution is bad on both sites\n"));
use_prob_p = false;
- } else if (bestscore_with_prob < bestscore - 9) {
- debug3(printf("Probability-based solution requires very bad alignment, because bestscore_with_prob %d < bestscore %d - 9\n",
- bestscore_with_prob,bestscore));
+ } else if (bestscore_with_suffprob < 0 || bestscore_with_suffprob < bestscore - 9) {
+ debug3(printf("Probability-based solution requires very bad alignment, because bestscore_with_suffprob %d < bestscore %d - 9\n",
+ bestscore_with_suffprob,bestscore));
use_prob_p = false;
} else {
use_prob_p = true;
@@ -1446,13 +1446,13 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
if (use_prob_p == true) {
/* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
debug3(printf("Switch to probability-based solution\n"));
- debug3(printf("SIMD 8. bestscore %d (bestprob %f) vs bestscore_with_prob %d (bestprob_trunc %f, actually %f and %f)\n",
- bestscore,bestprob,bestscore_with_prob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
+ debug3(printf("SIMD 8. bestscore %d (bestprob_with_score %f) vs bestscore_with_suffprob %d (bestprob_trunc %f, actually %f and %f)\n",
+ bestscore,bestprob_with_score,bestscore_with_suffprob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
*bestcL = bestcL_with_prob;
*bestcR = bestcR_with_prob;
*bestrL = bestrL_with_prob;
*bestrR = bestrR_with_prob;
- bestscore = bestscore_with_prob;
+ bestscore = bestscore_with_suffprob;
}
FREEA(rightdi);
@@ -1839,8 +1839,8 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
int cloR, chighR;
int introntype;
int bestscore = NEG_INFINITY_16, score, scoreL, scoreR, scoreI;
- int bestscore_with_prob = NEG_INFINITY_16;
- double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob, bestprob_trunc;
+ int bestscore_with_suffprob = NEG_INFINITY_16;
+ double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob_with_score, bestprob_trunc;
Univcoord_T splicesitepos;
char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
int *leftdi, *rightdi;
@@ -1995,7 +1995,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
/* Search using probs and without simultaneously */
bestscore = NEG_INFINITY_16;
- bestprob = bestprob_trunc = 0.0;
+ bestprob_with_score = bestprob_trunc = 0.0;
for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
if ((cloL = rL - lbandL) < 1) {
@@ -2050,8 +2050,8 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2059,7 +2059,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -2073,14 +2073,14 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -2094,7 +2094,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
@@ -2146,8 +2146,8 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2155,7 +2155,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -2169,14 +2169,14 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -2190,7 +2190,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
@@ -2225,8 +2225,8 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2234,7 +2234,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -2248,14 +2248,14 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -2269,7 +2269,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
@@ -2322,8 +2322,8 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2331,7 +2331,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -2345,14 +2345,14 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -2366,7 +2366,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
@@ -2401,8 +2401,8 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2410,7 +2410,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -2424,14 +2424,14 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -2445,20 +2445,20 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
}
- if (bestprob > 2*PROB_CEILING) {
+ if (bestprob_with_score > 2*PROB_CEILING) {
/* Probability is good with best alignment, so take that */
- debug(printf("Best alignment has good probability\n"));
- use_prob_p = true;
+ debug(printf("Best alignment based on score alone has good probability\n"));
+ use_prob_p = false; /* was previously true (bug) */
} else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
/* Probability-based solution is bad, so use alignment */
- debug(printf("Probability-based solution is bad\n"));
+ debug(printf("Probability-based solution is bad on both sites\n"));
use_prob_p = false;
- } else if (bestscore_with_prob < bestscore - 9) {
+ } else if (bestscore_with_suffprob < 0 || bestscore_with_suffprob < bestscore - 9) {
debug(printf("Probability-based solution requires very bad alignment\n"));
use_prob_p = false;
} else {
@@ -2468,13 +2468,13 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
if (use_prob_p == true) {
/* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
debug(printf("Switch to probability-based solution\n"));
- debug(printf("SIMD 16. bestscore %d (bestprob %f) vs bestscore_with_prob %d (bestprob_trunc %f, actually %f and %f)\n",
- bestscore,bestprob,bestscore_with_prob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
+ debug(printf("SIMD 16. bestscore %d (bestprob_with_score %f) vs bestscore_with_suffprob %d (bestprob_trunc %f, actually %f and %f)\n",
+ bestscore,bestprob_with_score,bestscore_with_suffprob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
*bestcL = bestcL_with_prob;
*bestcR = bestcR_with_prob;
*bestrL = bestrL_with_prob;
*bestrR = bestrR_with_prob;
- bestscore = bestscore_with_prob;
+ bestscore = bestscore_with_suffprob;
}
FREEA(rightdi);
@@ -2712,8 +2712,8 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
int cloR, chighR;
int introntype;
int bestscore = NEG_INFINITY_32, score, scoreL, scoreR, scoreI;
- int bestscore_with_prob = NEG_INFINITY_32;
- double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob, bestprob_trunc;
+ int bestscore_with_suffprob = NEG_INFINITY_32;
+ double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob_with_score, bestprob_trunc;
Univcoord_T splicesitepos;
char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
int *leftdi, *rightdi;
@@ -2868,7 +2868,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
/* Search using probs and without simultaneously */
bestscore = NEG_INFINITY_32;
- bestprob = bestprob_trunc = 0.0;
+ bestprob_with_score = bestprob_trunc = 0.0;
for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
if ((cloL = rL - lbandL) < 1) {
@@ -2957,8 +2957,8 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2966,7 +2966,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -2980,14 +2980,14 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -3001,7 +3001,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
@@ -3053,8 +3053,8 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
+ bestprob_with_score = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob_with_score) {
debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -3062,7 +3062,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
*bestrR = rR;
*bestcL = cL;
*bestcR = cR;
- bestprob = probL + probR;
+ bestprob_with_score = probL + probR;
} else {
debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
@@ -3076,14 +3076,14 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ if (scoreL + scoreI + scoreR > bestscore_with_suffprob) {
debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
} else {
@@ -3097,20 +3097,20 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
bestcR_with_prob = cR;
bestrL_with_prob = rL;
bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
+ bestscore_with_suffprob = scoreL + scoreI + scoreR;
}
}
}
- if (bestprob > 2*PROB_CEILING) {
+ if (bestprob_with_score > 2*PROB_CEILING) {
/* Probability is good with best alignment, so take that */
- debug(printf("Best alignment has good probability\n"));
- use_prob_p = true;
+ debug(printf("Best alignment based on score alone has good probability\n"));
+ use_prob_p = false; /* was previously true (bug) */
} else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
/* Probability-based solution is bad, so use alignment */
- debug(printf("Probability-based solution is bad\n"));
+ debug(printf("Probability-based solution is bad on both sites\n"));
use_prob_p = false;
- } else if (bestscore_with_prob < bestscore - 9) {
+ } else if (bestscore_with_suffprob < 0 || bestscore_with_suffprob < bestscore - 9) {
debug(printf("Probability-based solution requires very bad alignment\n"));
use_prob_p = false;
} else {
@@ -3120,13 +3120,13 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
if (use_prob_p == true) {
/* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
debug(printf("Switch to probability-based solution\n"));
- debug(printf("Non-SIMD. bestscore %d (bestprob %f) vs bestscore_with_prob %d (bestprob_trunc %f, actually %f and %f)\n",
- bestscore,bestprob,bestscore_with_prob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
+ debug(printf("Non-SIMD. bestscore %d (bestprob_with_score %f) vs bestscore_with_suffprob %d (bestprob_trunc %f, actually %f and %f)\n",
+ bestscore,bestprob_with_score,bestscore_with_suffprob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
*bestcL = bestcL_with_prob;
*bestcR = bestcR_with_prob;
*bestrL = bestrL_with_prob;
*bestrR = bestrR_with_prob;
- bestscore = bestscore_with_prob;
+ bestscore = bestscore_with_suffprob;
}
@@ -3865,6 +3865,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
*dynprogindex += (*dynprogindex > 0 ? +1 : -1);
debug3(Pair_dump_list(pairs,true));
debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
+
if (Pair_maxnegscore(pairs) < -10) {
return (List_T) NULL;
} else {
diff --git a/src/gmap.c b/src/gmap.c
index 30f9c9a..5a0fe5f 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 196403 2016-08-16 14:33:56Z twu $";
+static char rcsid[] = "$Id: gmap.c 197391 2016-09-03 00:43:23Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -6170,13 +6170,13 @@ main (int argc, char *argv[]) {
&alphabet,&alphabet_size,required_alphabet,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p);
+ multiple_sequences_p,/*unload_shared_memory_p*/false);
indexdb_rev = Indexdb_new_genome(&index1part_aa,&index1interval,
genomesubdir,fileroot,REV_FILESUFFIX,/*snps_root*/NULL,
&alphabet,&alphabet_size,required_alphabet,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p);
+ multiple_sequences_p,/*unload_shared_memory_p*/false);
if (indexdb_fwd == NULL || indexdb_rev == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets or %s.%s*offsets.\n",
@@ -6235,7 +6235,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metct",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -6244,7 +6244,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metga",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -6260,7 +6260,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6269,7 +6269,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6285,7 +6285,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6294,7 +6294,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6305,7 +6305,7 @@ main (int argc, char *argv[]) {
genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP\n",fileroot,IDX_FILESUFFIX);
exit(9);
}
@@ -6370,7 +6370,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metct",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -6378,7 +6378,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metga",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -6394,7 +6394,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6402,7 +6402,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6418,7 +6418,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6426,7 +6426,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p)) == NULL) {
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6436,7 +6436,7 @@ main (int argc, char *argv[]) {
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p);
+ sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false);
if (indexdb_fwd == NULL) {
fprintf(stderr,"Cannot find snps index file for %s in directory %s\n",snps_root,snpsdir);
exit(9);
diff --git a/src/gsnap.c b/src/gsnap.c
index a6fa7aa..963f3f7 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 196438 2016-08-16 20:23:27Z twu $";
+static char rcsid[] = "$Id: gsnap.c 197391 2016-09-03 00:43:23Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2771,7 +2771,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
genomesubdir,fileroot,/*idx_filesuffix*/"dibase",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP color mode\n",fileroot,"dibase");
exit(9);
}
@@ -2789,7 +2789,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"metct",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -2798,7 +2798,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"metga",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -2814,7 +2814,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2823,7 +2823,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2839,7 +2839,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2848,7 +2848,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2860,7 +2860,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP\n",fileroot,IDX_FILESUFFIX);
exit(9);
}
@@ -2923,7 +2923,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"metct",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -2931,7 +2931,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"metga",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -2947,7 +2947,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2955,7 +2955,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2971,7 +2971,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2979,7 +2979,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p)) == NULL) {
+ multiple_sequences_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2989,7 +2989,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p);
+ multiple_sequences_p,unload_shared_memory_p);
if (indexdb == NULL) {
fprintf(stderr,"Cannot find snps index file for %s in directory %s\n",snps_root,snpsdir);
exit(9);
diff --git a/src/indel.c b/src/indel.c
index 8cfbd1a..92ea133 100644
--- a/src/indel.c
+++ b/src/indel.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indel.c 196431 2016-08-16 20:19:22Z twu $";
+static char rcsid[] = "$Id: indel.c 197550 2016-09-08 01:15:16Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -76,7 +76,8 @@ Indel_resolve_middle_insertion (int *best_nmismatches_i, int *best_nmismatches_j
debug2(printf("g2: %s\n",&(gbuffer[indels])));
/* No need to check chromosome bounds */
- debug2(printf("max_mismatches_allowed is %d\n",max_mismatches_allowed));
+ debug2(printf("max_mismatches_allowed is %d. Calling Genome_mismatches_left over %d..%d\n",
+ max_mismatches_allowed,querystart,queryend));
nmismatches_left = Genome_mismatches_left(mismatch_positions_left,max_mismatches_allowed,
query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
plusp,genestrand);
@@ -91,7 +92,8 @@ Indel_resolve_middle_insertion (int *best_nmismatches_i, int *best_nmismatches_j
/* No need to check chromosome bounds */
- debug2(printf("max_mismatches_allowed is %d\n",max_mismatches_allowed));
+ debug2(printf("max_mismatches_allowed is %d. Calling Genome_mismatches_right over %d..%d\n",
+ max_mismatches_allowed,querystart,queryend));
nmismatches_right = Genome_mismatches_right(mismatch_positions_right,max_mismatches_allowed,
query_compress,left-indels,/*pos5*/querystart,/*pos3*/queryend,
plusp,genestrand);
diff --git a/src/indexdb.c b/src/indexdb.c
index 6bd5d7e..699b6e5 100644
--- a/src/indexdb.c
+++ b/src/indexdb.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb.c 191134 2016-06-03 17:27:37Z twu $";
+static char rcsid[] = "$Id: indexdb.c 197240 2016-09-01 17:07:26Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1167,7 +1167,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
#endif
Width_T required_index1part, Width_T required_interval, bool expand_offsets_p,
Access_mode_T offsetsstrm_access, Access_mode_T positions_access, bool sharedp,
- bool multiple_sequences_p) {
+ bool multiple_sequences_p, bool unload_shared_memory_p) {
T new = (T) MALLOC(sizeof(*new));
Filenames_T filenames;
Oligospace_T basespace, base;
@@ -1375,7 +1375,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
idx_filesuffix,new->index1part,new->index1interval);
}
#ifdef HAVE_MMAP
- if (multiple_sequences_p == false) {
+ if (multiple_sequences_p == false && unload_shared_memory_p == false) {
new->offsetsmeta = (UINT4 *) Access_mmap(&new->offsetsmeta_fd,&new->offsetsmeta_len,
filenames->pointers_filename,/*randomp*/false);
new->offsetsmeta_access = MMAPPED;
@@ -1479,7 +1479,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
#ifdef LARGE_GENOMES
if (filenames->pages_filename != NULL) {
#ifdef HAVE_MMAP
- if (multiple_sequences_p == false) {
+ if (multiple_sequences_p == false && unload_shared_memory_p == false) {
new->offsetspages = (UINT4 *) Access_mmap(&new->offsetspages_fd,&new->offsetspages_len,
filenames->pages_filename,/*randomp*/false);
new->offsetspages_access = MMAPPED;
@@ -1553,7 +1553,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
}
#ifdef LARGE_GENOMES
#ifdef HAVE_MMAP
- if (multiple_sequences_p == false) {
+ if (multiple_sequences_p == false && unload_shared_memory_p == false) {
new->positions_high = (unsigned char *) Access_mmap(&new->positions_high_fd,&new->positions_high_len,
filenames->positions_high_filename,/*randomp*/false);
} else
@@ -1580,7 +1580,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
fprintf(stderr,")\n");
#ifdef HAVE_MMAP
- if (multiple_sequences_p == false) {
+ if (multiple_sequences_p == false && unload_shared_memory_p == false) {
new->positions_low = (UINT4 *) Access_mmap(&new->positions_low_fd,&new->positions_low_len,
filenames->positions_low_filename,/*randomp*/false);
new->positions_low_access = MMAPPED;
@@ -1611,7 +1611,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
#else
#ifdef HAVE_MMAP
- if (multiple_sequences_p == false) {
+ if (multiple_sequences_p == false && unload_shared_memory_p == false) {
new->positions = (UINT4 *) Access_mmap(&new->positions_fd,&new->positions_len,
filenames->positions_low_filename,/*randomp*/false);
new->positions_access = MMAPPED;
diff --git a/src/indexdb.h b/src/indexdb.h
index 0335c57..a4ea875 100644
--- a/src/indexdb.h
+++ b/src/indexdb.h
@@ -1,4 +1,4 @@
-/* $Id: indexdb.h 183995 2016-02-09 18:55:29Z twu $ */
+/* $Id: indexdb.h 197240 2016-09-01 17:07:26Z twu $ */
#ifndef INDEXDB_INCLUDED
#define INDEXDB_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -152,7 +152,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
#endif
Width_T required_index1part, Width_T required_interval, bool expand_offsets_p,
Access_mode_T offsetsstrm_access, Access_mode_T positions_access, bool sharedp,
- bool multiple_sequences_p);
+ bool multiple_sequences_p, bool unload_shared_memory_p);
#ifndef UTILITYP
extern T
Indexdb_new_segment (char *genomicseg,
diff --git a/src/sarray-read.c b/src/sarray-read.c
index 3a02f07..c69089f 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 196431 2016-08-16 20:19:22Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -7358,7 +7358,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
List_T p;
Univdiag_T diagonal, prev_diagonal, new_diagonal;
Chrpos_T splice_distance;
- int querystart_for_merge, querystart, queryend, ignore;
+ int querystart_for_merge, querystart, queryend;
int max_leftward;
int nmismatches, prev_nmismatches;
bool fillin_p;
@@ -7510,10 +7510,15 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
antisense_endpoints = Intlist_push(NULL,prev_diagonal->querystart);
/* Previously pushed prev_diagonal->intscore, but that is not
- correct. Pushing -1 indicates that we need to compute the
- value */
- sense_nmismatches = Intlist_push(NULL,-1);
- antisense_nmismatches = Intlist_push(NULL,-1);
+ correct (unless there are no mismatches). Pushing -1 indicates
+ that we need to compute the value */
+ if (prev_diagonal->intscore == 0) {
+ sense_nmismatches = Intlist_push(NULL,0);
+ antisense_nmismatches = Intlist_push(NULL,0);
+ } else {
+ sense_nmismatches = Intlist_push(NULL,-1);
+ antisense_nmismatches = Intlist_push(NULL,-1);
+ }
for (p = List_next(p); p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
@@ -7545,28 +7550,39 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
} else {
sense_junctions = List_push(sense_junctions,Junction_new_insertion(nindels));
antisense_junctions = List_push(antisense_junctions,Junction_new_insertion(nindels));
+ diagonal->querystart += nindels; /* Needed for subsequent indel computation */
}
if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
/* Still need to compute */
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
+ } else if (Intlist_head(sense_endpoints) != prev_diagonal->querystart) {
+ /* Endpoints not quite the same, so need to recompute */
+ Intlist_head_set(sense_nmismatches,-1);
+ sense_nmismatches = Intlist_push(sense_nmismatches,-1);
} else {
- Intlist_head_set(sense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ Intlist_head_set(sense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
}
- sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
if ((prev_nmismatches = Intlist_head(antisense_nmismatches)) < 0) {
/* Still need to compute */
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
+ } else if (Intlist_head(antisense_endpoints) != prev_diagonal->querystart) {
+ /* Endpoints not quite the same, so need to recompute */
+ Intlist_head_set(antisense_nmismatches,-1);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
} else {
- Intlist_head_set(antisense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ Intlist_head_set(antisense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
}
- antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
sense_lefts = Uintlist_push(sense_lefts,prev_left);
antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
sense_endpoints = Intlist_push(sense_endpoints,indel_pos);
antisense_endpoints = Intlist_push(antisense_endpoints,indel_pos);
- debug13(printf("insertion pos in range %d..%d is %d with nmatches %d+%d\n",
+ debug13(printf("insertion pos in range %d..%d is %d with nmismatches %d+%d\n",
prev_diagonal->querystart,diagonal->queryend,indel_pos,best_nmismatches_i,best_nmismatches_j));
} else if (left <= prev_left + max_deletionlen) {
@@ -7596,17 +7612,27 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
/* Still need to compute */
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
+ } else if (Intlist_head(sense_endpoints) != prev_diagonal->querystart) {
+ /* Endpoints not quite the same, so need to recompute */
+ Intlist_head_set(sense_nmismatches,-1);
+ sense_nmismatches = Intlist_push(sense_nmismatches,-1);
} else {
- Intlist_head_set(sense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ Intlist_head_set(sense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
}
- sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
if ((prev_nmismatches = Intlist_head(antisense_nmismatches)) < 0) {
/* Still need to compute */
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
+ } else if (Intlist_head(antisense_endpoints) != prev_diagonal->querystart) {
+ /* Endpoints not quite the same, so need to recompute */
+ Intlist_head_set(antisense_nmismatches,-1);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
} else {
- Intlist_head_set(antisense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ Intlist_head_set(antisense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
}
- antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
sense_lefts = Uintlist_push(sense_lefts,prev_left);
antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
@@ -7696,10 +7722,15 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
prev_diagonal->querystart,diagonal->queryend,splice_pos,best_nmismatches_i,best_nmismatches_j));
if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
/* Still need to compute */
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
+ } else if (Intlist_head(sense_endpoints) != prev_diagonal->querystart) {
+ /* Endpoints not quite the same, so need to recompute */
+ Intlist_head_set(sense_nmismatches,-1);
+ sense_nmismatches = Intlist_push(sense_nmismatches,-1);
} else {
- Intlist_head_set(sense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ Intlist_head_set(sense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
}
- sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
sense_lefts = Uintlist_push(sense_lefts,prev_left);
if ((splice_pos = Splice_resolve_antisense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
@@ -7728,10 +7759,15 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
prev_diagonal->querystart,diagonal->queryend,splice_pos,best_nmismatches_i,best_nmismatches_j));
if ((prev_nmismatches = Intlist_head(antisense_nmismatches)) < 0) {
/* Still need to compute */
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
+ } else if (Intlist_head(antisense_endpoints) != prev_diagonal->querystart) {
+ /* Endpoints not quite the same, so need to recompute */
+ Intlist_head_set(antisense_nmismatches,-1);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
} else {
- Intlist_head_set(antisense_nmismatches,best_nmismatches_i + prev_nmismatches);
+ Intlist_head_set(antisense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
}
- antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
}
@@ -7747,7 +7783,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
antisense_endpoints = Intlist_push(antisense_endpoints,prev_diagonal->queryend + 1);
- debug13(printf("After step 2\n"));
+ debug13(printf("After step 2 (indels and splices)\n"));
debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
@@ -7764,6 +7800,8 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
} else if (Intlist_length(right_endpoints_sense) == 1) {
/* Only one splice on right */
+ debug13(printf("Only one sense splice on right, which should have %d and %d mismatches\n",
+ Intlist_head(right_amb_nmismatchesi_sense),Intlist_head(right_amb_nmismatchesj_sense)));
splice_pos = Intlist_head(right_endpoints_sense);
queryend = Intlist_head(right_queryends_sense);
left = Uintlist_head(right_ambcoords_sense) - splice_pos;
@@ -7776,14 +7814,18 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
donor_prob = Doublelist_head(right_amb_probsj_sense);
}
- sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
- sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(right_amb_nmismatchesi_sense));
- sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(right_amb_nmismatchesj_sense));
- sense_lefts = Uintlist_push(sense_lefts,left);
-
- sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
- sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ if (Intlist_head(sense_endpoints) != splice_pos) {
+ Intlist_head_set(sense_nmismatches,-1);
+ sense_nmismatches = Intlist_push(sense_nmismatches,-1);
+ Intlist_head_set(sense_endpoints,splice_pos);
+ } else {
+ /* Only distal nmismatches is reliable */
+ /* Intlist_head_set(sense_nmismatches,Intlist_head(right_amb_nmismatchesi_sense)); */
+ sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(right_amb_nmismatchesj_sense));
+ }
sense_endpoints = Intlist_push(sense_endpoints,queryend);
+
+ sense_lefts = Uintlist_push(sense_lefts,left);
sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
donor_prob,acceptor_prob));
@@ -7791,11 +7833,14 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
/* Skip */
} else {
/* Ambiguous substring on right */
+ debug13(printf("Ambiguous substring on right\n"));
splice_pos = Intlist_head(right_endpoints_sense);
queryend = Intlist_head(right_queryends_sense); /* Should all be the same */
- sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
- sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ if (Intlist_head(sense_endpoints) != splice_pos) {
+ Intlist_head_set(sense_nmismatches,-1);
+ Intlist_head_set(sense_endpoints,splice_pos);
+ }
/* sense_endpoints = Intlist_push(sense_endpoints,queryend); */
if (plusp == true) {
@@ -7822,11 +7867,8 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
} else if (Intlist_head(sense_endpoints) == querylength) {
/* Last substring already goes to the end */
} else {
- sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
- sense_endpoints = Intlist_push(sense_endpoints,querylength);
-
- sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
- sense_nmismatches = Intlist_push(sense_nmismatches,-1); /* Recalculate */
+ Intlist_head_set(sense_endpoints,querylength);
+ Intlist_head_set(sense_nmismatches,-1); /* Recalculate */
}
@@ -7839,6 +7881,8 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
} else if (Intlist_length(right_endpoints_antisense) == 1) {
/* Only one splice on right */
+ debug13(printf("Only one antisense splice on right, which should have %d and %d mismatches\n",
+ Intlist_head(right_amb_nmismatchesi_antisense),Intlist_head(right_amb_nmismatchesj_antisense)));
splice_pos = Intlist_head(right_endpoints_antisense);
queryend = Intlist_head(right_queryends_antisense);
left = Uintlist_head(right_ambcoords_antisense) - splice_pos;
@@ -7851,14 +7895,18 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
acceptor_prob = Doublelist_head(right_amb_probsj_antisense);
}
- antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
- antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(right_amb_nmismatchesi_antisense));
- antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(right_amb_nmismatchesj_antisense));
- antisense_lefts = Uintlist_push(antisense_lefts,left);
-
- antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
- antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ if (Intlist_head(antisense_endpoints) != splice_pos) {
+ Intlist_head_set(antisense_nmismatches,-1);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
+ Intlist_head_set(antisense_endpoints,splice_pos);
+ } else {
+ /* Only distal nmismatches is reliable */
+ /* Intlist_head_set(antisense_nmismatches,Intlist_head(right_amb_nmismatchesi_antisense)); */
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(right_amb_nmismatchesj_antisense));
+ }
antisense_endpoints = Intlist_push(antisense_endpoints,queryend);
+
+ antisense_lefts = Uintlist_push(antisense_lefts,left);
antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
donor_prob,acceptor_prob));
@@ -7866,11 +7914,14 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
/* Skip */
} else {
/* Ambiguous substring on right */
+ debug13(printf("Ambiguous substring on right\n"));
splice_pos = Intlist_head(right_endpoints_antisense);
queryend = Intlist_head(right_queryends_antisense); /* Should all be the same */
- antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
- antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ if (Intlist_head(antisense_endpoints) != splice_pos) {
+ Intlist_head_set(antisense_nmismatches,-1);
+ Intlist_head_set(antisense_endpoints,splice_pos);
+ }
/* antisense_endpoints = Intlist_push(antisense_endpoints,queryend); */
if (plusp == true) {
@@ -7897,15 +7948,12 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
} else if (Intlist_head(antisense_endpoints) == querylength) {
/* Last substring already goes to the end */
} else {
- antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
- antisense_endpoints = Intlist_push(antisense_endpoints,querylength);
-
- antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
- antisense_nmismatches = Intlist_push(antisense_nmismatches,-1); /* Recalculate */
+ Intlist_head_set(antisense_endpoints,querylength);
+ Intlist_head_set(antisense_nmismatches,-1); /* Recalculate */
}
- debug13(printf("After step 3\n"));
+ debug13(printf("After step 3 (ambiguous ends on right)\n"));
debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
@@ -7921,7 +7969,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
sense_junctions = List_reverse(sense_junctions);
antisense_junctions = List_reverse(antisense_junctions);
- debug13(printf("After step 4\n"));
+ debug13(printf("After step 4 (reverse alignments)\n"));
debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
@@ -7932,24 +7980,30 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
left_ambig_sense = (Substring_T) NULL;
if (circularp[chrnum] == true) {
/* Skip */
+ debug13(printf("Sense: Chrnum %d is circular, so not computing mismatches\n",chrnum));
} else if (left_endpoints_sense == NULL) {
/* Skip, but extend leftward */
+ debug13(printf("Sense: Skip, but extend leftward\n"));
if (Intlist_head(sense_endpoints) > 0) {
- sense_endpoints = Intlist_pop(sense_endpoints,&querystart);
+ querystart = Intlist_head(sense_endpoints);
if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
/*pos5*/0,/*pos3*/querystart,plusp,genestrand)) > 0) {
- sense_endpoints = Intlist_push(sense_endpoints,querystart - max_leftward);
+ Intlist_head_set(sense_endpoints,querystart - max_leftward);
+ Intlist_head_set(sense_nmismatches,-1);
} else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
/*pos5*/0,/*pos3*/querystart-1,plusp,genestrand)) > 0) {
- sense_endpoints = Intlist_push(sense_endpoints,querystart - max_leftward - 1);
+ Intlist_head_set(sense_endpoints,querystart - max_leftward - 1);
+ Intlist_head_set(sense_nmismatches,-1);
} else {
- sense_endpoints = Intlist_push(sense_endpoints,querystart);
+ /* Keep value as querystart */
}
}
} else if (Intlist_length(left_endpoints_sense) == 1) {
/* Only one splice on left */
+ debug13(printf("Only one sense splice on left, which should have %d and %d mismatches, plusp %d\n",
+ Intlist_head(left_amb_nmismatchesi_sense),Intlist_head(left_amb_nmismatchesj_sense),plusp));
prev_left = Uintlist_head(sense_lefts);
splice_pos = Intlist_head(left_endpoints_sense);
querystart = Intlist_head(left_querystarts_sense);
@@ -7963,37 +8017,46 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
donor_prob = Doublelist_head(left_amb_probsj_sense);
}
- sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
- sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(left_amb_nmismatchesi_sense));
- sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(left_amb_nmismatchesj_sense));
- sense_lefts = Uintlist_push(sense_lefts,left);
-
- sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
- sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ if (Intlist_head(sense_endpoints) != splice_pos) {
+ Intlist_head_set(sense_nmismatches,-1);
+ sense_nmismatches = Intlist_push(sense_nmismatches,-1);
+ Intlist_head_set(sense_endpoints,splice_pos);
+ } else {
+ /* Only distal nmismatches is reliable */
+ /* Intlist_head_set(sense_nmismatches,Intlist_head(left_amb_nmismatchesj_sense)); */
+ sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(left_amb_nmismatchesi_sense));
+ }
sense_endpoints = Intlist_push(sense_endpoints,querystart);
+
+ sense_lefts = Uintlist_push(sense_lefts,left);
sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
donor_prob,acceptor_prob));
} else if (Intlist_vary(left_endpoints_sense) == true) {
/* Skip, but extend leftward */
- sense_endpoints = Intlist_pop(sense_endpoints,&querystart);
+ querystart = Intlist_head(sense_endpoints);
if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
/*pos5*/0,/*pos3*/querystart,plusp,genestrand)) > 0) {
- sense_endpoints = Intlist_push(sense_endpoints,querystart - max_leftward);
+ Intlist_head_set(sense_endpoints,querystart - max_leftward);
+ Intlist_head_set(sense_nmismatches,-1);
} else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
/*pos5*/0,/*pos3*/querystart-1,plusp,genestrand)) > 0) {
- sense_endpoints = Intlist_push(sense_endpoints,querystart - max_leftward - 1);
+ Intlist_head_set(sense_endpoints,querystart - max_leftward - 1);
+ Intlist_head_set(sense_nmismatches,-1);
} else {
- sense_endpoints = Intlist_push(sense_endpoints,querystart);
+ /* Keep value as querystart */
}
} else {
/* Ambiguous substring on left */
+ debug13(printf("Ambiguous substring on left\n"));
splice_pos = Intlist_head(left_endpoints_sense);
querystart = Intlist_head(left_querystarts_sense); /* Should all be the same */
- sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
- sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ if (Intlist_head(sense_endpoints) != splice_pos) {
+ Intlist_head_set(sense_nmismatches,-1);
+ Intlist_head_set(sense_endpoints,splice_pos);
+ }
/* sense_endpoints = Intlist_push(sense_endpoints,querystart); */
if (plusp == true) {
@@ -8017,38 +8080,43 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
if (left_ambig_sense != NULL) {
/* Endpoints begin after ambiguous substring */
+ debug13(printf("Sense: Endpoints begin after ambiguous substring\n"));
} else if (Intlist_head(sense_endpoints) == 0) {
/* First substring already goes to the beginning */
+ debug13(printf("Sense: First substring already goes to the beginning\n"));
} else {
- sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
- sense_endpoints = Intlist_push(sense_endpoints,0);
-
- sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
- sense_nmismatches = Intlist_push(sense_nmismatches,-1); /* Recalculate */
+ Intlist_head_set(sense_endpoints,0);
+ Intlist_head_set(sense_nmismatches,-1); /* Recalculate */
}
left_ambig_antisense = (Substring_T) NULL;
if (circularp[chrnum] == true) {
/* Skip */
+ debug13(printf("Antisense: Chrnum %d is circular, so not computing mismatches\n",chrnum));
} else if (left_endpoints_antisense == NULL) {
/* Skip, but extend leftward */
+ debug13(printf("Antisense: Skip, but extend leftward\n"));
+ querystart = Intlist_head(antisense_endpoints);
if (Intlist_head(antisense_endpoints) > 0) {
- antisense_endpoints = Intlist_pop(antisense_endpoints,&querystart);
if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
/*pos5*/0,/*pos3*/querystart,plusp,genestrand)) > 0) {
- antisense_endpoints = Intlist_push(antisense_endpoints,querystart - max_leftward);
+ Intlist_head_set(antisense_endpoints,querystart - max_leftward);
+ Intlist_head_set(antisense_nmismatches,-1);
} else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
/*pos5*/0,/*pos3*/querystart-1,plusp,genestrand)) > 0) {
- antisense_endpoints = Intlist_push(antisense_endpoints,querystart - max_leftward - 1);
+ Intlist_head_set(antisense_endpoints,querystart - max_leftward - 1);
+ Intlist_head_set(antisense_nmismatches,-1);
} else {
- antisense_endpoints = Intlist_push(antisense_endpoints,querystart);
+ /* Keep value as querystart */
}
}
} else if (Intlist_length(left_endpoints_antisense) == 1) {
/* Only one splice on left */
+ debug13(printf("Only one antisense splice on left, which should have %d and %d mismatches\n",
+ Intlist_head(left_amb_nmismatchesi_antisense),Intlist_head(left_amb_nmismatchesj_antisense)));
prev_left = Uintlist_head(antisense_lefts);
splice_pos = Intlist_head(left_endpoints_antisense);
querystart = Intlist_head(left_querystarts_antisense);
@@ -8062,37 +8130,46 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
acceptor_prob = Doublelist_head(left_amb_probsj_antisense);
}
- antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
- antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(left_amb_nmismatchesi_antisense));
- antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(left_amb_nmismatchesj_antisense));
- antisense_lefts = Uintlist_push(antisense_lefts,left);
-
- antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
- antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ if (Intlist_head(antisense_endpoints) != splice_pos) {
+ Intlist_head_set(antisense_nmismatches,-1);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
+ Intlist_head_set(antisense_endpoints,splice_pos);
+ } else {
+ /* Only distal nmismatches is reliable */
+ /* Intlist_head_set(antisense_nmismatches,Intlist_head(left_amb_nmismatchesj_antisense)); */
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(left_amb_nmismatchesi_antisense));
+ }
antisense_endpoints = Intlist_push(antisense_endpoints,querystart);
+
+ antisense_lefts = Uintlist_push(antisense_lefts,left);
antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
donor_prob,acceptor_prob));
} else if (Intlist_vary(left_endpoints_antisense) == true) {
/* Skip, but extend leftward */
- antisense_endpoints = Intlist_pop(antisense_endpoints,&querystart);
+ querystart = Intlist_head(antisense_endpoints);
if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
/*pos5*/0,/*pos3*/querystart,plusp,genestrand)) > 0) {
- antisense_endpoints = Intlist_push(antisense_endpoints,querystart - max_leftward);
+ Intlist_head_set(antisense_endpoints,querystart - max_leftward);
+ Intlist_head_set(antisense_nmismatches,-1); /* Endpoint moved, so recalculate mismatches */
} else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
/*pos5*/0,/*pos3*/querystart-1,plusp,genestrand)) > 0) {
- antisense_endpoints = Intlist_push(antisense_endpoints,querystart - max_leftward - 1);
+ Intlist_head_set(antisense_endpoints,querystart - max_leftward - 1);
+ Intlist_head_set(antisense_nmismatches,-1); /* Endpoint moved, so recalculate mismatches */
} else {
- antisense_endpoints = Intlist_push(antisense_endpoints,querystart);
+ /* Keep value as querystart */
}
} else {
/* Ambiguous substring on left */
+ debug13(printf("Ambiguous substring on left\n"));
splice_pos = Intlist_head(left_endpoints_antisense);
querystart = Intlist_head(left_querystarts_antisense); /* Should all be the same */
- antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
- antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ if (Intlist_head(antisense_endpoints) != splice_pos) {
+ Intlist_head_set(antisense_nmismatches,-1);
+ Intlist_head_set(antisense_endpoints,splice_pos);
+ }
/* antisense_endpoints = Intlist_push(antisense_endpoints,querystart); */
if (plusp == true) {
@@ -8116,18 +8193,17 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
if (left_ambig_antisense != NULL) {
/* Endpoints begin after ambiguous substring */
+ debug13(printf("Antisense: Endpoints begin after ambiguous substring\n"));
} else if (Intlist_head(antisense_endpoints) == 0) {
/* First substring already goes to the beginning */
+ debug13(printf("Antisense: First substring already goes to the beginning\n"));
} else {
- antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
- antisense_endpoints = Intlist_push(antisense_endpoints,0);
-
- antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
- antisense_nmismatches = Intlist_push(antisense_nmismatches,-1); /* Recalculate */
+ Intlist_head_set(antisense_endpoints,0);
+ Intlist_head_set(antisense_nmismatches,-1); /* Recalculate */
}
- debug13(printf("After step 5\n"));
+ debug13(printf("After step 5 (ambiguous ends on left)\n"));
debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
diff --git a/src/splice.c b/src/splice.c
index 0c8ee0a..59876c4 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 196431 2016-08-16 20:19:22Z twu $";
+static char rcsid[] = "$Id: splice.c 197773 2016-09-14 00:39:12Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -49,6 +49,13 @@ static char rcsid[] = "$Id: splice.c 196431 2016-08-16 20:19:22Z twu $";
#define debug7(x)
#endif
+/* Trim novel splice ends */
+#ifdef DEBUG13
+#define debug13(x) x
+#else
+#define debug13(x)
+#endif
+
@@ -168,7 +175,7 @@ Splice_resolve_sense (int *best_knowni_i, int *best_knowni_j,
segmenti_left,segmentj_left,segmentj_left-segmenti_left,querystart,queryend));
*best_knowni_i = *best_knowni_j = -1;
- *best_nmismatches_i = *best_nmismatches_j = 0;
+ *best_nmismatches_i = *best_nmismatches_j = -1; /* Indicates that calling procedure needs to compute numbers of mismatches */
*best_prob_i = *best_prob_j = 0.0;
splice_pos_start = querystart;
@@ -450,6 +457,7 @@ Splice_resolve_sense (int *best_knowni_i, int *best_knowni_j,
return best_splice_pos;
} else {
debug1(printf("Not returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ *best_nmismatches_i = *best_nmismatches_j = -1; /* Indicates that calling procedure needs to compute numbers of mismatches */
return -1;
}
}
@@ -508,7 +516,7 @@ Splice_resolve_antisense (int *best_knowni_i, int *best_knowni_j,
segmenti_left,segmentj_left,segmentj_left-segmenti_left,querystart,queryend));
*best_knowni_i = *best_knowni_j = -1;
- *best_nmismatches_i = *best_nmismatches_j = 0;
+ *best_nmismatches_i = *best_nmismatches_j = -1; /* Indicates that calling procedure needs to compute numbers of mismatches */
*best_prob_i = *best_prob_j = 0.0;
splice_pos_start = querystart;
@@ -789,6 +797,7 @@ Splice_resolve_antisense (int *best_knowni_i, int *best_knowni_j,
return best_splice_pos;
} else {
debug1(printf("Not returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ *best_nmismatches_i = *best_nmismatches_j = -1; /* Indicates that calling procedure needs to compute numbers of mismatches */
return -1;
}
}
@@ -2480,3 +2489,793 @@ Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambigu
return winners;
}
+
+
+#define END_SPLICESITE_PROB_MATCH 0.90
+#define END_SPLICESITE_PROB_MISMATCH 0.95
+
+/* Derived from substring_trim_novel_spliceends in substring.c, which
+ was modified from trim_novel_spliceends in stage3.c */
+/* Note: If substring does not extend to ends of query, then region
+ beyond querystart and queryend might actually be matching, and not
+ mismatches. Could fix in the future. */
+void
+Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3,
+ Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
+ double *ambig_prob_5, double *ambig_prob_3, int *sensedir,
+ Univcoord_T start5, Univcoord_T middle5, Univcoord_T end5, bool solve5p,
+ Univcoord_T start3, Univcoord_T middle3, Univcoord_T end3, bool solve3p,
+ Univcoord_T genomicstart5, Univcoord_T genomicend3,
+ Univcoord_T chroffset, bool plusp) {
+
+ Univcoord_T genomicpos, start_genomicpos, middle_genomicpos, end_genomicpos;
+ Univcoord_T splice_genomepos_5, splice_genomepos_3, splice_genomepos_5_mm, splice_genomepos_3_mm;
+ double donor_prob, acceptor_prob;
+ double max_prob_5 = 0.0, max_prob_3 = 0.0,
+ max_prob_sense_forward_5 = 0.0, max_prob_sense_anti_5 = 0.0,
+ max_prob_sense_forward_3 = 0.0, max_prob_sense_anti_3 = 0.0;
+ double max_prob_5_mm = 0.0, max_prob_3_mm = 0.0,
+ max_prob_sense_forward_5_mm = 0.0, max_prob_sense_anti_5_mm = 0.0,
+ max_prob_sense_forward_3_mm = 0.0, max_prob_sense_anti_3_mm = 0.0;
+ Splicetype_T splicetype5, splicetype3, splicetype5_mm, splicetype3_mm;
+ int splice_sensedir_5, splice_sensedir_3, splice_sensedir_5_mm, splice_sensedir_3_mm;
+
+
+ debug13(printf("\nEntered Splice_trim_novel_spliceends with sensedir %d\n",*sensedir));
+ *ambig_end_length_5 = 0;
+ *ambig_end_length_3 = 0;
+ *ambig_prob_5 = 0.0;
+ *ambig_prob_3 = 0.0;
+
+#if 0
+ /* Responsibility of caller */
+ /* start is distal, end is medial */
+ if (solve3p == false) {
+ /* Skip 3' end*/
+ } else if (plusp == true) {
+ middle = substringN->alignend_trim + 1;
+ if ((start = middle + END_SPLICESITE_SEARCH) > substringN->genomicend) {
+ start = substringN->genomicend;
+ }
+ if ((end = middle - END_SPLICESITE_SEARCH) < substringN->alignstart_trim + MIN_EXON_LENGTH) {
+ end = substringN->alignstart_trim + MIN_EXON_LENGTH;
+ }
+ debug13(printf("\n1 Set end points for 3' trim to be %u..%u..%u\n",start,middle,end));
+
+ } else {
+ middle = substringN->alignend_trim - 1;
+ if ((start = middle - END_SPLICESITE_SEARCH) < substringN->genomicend) {
+ start = substringN->genomicend;
+ }
+ if ((end = middle + END_SPLICESITE_SEARCH) > substringN->alignstart_trim - MIN_EXON_LENGTH) {
+ end = substringN->alignstart_trim - MIN_EXON_LENGTH;
+ }
+ debug13(printf("\n2 Set end points for 3' trim to be %u..%u..%u\n",start,middle,end));
+ }
+#endif
+
+ if (solve3p == false) {
+ /* Skip 3' end */
+ } else if (*sensedir == SENSE_FORWARD) {
+ if (plusp) {
+ splicetype3 = splicetype3_mm = DONOR;
+
+ start_genomicpos = start3;
+ middle_genomicpos = middle3;
+ end_genomicpos = end3;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos) {
+ donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
+ debug13(printf("3', watson, sense anti %u %u %f mm\n",genomicpos,genomicpos-chroffset,donor_prob));
+ if (donor_prob > max_prob_3_mm) {
+ max_prob_3_mm = donor_prob;
+ splice_genomepos_3_mm = genomicpos;
+ }
+ genomicpos--;
+ }
+ while (genomicpos >= end_genomicpos) {
+ donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
+ debug13(printf("3', watson, sense anti %u %u %f\n",genomicpos,genomicpos-chroffset,donor_prob));
+ if (donor_prob > max_prob_3) {
+ max_prob_3 = donor_prob;
+ splice_genomepos_3 = genomicpos;
+ }
+ genomicpos--;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ splicetype3 = splicetype3_mm = ANTIDONOR;
+
+ start_genomicpos = start3;
+ middle_genomicpos = middle3;
+ end_genomicpos = end3;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos) {
+ donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
+ debug13(printf("3', crick, sense forward %u %u %f mm\n",genomicpos,genomicpos-chroffset,donor_prob));
+ if (donor_prob > max_prob_3_mm) {
+ max_prob_3_mm = donor_prob;
+ splice_genomepos_3_mm = genomicpos;
+ }
+ genomicpos++;
+ }
+ while (genomicpos <= end_genomicpos) {
+ donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
+ debug13(printf("3', crick, sense forward %u %u %f\n",genomicpos,genomicpos-chroffset,donor_prob));
+ if (donor_prob > max_prob_3) {
+ max_prob_3 = donor_prob;
+ splice_genomepos_3 = genomicpos;
+ }
+ genomicpos++;
+ }
+ debug13(printf("\n"));
+ }
+
+ } else if (*sensedir == SENSE_ANTI) {
+ if (plusp) {
+ splicetype3 = splicetype3_mm = ANTIACCEPTOR;
+
+ start_genomicpos = start3;
+ middle_genomicpos = middle3;
+ end_genomicpos = end3;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */
+ debug13(printf("3', watson, sense forward %u %u %f mm\n",genomicpos,genomicpos-chroffset,acceptor_prob));
+ if (acceptor_prob > max_prob_3_mm) {
+ max_prob_3_mm = acceptor_prob;
+ splice_genomepos_3_mm = genomicpos;
+ }
+ genomicpos--;
+ }
+ while (genomicpos >= end_genomicpos) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */
+ debug13(printf("3', watson, sense forward %u %u %f\n",genomicpos,genomicpos-chroffset,acceptor_prob));
+ if (acceptor_prob > max_prob_3) {
+ max_prob_3 = acceptor_prob;
+ splice_genomepos_3 = genomicpos;
+ }
+ genomicpos--;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ splicetype3 = splicetype3_mm = ACCEPTOR;
+
+ start_genomicpos = start3;
+ middle_genomicpos = middle3;
+ end_genomicpos = end3;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos) {
+ acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */
+ debug13(printf("3', crick, sense anti %u %u %f mm\n",genomicpos,genomicpos-chroffset,acceptor_prob));
+ if (acceptor_prob > max_prob_3_mm) {
+ max_prob_3_mm = acceptor_prob;
+ splice_genomepos_3_mm = genomicpos;
+ }
+ genomicpos++;
+ }
+ while (genomicpos <= end_genomicpos) {
+ acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */
+ debug13(printf("3', crick, sense anti %u %u %f\n",genomicpos,genomicpos-chroffset,acceptor_prob));
+ if (acceptor_prob > max_prob_3) {
+ max_prob_3 = acceptor_prob;
+ splice_genomepos_3 = genomicpos;
+ }
+ genomicpos++;
+ }
+ debug13(printf("\n"));
+ }
+
+ } else {
+ if (plusp) {
+ start_genomicpos = start3;
+ middle_genomicpos = middle3;
+ end_genomicpos = end3;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos) {
+ donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
+ acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */
+ debug13(printf("3', watson, sense null %u %u %f %f mm\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
+ if (donor_prob > max_prob_sense_forward_3_mm) {
+ max_prob_sense_forward_3_mm = donor_prob;
+ if (donor_prob > max_prob_3_mm) {
+ max_prob_3_mm = donor_prob;
+ splice_genomepos_3_mm = genomicpos;
+ /* splice_cdna_direction_3_mm = +1; */
+ splice_sensedir_3_mm = SENSE_FORWARD;
+ splicetype3_mm = DONOR;
+ }
+ }
+ if (acceptor_prob > max_prob_sense_anti_3_mm) {
+ max_prob_sense_anti_3_mm = acceptor_prob;
+ if (acceptor_prob > max_prob_3_mm) {
+ max_prob_3_mm = acceptor_prob;
+ splice_genomepos_3_mm = genomicpos;
+ /* splice_cdna_direction_3_mm = -1; */
+ splice_sensedir_3_mm = SENSE_ANTI;
+ splicetype3_mm = ANTIACCEPTOR;
+ }
+ }
+ genomicpos--;
+ }
+ while (genomicpos >= end_genomicpos) {
+ donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
+ acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */
+ debug13(printf("3', watson, sense null %u %u %f %f\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
+ if (donor_prob > max_prob_sense_forward_3) {
+ max_prob_sense_forward_3 = donor_prob;
+ if (donor_prob > max_prob_3) {
+ max_prob_3 = donor_prob;
+ splice_genomepos_3 = genomicpos;
+ /* splice_cdna_direction_3 = +1; */
+ splice_sensedir_3 = SENSE_FORWARD;
+ splicetype3 = DONOR;
+ }
+ }
+ if (acceptor_prob > max_prob_sense_anti_3) {
+ max_prob_sense_anti_3 = acceptor_prob;
+ if (acceptor_prob > max_prob_3) {
+ max_prob_3 = acceptor_prob;
+ splice_genomepos_3 = genomicpos;
+ /* splice_cdna_direction_3 = -1; */
+ splice_sensedir_3 = SENSE_ANTI;
+ splicetype3 = ANTIACCEPTOR;
+ }
+ }
+ genomicpos--;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ start_genomicpos = start3;
+ middle_genomicpos = middle3;
+ end_genomicpos = end3;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos) {
+ donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
+ acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */
+ debug13(printf("3', crick, sense null %u %u %f %f mm\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
+ if (donor_prob > max_prob_sense_forward_3_mm) {
+ max_prob_sense_forward_3_mm = donor_prob;
+ if (donor_prob > max_prob_3_mm) {
+ max_prob_3_mm = donor_prob;
+ splice_genomepos_3_mm = genomicpos;
+ /* splice_cdna_direction_3_mm = +1; */
+ splice_sensedir_3_mm = SENSE_FORWARD;
+ splicetype3_mm = ANTIDONOR;
+ }
+ }
+ if (acceptor_prob > max_prob_sense_anti_3_mm) {
+ max_prob_sense_anti_3_mm = acceptor_prob;
+ if (acceptor_prob > max_prob_3_mm) {
+ max_prob_3_mm = acceptor_prob;
+ splice_genomepos_3_mm = genomicpos;
+ /* splice_cdna_direction_3_mm = -1; */
+ splice_sensedir_3_mm = SENSE_ANTI;
+ splicetype3_mm = ACCEPTOR;
+ }
+ }
+ genomicpos++;
+ }
+ while (genomicpos <= end_genomicpos) {
+ donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
+ acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */
+ debug13(printf("3', crick, sense null %u %u %f %f\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
+ if (donor_prob > max_prob_sense_forward_3) {
+ max_prob_sense_forward_3 = donor_prob;
+ if (donor_prob > max_prob_3) {
+ max_prob_3 = donor_prob;
+ splice_genomepos_3 = genomicpos;
+ /* splice_cdna_direction_3 = +1; */
+ splice_sensedir_3 = SENSE_FORWARD;
+ splicetype3 = ANTIDONOR;
+ }
+ }
+ if (acceptor_prob > max_prob_sense_anti_3) {
+ max_prob_sense_anti_3 = acceptor_prob;
+ if (acceptor_prob > max_prob_3) {
+ max_prob_3 = acceptor_prob;
+ splice_genomepos_3 = genomicpos;
+ /* splice_cdna_direction_3 = -1; */
+ splice_sensedir_3 = SENSE_ANTI;
+ splicetype3 = ACCEPTOR;
+ }
+ }
+ genomicpos++;
+ }
+ debug13(printf("\n"));
+ }
+ }
+
+ if (solve3p == false) {
+ /* Skip 3' end */
+ } else if (*sensedir != SENSE_NULL) {
+ if (max_prob_3 > END_SPLICESITE_PROB_MATCH) {
+ debug13(printf("Found good splice %s on 3' end at %u with probability %f\n",
+ Splicetype_string(splicetype3),splice_genomepos_3-chroffset,max_prob_3));
+ if (plusp) {
+ *ambig_end_length_3 = genomicend3 - splice_genomepos_3;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,genomicend3,splice_genomepos_3));
+ } else {
+ *ambig_end_length_3 = splice_genomepos_3 - genomicend3;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3,genomicend3));
+ }
+ *ambig_splicetype_3 = splicetype3;
+ *ambig_prob_3 = max_prob_3;
+
+ } else if (max_prob_3_mm > END_SPLICESITE_PROB_MISMATCH) {
+ debug13(printf("Found good mismatch splice %s on 3' end at %u with probability %f\n",
+ Splicetype_string(splicetype3_mm),splice_genomepos_3_mm-chroffset,max_prob_3_mm));
+ if (plusp) {
+ *ambig_end_length_3 = genomicend3 - splice_genomepos_3_mm;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,genomicend3,splice_genomepos_3_mm));
+ } else {
+ *ambig_end_length_3 = splice_genomepos_3_mm - genomicend3;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3_mm,genomicend3));
+ }
+ *ambig_splicetype_3 = splicetype3_mm;
+ *ambig_prob_3 = max_prob_3_mm;
+ }
+ }
+
+
+#if 0
+ /* Responsibility of caller */
+ /* start is distal, end is medial */
+ if (solve5p == false) {
+ /* Skip 5' end */
+ } else if (plusp == true) {
+ middle = substring1->alignstart_trim - 1;
+ if ((start = middle - END_SPLICESITE_SEARCH) < substring1->genomicstart) {
+ start = substring1->genomicstart;
+ }
+ if ((end = middle + END_SPLICESITE_SEARCH) > substring1->alignend_trim - MIN_EXON_LENGTH) {
+ end = substring1->alignend_trim - MIN_EXON_LENGTH;
+ }
+ debug13(printf("\n1 Set end points for 5' trim to be %u..%u..%u\n",start,middle,end));
+
+ } else {
+ middle = substring1->alignstart_trim + 1;
+ if ((start = middle + END_SPLICESITE_SEARCH) > substring1->genomicstart) {
+ start = substring1->genomicstart;
+ }
+ if ((end = middle - END_SPLICESITE_SEARCH) < substring1->alignend_trim + MIN_EXON_LENGTH) {
+ end = substring1->alignend_trim + MIN_EXON_LENGTH;
+ }
+ debug13(printf("\n2 Set end points for 5' trim to be %u..%u..%u\n",start,middle,end));
+ }
+#endif
+
+ if (solve5p == false) {
+ /* Skip 5' end */
+ } else if (*sensedir == SENSE_FORWARD) {
+ if (plusp) {
+ splicetype5 = splicetype5_mm = ACCEPTOR;
+
+ start_genomicpos = start5;
+ middle_genomicpos = middle5;
+ end_genomicpos = end5;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos) {
+ acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
+ debug13(printf("5', watson, sense forward %u %u %f mm\n",genomicpos,genomicpos-chroffset,acceptor_prob));
+ if (acceptor_prob > max_prob_5_mm) {
+ max_prob_5_mm = acceptor_prob;
+ splice_genomepos_5_mm = genomicpos;
+ }
+ genomicpos++;
+ }
+ while (genomicpos <= end_genomicpos) {
+ acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
+ debug13(printf("5', watson, sense forward %u %u %f\n",genomicpos,genomicpos-chroffset,acceptor_prob));
+ if (acceptor_prob > max_prob_5) {
+ max_prob_5 = acceptor_prob;
+ splice_genomepos_5 = genomicpos;
+ }
+ genomicpos++;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ splicetype5 = splicetype5_mm = ANTIACCEPTOR;
+
+ start_genomicpos = start5;
+ middle_genomicpos = middle5;
+ end_genomicpos = end5;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
+ debug13(printf("5', crick, sense anti %u %u %f mm\n",genomicpos,genomicpos-chroffset,acceptor_prob));
+ if (acceptor_prob > max_prob_5_mm) {
+ max_prob_5_mm = acceptor_prob;
+ splice_genomepos_5_mm = genomicpos;
+ }
+ genomicpos--;
+ }
+ while (genomicpos >= end_genomicpos) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
+ debug13(printf("5', crick, sense anti %u %u %f\n",genomicpos,genomicpos-chroffset,acceptor_prob));
+ if (acceptor_prob > max_prob_5) {
+ max_prob_5 = acceptor_prob;
+ splice_genomepos_5 = genomicpos;
+ }
+ genomicpos--;
+ }
+ debug13(printf("\n"));
+ }
+
+ } else if (*sensedir == SENSE_ANTI) {
+ if (plusp) {
+ splicetype5 = splicetype5_mm = ANTIDONOR;
+
+ start_genomicpos = start5;
+ middle_genomicpos = middle5;
+ end_genomicpos = end5;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos) {
+ donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */
+ debug13(printf("5', watson, sense anti %u %u %f mm\n",genomicpos,genomicpos-chroffset,donor_prob));
+ if (donor_prob > max_prob_5_mm) {
+ max_prob_5_mm = donor_prob;
+ splice_genomepos_5_mm = genomicpos;
+ }
+ genomicpos++;
+ }
+ while (genomicpos <= end_genomicpos) {
+ donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */
+ debug13(printf("5', watson, sense anti %u %u %f\n",genomicpos,genomicpos-chroffset,donor_prob));
+ if (donor_prob > max_prob_5) {
+ max_prob_5 = donor_prob;
+ splice_genomepos_5 = genomicpos;
+ }
+ genomicpos++;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ splicetype5 = splicetype5_mm = DONOR;
+
+ start_genomicpos = start5;
+ middle_genomicpos = middle5;
+ end_genomicpos = end5;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos) {
+ donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */
+ debug13(printf("5', crick, sense forward %u %u %f mm\n",genomicpos,genomicpos-chroffset,donor_prob));
+ if (donor_prob > max_prob_5_mm) {
+ max_prob_5_mm = donor_prob;
+ splice_genomepos_5_mm = genomicpos;
+ }
+ genomicpos--;
+ }
+ while (genomicpos >= end_genomicpos) {
+ donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */
+ debug13(printf("5', crick, sense forward %u %u %f\n",genomicpos,genomicpos-chroffset,donor_prob));
+ if (donor_prob > max_prob_5) {
+ max_prob_5 = donor_prob;
+ splice_genomepos_5 = genomicpos;
+ }
+ genomicpos--;
+ }
+ debug13(printf("\n"));
+ }
+
+ } else {
+ if (plusp) {
+ start_genomicpos = start5;
+ middle_genomicpos = middle5;
+ end_genomicpos = end5;
+
+ /* assert(start_genomicpos <= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos <= middle_genomicpos) {
+ acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
+ donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */
+ debug13(printf("5', watson, sense null %u %u %f %f mm\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
+ if (acceptor_prob > max_prob_sense_forward_5_mm) {
+ max_prob_sense_forward_5_mm = acceptor_prob;
+ if (acceptor_prob > max_prob_5_mm) {
+ max_prob_5_mm = acceptor_prob;
+ splice_genomepos_5_mm = genomicpos;
+ /* splice_cdna_direction_5_mm = +1; */
+ splice_sensedir_5_mm = SENSE_FORWARD;
+ splicetype5_mm = ACCEPTOR;
+ }
+ }
+ if (donor_prob > max_prob_sense_anti_5_mm) {
+ max_prob_sense_anti_5_mm = donor_prob;
+ if (donor_prob > max_prob_5_mm) {
+ max_prob_5_mm = donor_prob;
+ splice_genomepos_5_mm = genomicpos;
+ /* splice_cdna_direction_5_mm = -1; */
+ splice_sensedir_5_mm = SENSE_ANTI;
+ splicetype5_mm = ANTIDONOR;
+ }
+ }
+ genomicpos++;
+ }
+ while (genomicpos <= end_genomicpos) {
+ acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
+ donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */
+ debug13(printf("5', watson, sense null %u %u %f %f\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
+ if (acceptor_prob > max_prob_sense_forward_5) {
+ max_prob_sense_forward_5 = acceptor_prob;
+ if (acceptor_prob > max_prob_5) {
+ max_prob_5 = acceptor_prob;
+ splice_genomepos_5 = genomicpos;
+ /* splice_cdna_direction_5 = +1; */
+ splice_sensedir_5 = SENSE_FORWARD;
+ splicetype5 = ACCEPTOR;
+ }
+ }
+ if (donor_prob > max_prob_sense_anti_5) {
+ max_prob_sense_anti_5 = donor_prob;
+ if (donor_prob > max_prob_5) {
+ max_prob_5 = donor_prob;
+ splice_genomepos_5 = genomicpos;
+ /* splice_cdna_direction_5 = -1; */
+ splice_sensedir_5 = SENSE_ANTI;
+ splicetype5 = ANTIDONOR;
+ }
+ }
+ genomicpos++;
+ }
+ debug13(printf("\n"));
+
+ } else {
+ start_genomicpos = start5;
+ middle_genomicpos = middle5;
+ end_genomicpos = end5;
+
+ /* assert(start_genomicpos >= end_genomicpos); */
+ genomicpos = start_genomicpos;
+ while (genomicpos >= middle_genomicpos) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
+ donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */
+ debug13(printf("5', crick, sense null %u %u %f %f mm\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
+ if (acceptor_prob > max_prob_sense_forward_5_mm) {
+ max_prob_sense_forward_5_mm = acceptor_prob;
+ if (acceptor_prob > max_prob_5_mm) {
+ max_prob_5_mm = acceptor_prob;
+ splice_genomepos_5_mm = genomicpos;
+ /* splice_cdna_direction_5_mm = +1; */
+ splice_sensedir_5_mm = SENSE_FORWARD;
+ splicetype5_mm = ANTIACCEPTOR;
+ }
+ }
+ if (donor_prob > max_prob_sense_anti_5_mm) {
+ max_prob_sense_anti_5_mm = donor_prob;
+ if (donor_prob > max_prob_5_mm) {
+ max_prob_5_mm = donor_prob;
+ splice_genomepos_5_mm = genomicpos;
+ /* splice_cdna_direction_5_mm = -1; */
+ splice_sensedir_5_mm = SENSE_ANTI;
+ splicetype5_mm = DONOR;
+ }
+ }
+ genomicpos--;
+ }
+ while (genomicpos >= end_genomicpos) {
+ acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
+ donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */
+ debug13(printf("5', crick, sense null %u %u %f %f\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
+ if (acceptor_prob > max_prob_sense_forward_5) {
+ max_prob_sense_forward_5 = acceptor_prob;
+ if (acceptor_prob > max_prob_5) {
+ max_prob_5 = acceptor_prob;
+ splice_genomepos_5 = genomicpos;
+ /* splice_cdna_direction_5 = +1; */
+ splice_sensedir_5 = SENSE_FORWARD;
+ splicetype5 = ANTIACCEPTOR;
+ }
+ }
+ if (donor_prob > max_prob_sense_anti_5) {
+ max_prob_sense_anti_5 = donor_prob;
+ if (donor_prob > max_prob_5) {
+ max_prob_5 = donor_prob;
+ splice_genomepos_5 = genomicpos;
+ /* splice_cdna_direction_5 = -1; */
+ splice_sensedir_5 = SENSE_ANTI;
+ splicetype5 = DONOR;
+ }
+ }
+ genomicpos--;
+ }
+ debug13(printf("\n"));
+ }
+ }
+
+ if (solve5p == false) {
+ /* Skip 5' end */
+ } else if (*sensedir != SENSE_NULL) {
+ if (max_prob_5 > END_SPLICESITE_PROB_MATCH) {
+ debug13(printf("Found good splice %s on 5' end at %u with probability %f\n",
+ Splicetype_string(splicetype5),splice_genomepos_5-chroffset,max_prob_5));
+ if (plusp) {
+ *ambig_end_length_5 = splice_genomepos_5 - genomicstart5;
+ debug13(printf("1 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5,genomicstart5));
+ } else {
+ *ambig_end_length_5 = genomicstart5 - splice_genomepos_5;
+ debug13(printf("2 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,genomicstart5,splice_genomepos_5));
+ }
+ *ambig_splicetype_5 = splicetype5;
+ *ambig_prob_5 = max_prob_5;
+ } else if (max_prob_5_mm > END_SPLICESITE_PROB_MISMATCH) {
+ debug13(printf("Found good mismatch splice %s on 5' end at %u with probability %f\n",
+ Splicetype_string(splicetype5_mm),splice_genomepos_5_mm-chroffset,max_prob_5_mm));
+ if (plusp) {
+ *ambig_end_length_5 = splice_genomepos_5_mm - genomicstart5;
+ debug13(printf("3 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5_mm,genomicstart5));
+ } else {
+ *ambig_end_length_5 = genomicstart5 - splice_genomepos_5_mm;
+ debug13(printf("4 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,genomicstart5,splice_genomepos_5_mm));
+ }
+ *ambig_splicetype_5 = splicetype5_mm;
+ *ambig_prob_5 = max_prob_5_mm;
+ }
+ }
+
+ if (*sensedir == SENSE_NULL) {
+ if (max_prob_3 >= END_SPLICESITE_PROB_MATCH || max_prob_5 >= END_SPLICESITE_PROB_MATCH) {
+ if (max_prob_3 >= END_SPLICESITE_PROB_MATCH && max_prob_5 >= END_SPLICESITE_PROB_MATCH
+ && max_prob_sense_forward_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH
+ && max_prob_sense_forward_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH) {
+ /* Forward sense wins on both sides */
+ if (plusp) {
+ *ambig_end_length_3 = genomicend3 - splice_genomepos_3;
+ *ambig_end_length_5 = splice_genomepos_5 - genomicstart5;
+ } else {
+ *ambig_end_length_3 = splice_genomepos_3 - genomicend3;
+ *ambig_end_length_5 = genomicstart5 - splice_genomepos_5;
+ }
+ *ambig_splicetype_3 = splicetype3;
+ *ambig_prob_3 = max_prob_3;
+ debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
+ *ambig_splicetype_5 = splicetype5;
+ *ambig_prob_5 = max_prob_5;
+ debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
+ *sensedir = SENSE_FORWARD; /* = splice_sensedir_3 */
+
+ } else if (max_prob_3 >= END_SPLICESITE_PROB_MATCH && max_prob_5 >= END_SPLICESITE_PROB_MATCH
+ && max_prob_sense_anti_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH
+ && max_prob_sense_anti_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH) {
+ /* Anti sense wins on both sides */
+ if (plusp) {
+ *ambig_end_length_3 = genomicend3 - splice_genomepos_3;
+ *ambig_end_length_5 = splice_genomepos_5 - genomicstart5;
+ } else {
+ *ambig_end_length_3 = splice_genomepos_3 - genomicend3;
+ *ambig_end_length_5 = genomicstart5 - splice_genomepos_5;
+ }
+ *ambig_splicetype_3 = splicetype3;
+ *ambig_prob_3 = max_prob_3;
+ debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
+ *ambig_splicetype_5 = splicetype5;
+ *ambig_prob_5 = max_prob_5;
+ debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
+ *sensedir = SENSE_ANTI; /* = splice_sensedir_3 */
+
+ } else if (max_prob_3 > max_prob_5) {
+ /* Consider just 3' end */
+ debug13(printf("Found good splice %s on 3' end at %u with probability %f\n",
+ Splicetype_string(splicetype3),splice_genomepos_3-chroffset,max_prob_3));
+ if (plusp) {
+ *ambig_end_length_3 = genomicend3 - splice_genomepos_3;
+ } else {
+ *ambig_end_length_3 = splice_genomepos_3 - genomicend3;
+ }
+ *ambig_splicetype_3 = splicetype3;
+ *ambig_prob_3 = max_prob_3;
+ /* *cdna_direction = splice_cdna_direction_3; */
+ debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
+ if (max_prob_sense_forward_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH
+ && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH) {
+ *sensedir = splice_sensedir_3;
+ } else if (max_prob_sense_anti_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH
+ && max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH) {
+ *sensedir = splice_sensedir_3;
+ } else {
+ /* Not enough evidence to set sensedir */
+ }
+
+ } else {
+ /* Consider just 5' end */
+ debug13(printf("Found good splice %s on 5' end at %u with probability %f\n",
+ Splicetype_string(splicetype5),splice_genomepos_5-chroffset,max_prob_5));
+ if (plusp) {
+ *ambig_end_length_5 = splice_genomepos_5 - genomicstart5;
+ debug13(printf("5 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5,genomicstart5));
+ } else {
+ *ambig_end_length_5 = genomicstart5 - splice_genomepos_5;
+ debug13(printf("6 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,genomicstart5,splice_genomepos_5));
+ }
+ *ambig_splicetype_5 = splicetype5;
+ *ambig_prob_5 = max_prob_5;
+ /* *cdna_direction = splice_cdna_direction_5; */
+ if (max_prob_sense_forward_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH
+ && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH) {
+ *sensedir = splice_sensedir_5;
+ } else if (max_prob_sense_anti_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH
+ && max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH) {
+ *sensedir = splice_sensedir_5;
+ } else {
+ /* Not enough evidence to set sensedir */
+ }
+ }
+
+ } else if (max_prob_3_mm >= END_SPLICESITE_PROB_MISMATCH || max_prob_5_mm >= END_SPLICESITE_PROB_MISMATCH) {
+ if (max_prob_3_mm > max_prob_5_mm) {
+ debug13(printf("Found good mismatch splice %s on 3' end at %u with probability %f\n",
+ Splicetype_string(splicetype3_mm),splice_genomepos_3_mm-chroffset,max_prob_3_mm));
+ if (plusp) {
+ *ambig_end_length_3 = genomicend3 - splice_genomepos_3_mm;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,genomicend3,splice_genomepos_3_mm));
+ } else {
+ *ambig_end_length_3 = splice_genomepos_3_mm - genomicend3;
+ debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3_mm,genomicend3));
+ }
+ *ambig_splicetype_3 = splicetype3_mm;
+ *ambig_prob_3 = max_prob_3_mm;
+ /* *cdna_direction = splice_cdna_direction_3_mm; */
+ if (max_prob_sense_forward_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH
+ && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH) {
+ *sensedir = splice_sensedir_3_mm;
+ } else if (max_prob_sense_anti_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_forward_3_mm < END_SPLICESITE_PROB_MISMATCH
+ && max_prob_sense_forward_5_mm < END_SPLICESITE_PROB_MISMATCH) {
+ *sensedir = splice_sensedir_3_mm;
+ } else {
+ /* Not enough evidence to set sensedir */
+ }
+ } else {
+ debug13(printf("Found good mismatch splice %s on 5' end at %u with probability %f\n",
+ Splicetype_string(splicetype5_mm),splice_genomepos_5_mm-chroffset,max_prob_5_mm));
+ if (plusp) {
+ *ambig_end_length_5 = splice_genomepos_5_mm - genomicstart5;
+ debug13(printf("7 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5_mm,genomicstart5));
+ } else {
+ *ambig_end_length_5 = genomicstart5 - splice_genomepos_5_mm;
+ debug13(printf("8 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,genomicstart5,splice_genomepos_5_mm));
+ }
+ *ambig_splicetype_5 = splicetype5_mm;
+ *ambig_prob_5 = max_prob_5_mm;
+ /* *cdna_direction = splice_cdna_direction_5_mm; */
+ if (max_prob_sense_forward_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH
+ && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH) {
+ *sensedir = splice_sensedir_5_mm;
+ } else if (max_prob_sense_anti_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_forward_5_mm < END_SPLICESITE_PROB_MISMATCH
+ && max_prob_sense_forward_3_mm < END_SPLICESITE_PROB_MISMATCH) {
+ *sensedir = splice_sensedir_5_mm;
+ } else {
+ /* Not enough evidence to set sensedir */
+ }
+ }
+ }
+ }
+
+ debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d, probs %f and %f\n",
+ *ambig_end_length_5,*ambig_end_length_3,*ambig_prob_5,*ambig_prob_3));
+ return;
+}
+
diff --git a/src/splice.h b/src/splice.h
index b7a1a7b..8ab3631 100644
--- a/src/splice.h
+++ b/src/splice.h
@@ -1,4 +1,4 @@
-/* $Id: splice.h 184446 2016-02-17 21:19:48Z twu $ */
+/* $Id: splice.h 197773 2016-09-14 00:39:12Z twu $ */
#ifndef SPLICE_INCLUDED
#define SPLICE_INCLUDED
#include "bool.h"
@@ -6,6 +6,7 @@
#include "chrnum.h"
#include "genomicpos.h"
#include "compress.h"
+#include "splicetrie_build.h" /* For Splicetype_T */
extern void
Splice_setup (int min_shortend_in);
@@ -94,5 +95,14 @@ extern List_T
Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambiguous,
int querylength, bool first_read_p, bool sarrayp);
+extern void /* Scans candidate positions near the 5' and 3' alignment ends with the Maxent_hr_* splice-site scorers and reports the best site found through the pointer arguments. */
+Splice_trim_novel_spliceends (int *ambig_end_length_5, int *ambig_end_length_3, /* out: when a site is accepted, its distance from genomicstart5 / genomicend3 (operand order of the subtraction depends on plusp) */
+ Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3, /* out: splice type of the accepted site at each end */
+ double *ambig_prob_5, double *ambig_prob_3, int *sensedir, /* out: probability of the accepted site; sensedir is in/out: read to pick the scorer, and may be overwritten with SENSE_FORWARD/SENSE_ANTI when it arrives as SENSE_NULL */
+ Univcoord_T start5, Univcoord_T middle5, Univcoord_T end5, bool solve5p, /* 5' scan: start5..middle5 feeds the "_mm" maximum (compared to END_SPLICESITE_PROB_MISMATCH), then on to end5 feeds the regular maximum (compared to END_SPLICESITE_PROB_MATCH); scan is skipped when solve5p is false */
+ Univcoord_T start3, Univcoord_T middle3, Univcoord_T end3, bool solve3p, /* 3' scan: same start/middle/end layout as the 5' scan; the 3' result block is skipped when solve3p is false */
+ Univcoord_T genomicstart5, Univcoord_T genomicend3, /* reference coordinates used only in the ambig_end_length_5 / ambig_end_length_3 subtractions */
+ Univcoord_T chroffset, bool plusp); /* chroffset is forwarded to the Maxent_hr_* scorers; plusp selects whether each scan increments or decrements genomicpos */
+
#endif
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 8e2cf43..a9b5350 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 196433 2016-08-16 20:20:51Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 197776 2016-09-14 00:42:27Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -7977,6 +7977,7 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
#endif
Intlist_T amb_knowni, amb_nmismatches;
Doublelist_T amb_probs;
+ Stage3end_T splice;
debug4s(printf("*** Starting find_singlesplices_plus on %d spliceable segments ***\n",plus_nspliceable));
@@ -8290,17 +8291,18 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
prob = best_prob - donor_prob;
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
+ /*amb_probs_donor*/NULL,amb_probs,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false)) != NULL) {
+ *ambiguous = List_push(*ambiguous,(void *) splice);
+ }
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -8361,17 +8363,18 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
prob = best_prob - acceptor_prob;
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ amb_probs,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false)) != NULL) {
+ *ambiguous = List_push(*ambiguous,(void *) splice);
+ }
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -8527,17 +8530,18 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
prob = best_prob - donor_prob;
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donort*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donort*/NULL,amb_nmismatches,
+ /*amb_probs_donor*/NULL,amb_probs,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false)) != NULL) {
+ *ambiguous = List_push(*ambiguous,(void *) splice);
+ }
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -8598,17 +8602,18 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
prob = best_prob - acceptor_prob;
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ amb_probs,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false)) != NULL) {
+ *ambiguous = List_push(*ambiguous,(void *) splice);
+ }
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -8759,6 +8764,7 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
#endif
Intlist_T amb_knowni, amb_nmismatches;
Doublelist_T amb_probs;
+ Stage3end_T splice;
debug4s(printf("*** Starting find_singlesplices_minus on %d spliceable segments ***\n",minus_nspliceable));
@@ -9071,17 +9077,18 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
prob = best_prob - donor_prob;
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donort*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donort*/NULL,amb_nmismatches,
+ /*amb_probs_donor*/NULL,amb_probs,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false)) != NULL) {
+ *ambiguous = List_push(*ambiguous,(void *) splice);
+ }
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -9142,17 +9149,18 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
prob = best_prob - acceptor_prob;
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ amb_probs,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false)) != NULL) {
+ *ambiguous = List_push(*ambiguous,(void *) splice);
+ }
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -9308,17 +9316,18 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
prob = best_prob - donor_prob;
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donort*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donort*/NULL,amb_nmismatches,
+ /*amb_probs_donor*/NULL,amb_probs,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false)) != NULL) {
+ *ambiguous = List_push(*ambiguous,(void *) splice);
+ }
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -9378,17 +9387,18 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
prob = best_prob - acceptor_prob;
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ amb_probs,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false)) != NULL) {
+ *ambiguous = List_push(*ambiguous,(void *) splice);
+ }
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -12377,6 +12387,7 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
bool shortdistancep;
double nonidentity = 1.0 - min_distantsplicing_identity;
Chrnum_T chrnum;
+ Stage3end_T splice;
debug(printf("Starting find_splicepairs_distant_dna with nonidentity %f\n",nonidentity));
debug4l(printf("Starting find_splicepairs_distant_dna with nonidentity %f\n",nonidentity));
@@ -12470,28 +12481,30 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(endfrag),shortdistancep));
if (shortdistancep) {
- *localsplicing = List_push(*localsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_NULL,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_NULL,/*sarrayp*/false)) != NULL) {
+ *localsplicing = List_push(*localsplicing,(void *) splice);
+ }
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_NULL,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_NULL,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
}
}
@@ -12563,28 +12576,30 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(startfrag),
(unsigned long long) Substring_genomicstart(endfrag),shortdistancep));
if (shortdistancep) {
- *localsplicing = List_push(*localsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_NULL,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_NULL,/*sarrayp*/false)) != NULL) {
+ *localsplicing = List_push(*localsplicing,(void *) splice);
+ }
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_NULL,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_NULL,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
}
}
q = q->rest;
@@ -12649,17 +12664,18 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
pos,min_endlength_1,querylength-min_endlength_2,
(unsigned long long) Substring_genomicstart(startfrag),
(unsigned long long) Substring_genomicstart(endfrag)));
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_NULL,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_NULL,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
q = q->rest;
}
p = p->rest;
@@ -12713,17 +12729,18 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
pos,min_endlength_1,querylength-min_endlength_2,
(unsigned long long) Substring_genomicstart(startfrag),
(unsigned long long) Substring_genomicstart(endfrag)));
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_NULL,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_NULL,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
q = q->rest;
}
p = p->rest;
@@ -12764,6 +12781,7 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
bool shortdistancep;
double nonidentity = 1.0 - min_distantsplicing_identity;
Chrnum_T chrnum;
+ Stage3end_T splice;
debug(printf("Starting find_splicepairs_distant_rna with nonidentity %f\n",nonidentity));
debug4l(printf("Starting find_splicepairs_distant_rna with nonidentity %f\n",nonidentity));
@@ -12857,28 +12875,30 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor),shortdistancep));
if (shortdistancep) {
- *localsplicing = List_push(*localsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ *localsplicing = List_push(*localsplicing,(void *) splice);
+ }
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
}
}
@@ -12950,28 +12970,30 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(donor),
(unsigned long long) Substring_genomicstart(acceptor),shortdistancep));
if (shortdistancep) {
- *localsplicing = List_push(*localsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ *localsplicing = List_push(*localsplicing,(void *) splice);
+ }
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
}
}
q = q->rest;
@@ -13043,28 +13065,30 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(donor),
(unsigned long long) Substring_genomicstart(acceptor),shortdistancep));
if (shortdistancep) {
- *localsplicing = List_push(*localsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ *localsplicing = List_push(*localsplicing,(void *) splice);
+ }
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
}
}
q = q->rest;
@@ -13136,28 +13160,30 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(donor),
(unsigned long long) Substring_genomicstart(acceptor),shortdistancep));
if (shortdistancep) {
- *localsplicing = List_push(*localsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ *localsplicing = List_push(*localsplicing,(void *) splice);
+ }
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
}
}
q = q->rest;
@@ -13219,17 +13245,18 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
pos,min_endlength_1,querylength-min_endlength_2,
(unsigned long long) Substring_genomicstart(donor),
(unsigned long long) Substring_genomicstart(acceptor)));
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
q = q->rest;
}
p = p->rest;
@@ -13283,17 +13310,18 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
pos,min_endlength_1,querylength-min_endlength_2,
(unsigned long long) Substring_genomicstart(donor),
(unsigned long long) Substring_genomicstart(acceptor)));
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
q = q->rest;
}
p = p->rest;
@@ -13348,17 +13376,18 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
pos,min_endlength_2,querylength-min_endlength_1,
(unsigned long long) Substring_genomicstart(donor),
(unsigned long long) Substring_genomicstart(acceptor)));
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
q = q->rest;
}
p = p->rest;
@@ -13412,17 +13441,18 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
pos,min_endlength_2,querylength-min_endlength_1,
(unsigned long long) Substring_genomicstart(donor),
(unsigned long long) Substring_genomicstart(acceptor)));
- distantsplicing = List_push(distantsplicing,
- (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
- (*ndistantsplicepairs)++;
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+ donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ distantsplicing = List_push(distantsplicing,(void *) splice);
+ (*ndistantsplicepairs)++;
+ }
q = q->rest;
}
p = p->rest;
@@ -13470,6 +13500,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Univcoord_T bestleft, origleft, chroffset, chrhigh;
int i;
int bestj = 0;
+ Stage3end_T splice;
debug(printf("Starting find_splicepairs_shortend\n"));
@@ -13532,15 +13563,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 1: short-overlap donor_plus: Successful ambiguous from donor #%d with amb_length %d\n",
Substring_splicesitesD_knowni(donor),amb_length));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*ambcoords_donor*/NULL,ambcoords,
- /*ambi_donor*/NULL,/*ambi_acceptor*/splicesites_i,
- /*amb_nmismatches_donor*/NULL,/*nmismatches_acceptor*/nmismatches_list,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_list,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
+ donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*ambi_donor*/NULL,/*ambi_acceptor*/splicesites_i,
+ /*amb_nmismatches_donor*/NULL,/*nmismatches_acceptor*/nmismatches_list,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_list,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
Doublelist_free(&probs_list);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords);
@@ -13558,15 +13591,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 1: short-overlap donor_plus: Successful splice from donor #%d to acceptor #%d\n",
Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor)));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
+ donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
}
}
Intlist_free(&nmismatches_list);
@@ -13608,15 +13643,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 2: short-overlap acceptor_plus: Successful ambiguous from acceptor #%d with amb_length %d\n",
Substring_splicesitesA_knowni(acceptor),amb_length));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- ambcoords,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/probs_list,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/probs_list,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
Doublelist_free(&probs_list);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords);
@@ -13634,15 +13671,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 2: short-overlap acceptor_plus: Successful splice from acceptor #%d to donor #%d\n",
Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor)));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
+ donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/origleft-bestleft,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
}
}
Intlist_free(&nmismatches_list);
@@ -13684,15 +13723,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 3: short-overlap donor_minus: Successful ambiguous from donor #%d with amb_length %d\n",
Substring_splicesitesD_knowni(donor),amb_length));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_list,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
+ donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_list,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
Doublelist_free(&probs_list);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords);
@@ -13711,15 +13752,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 3: short-overlap donor_minus: Successful splice from donor #%d to acceptor #%d\n",
Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor)));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
+ donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
}
}
Intlist_free(&nmismatches_list);
@@ -13762,15 +13805,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 4: short-overlap acceptor_minus: Successful ambiguous from acceptor #%d with amb_length %d\n",
Substring_splicesitesA_knowni(acceptor),amb_length));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- ambcoords,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/probs_list,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/probs_list,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
Doublelist_free(&probs_list);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords);
@@ -13789,15 +13834,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 4: short-overlap acceptor_minus: Successful splice from acceptor #%d to #%d\n",
Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor)));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
+ donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/bestleft-origleft,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_FORWARD,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
}
}
Intlist_free(&nmismatches_list);
@@ -13839,15 +13886,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 5: short-overlap antidonor_plus: Successful ambiguous from antidonor #%d with amb_length %d\n",
Substring_splicesitesD_knowni(donor),amb_length));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_list,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
+ donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_list,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
Doublelist_free(&probs_list);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords);
@@ -13866,15 +13915,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 5: short-overlap antidonor_plus: Successful splice from antidonor #%d to antiacceptor #%d\n",
Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor)));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
+ donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
}
}
Intlist_free(&nmismatches_list);
@@ -13917,15 +13968,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful ambiguous from antiacceptor #%d with amb_length %d\n",
Substring_splicesitesA_knowni(acceptor),amb_length));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- ambcoords,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/probs_list,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/probs_list,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
Doublelist_free(&probs_list);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords);
@@ -13944,15 +13997,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful splice from antiacceptor #%d to antidonor #%d\n",
Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor)));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
+ donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/bestleft-origleft,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
}
}
Intlist_free(&nmismatches_list);
@@ -13995,15 +14050,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 7: short-overlap antidonor_minus: Successful ambiguous from antidonor #%d with amb_length %d\n",
Substring_splicesitesD_knowni(donor),amb_length));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_list,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
+ donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_list,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
Doublelist_free(&probs_list);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords);
@@ -14022,15 +14079,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 7: short-overlap antidonor_minus: Successful splice from antidonor #%d to antiacceptor #%d\n",
Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor)));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
+ donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
}
}
Intlist_free(&nmismatches_list);
@@ -14072,15 +14131,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(amb_length = endlength /*- nmismatches_shortend*/);
debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful ambiguous from antiacceptor #%d with amb_length %d\n",
Substring_splicesitesA_knowni(acceptor),amb_length));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- ambcoords,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/probs_list,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/probs_list,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
Doublelist_free(&probs_list);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords);
@@ -14099,15 +14160,17 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful splice from antiacceptor #%d to antidonor #%d\n",
Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor)));
- hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false));
+ if ((splice = Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
+ donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/origleft-bestleft,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ /*sensedir*/SENSE_ANTI,/*sarrayp*/false)) != NULL) {
+ hits = List_push(hits,(void *) splice);
+ }
}
}
Intlist_free(&nmismatches_list);
@@ -14894,9 +14957,8 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
*good_end_p = true;
}
- debug13(printf("Trim at start: %d, trim at end: %d (raw %d and %d)\n",
- Stage3end_trim_right(hit),Stage3end_trim_left(hit),
- Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
+ debug13(printf("Trim at start: %d, trim at end: %d\n",
+ Stage3end_trim_right(hit),Stage3end_trim_left(hit)));
/* Don't throw away GMAP hits */
stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
hits = List_push(hits,(void *) hit);
@@ -14966,9 +15028,8 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
*good_end_p = true;
}
- debug13(printf("Trim at start: %d, trim at end: %d (raw %d and %d)\n",
- Stage3end_trim_right(hit),Stage3end_trim_left(hit),
- Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
+ debug13(printf("Trim at start: %d, trim at end: %d\n",
+ Stage3end_trim_right(hit),Stage3end_trim_left(hit)));
/* Don't throw away GMAP hits */
stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
hits = List_push(hits,(void *) hit);
@@ -15038,9 +15099,8 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
*good_end_p = true;
}
- debug13(printf("Trim at start: %d, trim at end: %d (raw %d and %d)\n",
- Stage3end_trim_right(hit),Stage3end_trim_left(hit),
- Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
+ debug13(printf("Trim at start: %d, trim at end: %d\n",
+ Stage3end_trim_right(hit),Stage3end_trim_left(hit)));
/* Don't throw away GMAP hits */
stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
hits = List_push(hits,(void *) hit);
@@ -19375,7 +19435,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
/* Skip */
debug13(printf("Skipping hit5 already of type GMAP\n"));
- } else if (querylength5 - Stage3end_nmatches_posttrim(hit5) <= cutoff_level_5) {
+ } else if (querylength5 - Stage3end_nmatches(hit5) <= cutoff_level_5) {
/* Skip, because already good enough */
debug13(printf("Skipping hit5 with nmismatches %d - %d <= cutoff_level %d\n",
querylength5,Stage3end_nmatches_posttrim(hit5),cutoff_level_5));
@@ -19409,7 +19469,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
/* Skip */
debug13(printf("Skipping hit3 already of type GMAP\n"));
- } else if (querylength3 - Stage3end_nmatches_posttrim(hit3) <= cutoff_level_3) {
+ } else if (querylength3 - Stage3end_nmatches(hit3) <= cutoff_level_3) {
/* Skip, because already good enough */
debug13(printf("Skipping hit3 with nmismatches %d - %d <= cutoff_level %d\n",
querylength3,Stage3end_nmatches_posttrim(hit3),cutoff_level_3));
diff --git a/src/stage3.c b/src/stage3.c
index 1edc528..1745fec 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 196409 2016-08-16 15:42:27Z twu $";
+static char rcsid[] = "$Id: stage3.c 197772 2016-09-14 00:38:08Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -36,6 +36,7 @@ static char rcsid[] = "$Id: stage3.c 196409 2016-08-16 15:42:27Z twu $";
#include "dynprog_genome.h"
#include "dynprog_cdna.h"
#include "dynprog_end.h"
+#include "splice.h"
#include "maxent.h"
#include "maxent_hr.h"
#include "fastlog.h"
@@ -8851,6 +8852,8 @@ build_path_end3 (bool *knownsplicep, int *ambig_end_length_3, Splicetype_T *ambi
debug(Pair_dump_list(path,true));
debug(printf("\n"));
+ debug(printf("ambig_end_length_3 is %d\n",*ambig_end_length_3));
+
return path;
}
@@ -8956,6 +8959,8 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb
debug(Pair_dump_list(pairs,true));
debug(printf("\n"));
+ debug(printf("ambig_end_length_5 is %d\n",*ambig_end_length_5));
+
return pairs;
}
@@ -11355,7 +11360,7 @@ trim_novel_spliceends (List_T pairs,
int *ambig_end_length_5, int *ambig_end_length_3,
Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
double *ambig_prob_5, double *ambig_prob_3,
- int *cdna_direction, int *sensedir, bool watsonp, int querylength,
+ int *sensedir, bool watsonp, int querylength,
Univcoord_T chroffset, Univcoord_T chrhigh,
bool knownsplice5p, bool knownsplice3p) {
List_T path, p;
@@ -11379,8 +11384,8 @@ trim_novel_spliceends (List_T pairs,
bool mismatchp;
- debug13(printf("\nEntered gmap_trim_novel_spliceends with cdna_direction %d and sensedir %d, ambig_end_lengths %d and %d\n",
- *cdna_direction,*sensedir,*ambig_end_length_5,*ambig_end_length_3));
+ debug13(printf("\nEntered gmap_trim_novel_spliceends with sensedir %d, ambig_end_lengths %d and %d\n",
+ *sensedir,*ambig_end_length_5,*ambig_end_length_3));
Pair_trim_distances(&trim5,&trim3,pairs);
debug13(printf("Trim distances are %d and %d\n",trim5,trim3));
@@ -11389,14 +11394,19 @@ trim_novel_spliceends (List_T pairs,
if (path != NULL && knownsplice3p == false && *ambig_end_length_3 == 0
/* && exon_length_3(path) >= END_SPLICESITE_EXON_LENGTH */) {
/* See if there is a good splice site at the 3' end */
- debug13(Pair_dump_list(path,true));
+ /* debug13(Pair_dump_list(path,true)); */
pair = (Pair_T) List_head(p = path);
start = middle = end = pair->genomepos;
debug13(printf("Initializing start and end to be %u\n",start));
+ if (pair->querypos != querylength - 1) {
+ mismatchp = true;
+ } else {
+ mismatchp = false;
+ }
+
i = 0;
- mismatchp = false;
while (i < trim3) {
if ((p = List_next(p)) == NULL) {
break;
@@ -11438,6 +11448,7 @@ trim_novel_spliceends (List_T pairs,
if (mismatchp == false) {
/* Allow perfect overhangs into intron */
+ /* Note: pairs may not extend all the way to the 3' end of the query, which is why we check the end pair's querypos to initialize mismatchp */
debug13(printf("Allowing perfect overhang into potential intron\n"));
} else if (*sensedir == SENSE_FORWARD) {
@@ -11733,14 +11744,19 @@ trim_novel_spliceends (List_T pairs,
if (pairs != NULL && knownsplice5p == false && *ambig_end_length_5 == 0
/* && exon_length_5(pairs) >= END_SPLICESITE_EXON_LENGTH */) {
/* See if there is a good splice site at the 5' end */
- debug13(Pair_dump_list(pairs,true));
+ /* debug13(Pair_dump_list(pairs,true)); */
pair = (Pair_T) List_head(p = pairs);
start = middle = end = pair->genomepos;
debug13(printf("Initializing start and end to be %u\n",start));
+ if (pair->querypos != 0) {
+ mismatchp = true;
+ } else {
+ mismatchp = false;
+ }
+
i = 0;
- mismatchp = false;
while (i < trim5) {
if ((p = List_next(p)) == NULL) {
break;
@@ -11782,6 +11798,7 @@ trim_novel_spliceends (List_T pairs,
if (mismatchp == false) {
/* Allow perfect overhangs into intron */
+ /* Note: pairs may not extend all the way to the 5' end of the query, which is why we check the end pair's querypos to initialize mismatchp */
debug13(printf("Allowing perfect overhang into potential intron\n"));
} else if (*sensedir == SENSE_FORWARD) {
@@ -12089,7 +12106,7 @@ trim_novel_spliceends (List_T pairs,
*ambig_end_length_5 = ((Pair_T) pairs->first)->querypos;
*ambig_splicetype_5 = splicetype5;
*ambig_prob_5 = max_prob_5;
- *cdna_direction = splice_cdna_direction_5;
+ /* *cdna_direction = splice_cdna_direction_5; */
debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
}
@@ -12104,7 +12121,7 @@ trim_novel_spliceends (List_T pairs,
*ambig_end_length_3 = (querylength - 1) - ((Pair_T) path->first)->querypos;
*ambig_splicetype_3 = splicetype3;
*ambig_prob_3 = max_prob_3;
- *cdna_direction = splice_cdna_direction_3;
+ /* *cdna_direction = splice_cdna_direction_3; */
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
}
*sensedir = SENSE_FORWARD;
@@ -12125,7 +12142,7 @@ trim_novel_spliceends (List_T pairs,
*ambig_end_length_5 = ((Pair_T) pairs->first)->querypos;
*ambig_splicetype_5 = splicetype5;
*ambig_prob_5 = max_prob_5;
- *cdna_direction = splice_cdna_direction_5;
+ /* *cdna_direction = splice_cdna_direction_5; */
debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
}
@@ -12140,7 +12157,7 @@ trim_novel_spliceends (List_T pairs,
*ambig_end_length_3 = (querylength - 1) - ((Pair_T) path->first)->querypos;
*ambig_splicetype_3 = splicetype3;
*ambig_prob_3 = max_prob_3;
- *cdna_direction = splice_cdna_direction_3;
+ /* *cdna_direction = splice_cdna_direction_3; */
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
}
*sensedir = SENSE_ANTI;
@@ -12159,7 +12176,7 @@ trim_novel_spliceends (List_T pairs,
*ambig_end_length_3 = (querylength - 1) - ((Pair_T) path->first)->querypos;
*ambig_splicetype_3 = splicetype3;
*ambig_prob_3 = max_prob_3;
- *cdna_direction = splice_cdna_direction_3;
+ /* *cdna_direction = splice_cdna_direction_3; */
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
if (max_prob_sense_forward_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH) {
@@ -12185,7 +12202,7 @@ trim_novel_spliceends (List_T pairs,
*ambig_end_length_5 = ((Pair_T) pairs->first)->querypos;
*ambig_splicetype_5 = splicetype5;
*ambig_prob_5 = max_prob_5;
- *cdna_direction = splice_cdna_direction_5;
+ /* *cdna_direction = splice_cdna_direction_5; */
debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
if (max_prob_sense_forward_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH
&& max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH) {
@@ -12212,7 +12229,7 @@ trim_novel_spliceends (List_T pairs,
*ambig_end_length_3 = (querylength - 1) - ((Pair_T) path->first)->querypos;
*ambig_splicetype_3 = splicetype3_mm;
*ambig_prob_3 = max_prob_3_mm;
- *cdna_direction = splice_cdna_direction_3_mm;
+ /* *cdna_direction = splice_cdna_direction_3_mm; */
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
if (max_prob_sense_forward_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH) {
@@ -12236,7 +12253,7 @@ trim_novel_spliceends (List_T pairs,
*ambig_end_length_5 = ((Pair_T) pairs->first)->querypos;
*ambig_splicetype_5 = splicetype5_mm;
*ambig_prob_5 = max_prob_5_mm;
- *cdna_direction = splice_cdna_direction_5_mm;
+ /* *cdna_direction = splice_cdna_direction_5_mm; */
debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
if (max_prob_sense_forward_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH
&& max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH) {
@@ -12258,6 +12275,258 @@ trim_novel_spliceends (List_T pairs,
+#if 0
+/* Still somewhat buggy */
+static List_T
+trim_novel_spliceends_new (List_T pairs,
+ int *ambig_end_length_5, int *ambig_end_length_3,
+ Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
+ double *ambig_prob_5, double *ambig_prob_3,
+ int *sensedir, bool watsonp, int querylength,
+ Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool knownsplice5p, bool knownsplice3p) {
+ List_T path, p;
+ int i;
+
+ Pair_T pair, prev;
+ int trim5, trim3, exondist5, exondist3;
+
+ /* start to middle has mismatches, while middle to end has none */
+ Univcoord_T start5, middle5, end5, start3, middle3, end3;
+ Univcoord_T genomicstart5, genomicend3;
+ bool solve5p, solve3p, mismatchp;
+
+
+ debug13(printf("\nEntered gmap_trim_novel_spliceends with sensedir %d, ambig_end_lengths %d and %d\n",
+ *sensedir,*ambig_end_length_5,*ambig_end_length_3));
+
+ debug13(Pair_dump_list(pairs,true));
+
+ Pair_trim_distances(&trim5,&trim3,pairs);
+ debug13(printf("Trim distances (where we would trim theoretically) are %d and %d\n",trim5,trim3));
+
+ if (pairs != NULL && knownsplice5p == false && *ambig_end_length_5 == 0
+ /* && exon_length_5(pairs) >= END_SPLICESITE_EXON_LENGTH */) {
+ /* See if there is a good splice site at the 5' end */
+
+ pair = (Pair_T) List_head(p = pairs);
+ middle5 = (Univcoord_T) pair->genomepos - 1;
+
+ if (pair->querypos != 0) {
+ mismatchp = true;
+ } else {
+ mismatchp = false;
+ }
+
+ i = 0;
+ while (i < trim5) {
+ if ((p = List_next(p)) == NULL) {
+ break;
+ } else if (pair->gapp == true) {
+ break;
+ } else if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) {
+ middle5 = pair->genomepos - 1;
+ debug13(printf("Resetting middle to be %u\n",middle5));
+ } else {
+ middle5 = pair->genomepos - 1;
+ mismatchp = true;
+ debug13(printf("Resetting middle to be %u\n",middle5));
+ }
+ prev = pair;
+ pair = (Pair_T) List_head(p);
+ i++;
+ }
+
+ end5 = middle5;
+ while (i < trim5 + END_SPLICESITE_SEARCH) {
+ if ((p = List_next(p)) == NULL) {
+ break;
+ } else if (pair->gapp == true) {
+ break;
+ } else {
+ end5 = pair->genomepos - 1;
+ debug13(printf("Resetting end to be %u\n",end5));
+ }
+ prev = pair;
+ pair = (Pair_T) List_head(p);
+ i++;
+ }
+
+ /* Determine genomicstart5 after indels have been skipped */
+ if (pair->gapp == true) {
+ pair = prev;
+ }
+ if (watsonp) {
+ debug13(printf("Plus: Setting genomicstart5 to be genomepos %u - querypos %d\n",
+ pair->genomepos,pair->querypos));
+ genomicstart5 = pair->genomepos - pair->querypos;
+ } else {
+ debug13(printf("Minus: Setting genomicstart5 to be genomepos %u - querypos %d - 1\n",
+ pair->genomepos,pair->querypos));
+ genomicstart5 = pair->genomepos - pair->querypos - 1;
+ }
+ if ((start5 = middle5 - END_SPLICESITE_SEARCH) < genomicstart5) {
+ start5 = genomicstart5;
+ }
+
+
+ /* Find distance from end to intron, if any */
+ exondist5 = 0;
+ while (p != NULL && ((Pair_T) List_head(p))->gapp == false &&
+ exondist5 < END_MIN_EXONLENGTH) {
+ p = List_next(p);
+ exondist5++;
+ }
+ debug13(printf("exondist5 is %d\n",exondist5));
+ }
+
+ if (mismatchp == false) {
+ solve5p = false;
+ genomicstart5 = 0;
+
+ } else if (watsonp) {
+ solve5p = true;
+ genomicstart5 = chroffset + genomicstart5;
+
+ start5 = chroffset + start5;
+ middle5 = chroffset + middle5;
+ end5 = chroffset + end5;
+ debug13(printf("\n2 Set end points for 5' trim to be %u..%u..%u, plusp %d\n",
+ start5 - chroffset,middle5 - chroffset,end5 - chroffset,watsonp));
+
+ } else {
+ solve5p = true;
+ genomicstart5 = chrhigh - genomicstart5;
+
+ start5 = chrhigh - start5;
+ middle5 = chrhigh - middle5;
+ end5 = chrhigh - end5;
+ debug13(printf("\n2 Set end points for 5' trim to be %u..%u..%u, plusp %d\n",
+ start5 - chroffset,middle5 - chroffset,end5 - chroffset,watsonp));
+ }
+
+
+ path = List_reverse(pairs);
+ if (path != NULL && knownsplice3p == false && *ambig_end_length_3 == 0
+ /* && exon_length_3(path) >= END_SPLICESITE_EXON_LENGTH */) {
+ /* See if there is a good splice site at the 3' end */
+ /* debug13(Pair_dump_list(path,true)); */
+
+ pair = (Pair_T) List_head(p = path);
+ middle3 = (Univcoord_T) pair->genomepos + 1;
+
+ if (pair->querypos != querylength - 1) {
+ mismatchp = true;
+ } else {
+ mismatchp = false;
+ }
+
+ i = 0;
+ while (i < trim3) {
+ if ((p = List_next(p)) == NULL) {
+ break;
+ } else if (pair->gapp == true) {
+ break;
+ } else if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) {
+ middle3 = pair->genomepos + 1;
+ debug13(printf("Resetting middle to be %u\n",middle3));
+ } else {
+ middle3 = pair->genomepos + 1;
+ mismatchp = true;
+ debug13(printf("Resetting middle to be %u\n",middle3));
+ }
+ prev = pair;
+ pair = (Pair_T) List_head(p);
+ i++;
+ }
+
+ end3 = middle3;
+ while (i < trim3 + END_SPLICESITE_SEARCH) {
+ if ((p = List_next(p)) == NULL) {
+ break;
+ } else if (pair->gapp == true) {
+ break;
+ } else {
+ end3 = pair->genomepos + 1;
+ debug13(printf("Resetting end to be %u\n",end3));
+ }
+ prev = pair;
+ pair = (Pair_T) List_head(p);
+ i++;
+ }
+
+ /* Determine genomicend3 after indels have been skipped */
+ if (pair->gapp == true) {
+ pair = prev;
+ }
+ if (watsonp) {
+ debug13(printf("Plus: Setting genomicend3 to be genomepos %u + (querylength %d - querypos %d)\n",
+ pair->genomepos,querylength,pair->querypos));
+ genomicend3 = pair->genomepos + (querylength - pair->querypos);
+ } else {
+ debug13(printf("Minus: Setting genomicend3 to be genomepos %u + (querylength %d - 1 - querypos %d)\n",
+ pair->genomepos,querylength,pair->querypos));
+ genomicend3 = pair->genomepos + (querylength - 1 - pair->querypos);
+ }
+ if ((start3 = middle3 + END_SPLICESITE_SEARCH) > genomicend3) {
+ start3 = genomicend3;
+ }
+
+
+ /* Find distance from end to intron, if any */
+ exondist3 = 0;
+ while (p != NULL && ((Pair_T) List_head(p))->gapp == false &&
+ exondist3 < END_MIN_EXONLENGTH) {
+ p = List_next(p);
+ exondist3++;
+ }
+ debug13(printf("exondist3 is %d\n",exondist3));
+ }
+
+ if (mismatchp == false) {
+ solve3p = false;
+ genomicend3 = 0;
+
+ } else if (watsonp) {
+ solve3p = true;
+ genomicend3 = chroffset + genomicend3;
+
+ start3 = chroffset + start3;
+ middle3 = chroffset + middle3;
+ end3 = chroffset + end3;
+
+ } else {
+ solve3p = true;
+ genomicend3 = chrhigh - genomicend3;
+ start3 = chrhigh - start3;
+ middle3 = chrhigh - middle3;
+ end3 = chrhigh - end3;
+ }
+ debug13(printf("\n2 Set end points for 3' trim to be %u..%u..%u, plusp %d\n",
+ start3 - chroffset,middle3 - chroffset,end3 - chroffset,watsonp));
+
+ Splice_trim_novel_spliceends(&(*ambig_end_length_5),&(*ambig_end_length_3),
+ &(*ambig_splicetype_5),&(*ambig_splicetype_3),
+ &(*ambig_prob_5),&(*ambig_prob_3),&(*sensedir),
+ start5,middle5,end5,solve5p,start3,middle3,end3,solve3p,
+ genomicstart5,genomicend3,chroffset,/*plusp*/watsonp);
+
+ while (path != NULL && ((Pair_T) path->first)->querypos > (querylength - 1) - *ambig_end_length_3) {
+ path = Pairpool_pop(path,&pair);
+ }
+
+ pairs = List_reverse(path);
+ while (pairs != NULL && ((Pair_T) pairs->first)->querypos < *ambig_end_length_5) {
+ pairs = Pairpool_pop(pairs,&pair);
+ }
+
+ debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d, probs %f and %f\n",
+ *ambig_end_length_5,*ambig_end_length_3,*ambig_prob_5,*ambig_prob_3));
+ debug13(Pair_dump_list(pairs,true));
+
+ return pairs;
+}
+#endif
@@ -12294,7 +12563,7 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
pairs = trim_novel_spliceends(pairs,&(*ambig_end_length_5),&(*ambig_end_length_3),
&(*ambig_splicetype_5),&(*ambig_splicetype_3),
&(*ambig_prob_5),&(*ambig_prob_3),
- &(*cdna_direction),&(*sensedir),watsonp,querylength,
+ &(*sensedir),watsonp,querylength,
chroffset,chrhigh,knownsplice5p,knownsplice3p);
}
#endif
@@ -12960,6 +13229,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
*cdna_direction = -1;
*sensedir = SENSE_ANTI;
+
pairs_rev_copy = Pairpool_copy(pairs_rev,pairpool);
*finalpairs2 = path_trim(defect_rate_rev,&(*ambig_end_length_5_2),&(*ambig_end_length_3_2),
&(*ambig_splicetype_5_2),&(*ambig_splicetype_3_2),
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 52b02cc..2184b66 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 196429 2016-08-16 20:09:56Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 197777 2016-09-14 00:43:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -444,10 +444,13 @@ struct T {
int ntscore; /* Includes penalties */
int nmatches;
int nmatches_posttrim;
+
int gmap_max_match_length; /* Used only by GMAP */
double gmap_min_splice_prob; /* Used only by GMAP */
/* trim_left and trim_right should really be named trim_start and trim_end */
+ /* if trim_left_splicep or trim_right_splicep is true, then trim is of type "unknown amb" */
+ /* if trim_left_splicep or trim_right_splicep is false, then trim is of type "unknown" */
int trim_left; /* Used by Stage3end_optimal_score for comparing terminals and non-terminals */
int trim_right;
bool trim_left_splicep;
@@ -461,7 +464,7 @@ struct T {
Overlap_T gene_overlap;
long int tally;
- int nmismatches_whole;
+ int nmismatches_whole; /* Should probably not use anymore */
int nmismatches_bothdiff;
int nmismatches_refdiff; /* Set only for display */
@@ -487,8 +490,6 @@ struct T {
double amb_prob_acceptor; /* For shortexon */
#endif
- int gmap_start_amb_length; /* Needed because GMAP doesn't have substrings */
- int gmap_end_amb_length; /* Needed because GMAP doesn't have substrings */
Endtype_T gmap_start_endtype; /* For GMAP, which has no substrings */
Endtype_T gmap_end_endtype; /* For GMAP, which has no substrings */
@@ -851,6 +852,11 @@ Stage3end_nmatches_posttrim (T this) {
}
int
+Stage3end_nmatches (T this) {
+ return this->nmatches;
+}
+
+int
Stage3end_nmismatches_whole (T this) {
return this->nmismatches_whole;
}
@@ -928,34 +934,69 @@ Stage3end_trim_right (T this) {
static int
start_amb_length (T this) {
if (this->hittype == GMAP) {
- return this->gmap_start_amb_length;
+ if (this->trim_left_splicep == true) {
+ return this->trim_left;
+ } else {
+ return 0;
+ }
} else {
- return Substring_match_length_amb((Substring_T) List_head(this->substrings_1toN));
+ return Substring_start_amb_length((Substring_T) List_head(this->substrings_1toN));
}
}
static int
end_amb_length (T this) {
if (this->hittype == GMAP) {
- return this->gmap_end_amb_length;
+ if (this->trim_right_splicep == true) {
+ return this->trim_right;
+ } else {
+ return 0;
+ }
} else {
- return Substring_match_length_amb((Substring_T) List_head(this->substrings_Nto1));
+ return Substring_end_amb_length((Substring_T) List_head(this->substrings_Nto1));
}
}
-int
-Stage3end_trim_left_raw (T this) {
- return this->trim_left + start_amb_length(this);
+static int
+amb_length (T this) {
+ int amb_length;
+
+ if (this->hittype == GMAP) {
+ amb_length = 0;
+ if (this->trim_left_splicep == true) {
+ amb_length += this->trim_left;
+ }
+ if (this->trim_right_splicep == true) {
+ amb_length += this->trim_right;
+ }
+ return amb_length;
+
+ } else {
+ return Substring_start_amb_length((Substring_T) List_head(this->substrings_1toN)) +
+ Substring_end_amb_length((Substring_T) List_head(this->substrings_Nto1));
+ }
}
-int
-Stage3end_trim_right_raw (T this) {
- return this->trim_right + end_amb_length(this);
+
+/* Two types of ambiguity: known amb (mapped to >1 genomic place) and unknown amb (splice site seen) */
+static bool
+known_ambiguous_p (T this) {
+ if (this->hittype == GMAP) {
+ return false;
+ } else if (Substring_ambiguous_p((Substring_T) List_head(this->substrings_1toN))) {
+ return true;
+ } else if (Substring_ambiguous_p((Substring_T) List_head(this->substrings_Nto1))) {
+ return true;
+ } else {
+ return false;
+ }
}
+
+/* Includes amb and non-amb */
int
Stage3end_total_trim (T this) {
- return this->trim_left + start_amb_length(this) + this->trim_right + end_amb_length(this);
+ return this->trim_left + this->trim_right;
}
int
@@ -2692,10 +2733,10 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
/* minus/minus */
debug15(printf("Computing overlap using substrings minus/minus\n"));
- start5 = (hit5->genomicstart - 1) - hit5->trim_left - start_amb_length(hit5);
- end5 = hit5->genomicend + hit5->trim_right + end_amb_length(hit5);
- start3 = (hit3->genomicstart - 1) - hit3->trim_left - start_amb_length(hit3);
- end3 = hit3->genomicend + hit3->trim_right + end_amb_length(hit3);
+ start5 = (hit5->genomicstart - 1) - hit5->trim_left /*- start_amb_length(hit5)*/;
+ end5 = hit5->genomicend + hit5->trim_right /*+ end_amb_length(hit5)*/;
+ start3 = (hit3->genomicstart - 1) - hit3->trim_left /*- start_amb_length(hit3)*/;
+ end3 = hit3->genomicend + hit3->trim_right /*+ end_amb_length(hit3)*/;
debug15(printf("hit5 endpoints are %u..%u. hit3 endpoints are %u..%u\n",
start5-hit5->chroffset,end5-hit5->chroffset,start3-hit3->chroffset,end3-hit3->chroffset));
@@ -4073,8 +4114,8 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip3_low = ilength3_low + common_shift;
debug15(printf("Overlap clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- *hardclip5_high += hit5->trim_right + end_amb_length(hit5);
- *hardclip3_low += hit3->trim_left + start_amb_length(hit3);
+ *hardclip5_high += hit5->trim_right /*+ end_amb_length(hit5)*/;
+ *hardclip3_low += hit3->trim_left /*+ start_amb_length(hit3)*/;
debug15(printf("Ambig clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
@@ -4089,8 +4130,8 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
/* Try original position without a shift */
*hardclip5_high = ilength5_high /*- common_shift*/;
*hardclip3_low = ilength3_low /*+ common_shift*/;
- *hardclip5_high += hit5->trim_right + end_amb_length(hit5);
- *hardclip3_low += hit3->trim_left + start_amb_length(hit3);
+ *hardclip5_high += hit5->trim_right /*+ end_amb_length(hit5)*/;
+ *hardclip3_low += hit3->trim_left /*+ start_amb_length(hit3)*/;
if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
@@ -4123,9 +4164,11 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip5_high = ilength5_high /*- common_shift*/;
*hardclip3_low = ilength3_low /*+ common_shift*/;
- *hardclip5_high += hit5->trim_right + end_amb_length(hit5);
- *hardclip3_low += hit3->trim_left + start_amb_length(hit3);
+ debug15(printf("Initial computation of clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ *hardclip5_high += hit5->trim_right /*+ end_amb_length(hit5)*/;
+ *hardclip3_low += hit3->trim_left /*+ start_amb_length(hit3)*/;
debug15(printf("Recomputed clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
@@ -4157,8 +4200,8 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip3_high = ilength3_high - common_shift;
debug15(printf("Overlap clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- *hardclip5_low += hit5->trim_left + start_amb_length(hit5);
- *hardclip3_high += hit3->trim_right + end_amb_length(hit3);
+ *hardclip5_low += hit5->trim_left /*+ start_amb_length(hit5)*/;
+ *hardclip3_high += hit3->trim_right /*+ end_amb_length(hit3)*/;
debug15(printf("Ambig clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
@@ -4173,8 +4216,8 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
/* Try original position without a shift */
*hardclip5_low = ilength5_low /*+ common_shift*/;
*hardclip3_high = ilength3_high /*- common_shift*/;
- *hardclip5_low += hit5->trim_left + start_amb_length(hit5);
- *hardclip3_high += hit3->trim_right + end_amb_length(hit3);
+ *hardclip5_low += hit5->trim_left /*+ start_amb_length(hit5)*/;
+ *hardclip3_high += hit3->trim_right /*+ end_amb_length(hit3)*/;
if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
@@ -4207,8 +4250,11 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip5_low = ilength5_low /*+ common_shift*/;
*hardclip3_high = ilength3_high /*- common_shift*/;
- *hardclip5_low += hit5->trim_left + start_amb_length(hit5);
- *hardclip3_high += hit3->trim_right + end_amb_length(hit3);
+ debug15(printf("Initial computation of clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+
+ *hardclip5_low += hit5->trim_left /*+ start_amb_length(hit5)*/;
+ *hardclip3_high += hit3->trim_right /*+ end_amb_length(hit3)*/;
debug15(printf("Recomputed clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
@@ -4292,8 +4338,8 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip3_low = ilength3_low + common_shift;
debug15(printf("Overlap clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- *hardclip5_high += hit5->trim_left + start_amb_length(hit5);
- *hardclip3_low += hit3->trim_right + end_amb_length(hit3);
+ *hardclip5_high += hit5->trim_left /*+ start_amb_length(hit5)*/;
+ *hardclip3_low += hit3->trim_right /*+ end_amb_length(hit3)*/;
debug15(printf("Ambig clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
@@ -4308,8 +4354,8 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
/* Try original position without a shift */
*hardclip5_high = ilength5_high /*- common_shift*/;
*hardclip3_low = ilength3_low /*+ common_shift*/;
- *hardclip5_high += hit5->trim_left + start_amb_length(hit5);
- *hardclip3_low += hit3->trim_right + end_amb_length(hit3);
+ *hardclip5_high += hit5->trim_left /*+ start_amb_length(hit5)*/;
+ *hardclip3_low += hit3->trim_right /*+ end_amb_length(hit3)*/;
if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
@@ -4342,8 +4388,11 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip5_high = ilength5_high /*- common_shift*/;
*hardclip3_low = ilength3_low /*+ common_shift*/;
- *hardclip5_high += hit5->trim_left + start_amb_length(hit5);
- *hardclip3_low += hit3->trim_right + end_amb_length(hit3);
+ debug15(printf("Initial computation of clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+
+ *hardclip5_high += hit5->trim_left /*+ start_amb_length(hit5)*/;
+ *hardclip3_low += hit3->trim_right /*+ end_amb_length(hit3)*/;
debug15(printf("Recomputed clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
@@ -4375,8 +4424,8 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip3_high = ilength3_high - common_shift;
debug15(printf("Overlap clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- *hardclip5_low += hit5->trim_right + end_amb_length(hit5);
- *hardclip3_high += hit3->trim_left + start_amb_length(hit3);
+ *hardclip5_low += hit5->trim_right /*+ end_amb_length(hit5)*/;
+ *hardclip3_high += hit3->trim_left /*+ start_amb_length(hit3)*/;
debug15(printf("Ambig clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
@@ -4391,8 +4440,8 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
/* Try original position without a shift */
*hardclip5_low = ilength5_low /*+ common_shift*/;
*hardclip3_high = ilength3_high /*- common_shift*/;
- *hardclip5_low += hit5->trim_right + end_amb_length(hit5);
- *hardclip3_high += hit3->trim_left + start_amb_length(hit3);
+ *hardclip5_low += hit5->trim_right /*+ end_amb_length(hit5)*/;
+ *hardclip3_high += hit3->trim_left /*+ start_amb_length(hit3)*/;
if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
@@ -4425,8 +4474,11 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip5_low = ilength5_low /*+ common_shift*/;
*hardclip3_high = ilength3_high /*- common_shift*/;
- *hardclip5_low += hit5->trim_right + end_amb_length(hit5);
- *hardclip3_high += hit3->trim_left + start_amb_length(hit3);
+ debug15(printf("Initial computation of clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+
+ *hardclip5_low += hit5->trim_right /*+ end_amb_length(hit5)*/;
+ *hardclip3_high += hit3->trim_left /*+ start_amb_length(hit3)*/;
debug15(printf("Recomputed clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
@@ -4612,8 +4664,8 @@ Stage3end_copy (T old) {
new->nsegments = old->nsegments;
new->score = old->score;
new->ntscore = old->ntscore;
- new->nmatches = old->nmatches;
new->nmatches_posttrim = old->nmatches_posttrim;
+ new->nmatches = old->nmatches;
new->gmap_max_match_length = old->gmap_max_match_length;
new->gmap_min_splice_prob = old->gmap_min_splice_prob;
@@ -4643,8 +4695,6 @@ Stage3end_copy (T old) {
new->gmap_nintrons = old->gmap_nintrons;
new->sensedir = old->sensedir;
- new->gmap_start_amb_length = old->gmap_start_amb_length;
- new->gmap_end_amb_length = old->gmap_end_amb_length;
new->gmap_start_endtype = old->gmap_start_endtype;
new->gmap_end_endtype = old->gmap_end_endtype;
@@ -4992,7 +5042,16 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
if ((nmismatches = Intlist_head(x)) < 0) {
nmismatches = Genome_count_mismatches_substring(query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
/*plusp*/true,genestrand);
- debug0(printf("nmismatches %d from genome\n",nmismatches));
+ debug0(printf("nmismatches %d from genome over query %d..%d\n",nmismatches,querystart,queryend));
+#ifdef CHECK_SARRAY
+ } else {
+ printf("nmismatches %d from sarray\n",nmismatches);
+ printf("nmismatches %d from genome\n",
+ Genome_count_mismatches_substring(query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
+ /*plusp*/true,genestrand));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
+ /*plusp*/true,genestrand));
+#endif
}
nmismatches_whole += nmismatches;
debug0(printf("nmismatches %d from sarray\n",nmismatches));
@@ -5027,7 +5086,10 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
Junction_gc(&junctions);
debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
+
} else {
+ debug0(printf("Substring_new returns nmatches_posttrim %d, amb %d over query %d..%d\n",
+ Substring_nmatches_posttrim(substring),Substring_amb_length(substring),Substring_querystart(substring),Substring_queryend(substring)));
substrings = List_push(substrings,(void *) substring);
nmismatches_bothdiff += Substring_nmismatches_bothdiff(substring);
querylength_trimmed += Substring_match_length(substring);
@@ -5137,6 +5199,15 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
nmismatches = Genome_count_mismatches_substring(query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
/*plusp*/false,genestrand);
debug0(printf("nmismatches %d from genome\n",nmismatches));
+#ifdef CHECK_SARRAY
+ } else {
+ printf("nmismatches %d from sarray\n",nmismatches);
+ printf("nmismatches %d from genome\n",
+ Genome_count_mismatches_substring(query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
+ /*plusp*/false,genestrand));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
+ /*plusp*/false,genestrand));
+#endif
}
nmismatches_whole += nmismatches;
debug0(printf("nmismatches %d from sarray\n",nmismatches));
@@ -5225,10 +5296,12 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
new->substrings_LtoH = substrings;
new->substrings_1toN = List_copy(substrings); /* Takes over as primary holder of substrings */
new->substrings_Nto1 = List_copy(substrings);
+ /* Do not use substrings after this */
new->junctions_LtoH = junctions;
new->junctions_1toN = List_copy(junctions); /* Takes over as primary holder of substrings */
new->junctions_Nto1 = List_copy(junctions);
+ /* Do not use junctions after this */
/* Note differences between substrings and junctions. Substrings
were pushed onto lists above, and junctions were created by the
@@ -5261,11 +5334,12 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
for (p = new->substrings_1toN; p != NULL; p = List_next(p)) {
substring = List_head(p);
if (Substring_ambiguous_p(substring) == true) {
- printf("%d..%d\tambig\tmismatches:%d\n",Substring_querystart(substring),Substring_queryend(substring),
- Substring_nmismatches_whole(substring));
+ printf("%d..%d\tambig\tmismatches_whole:%d\tmatches_posttrim:%d\tamb:%d\n",Substring_querystart(substring),Substring_queryend(substring),
+ Substring_nmismatches_whole(substring),Substring_nmatches_posttrim(substring),Substring_amb_length(substring));
} else {
- printf("%d..%d\t%u..%u\tmismatches:%d\n",Substring_querystart(substring),Substring_queryend(substring),
- Substring_alignstart_trim_chr(substring),Substring_alignend_trim_chr(substring),Substring_nmismatches_whole(substring));
+ printf("%d..%d\t%u..%u\tmismatches_whole:%d\tmatches_posttrim:%d\tamb:%d\n",Substring_querystart(substring),Substring_queryend(substring),
+ Substring_alignstart_trim_chr(substring),Substring_alignend_trim_chr(substring),Substring_nmismatches_whole(substring),
+ Substring_nmatches_posttrim(substring),Substring_amb_length(substring));
}
}
printf("\n");
@@ -5312,14 +5386,27 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
new->score = nmismatches_whole + indel_score; /* Want untrimmed */
new->nsegments = List_length(new->substrings_1toN);
- new->nmatches = querylength - nmismatches_whole;
- new->nmatches_posttrim = querylength_trimmed - nmismatches_whole;
+ /* new->nmatches_posttrim = querylength_trimmed - nmismatches_whole; */
+ new->nmatches_posttrim = 0;
+ /* Note: Cannot use substrings variable here. Need to use new->substrings_1toN */
+ for (p = new->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ new->nmatches_posttrim += Substring_nmatches_posttrim(substring);
+ }
+ debug0(printf("Setting nmatches_posttrim to be %d\n",new->nmatches_posttrim));
+
+ new->nmatches = new->nmatches_posttrim;
new->trim_left = Substring_trim_left(substring1);
new->trim_right = Substring_trim_right(substringN);
- new->trim_left_splicep = Substring_trim_left_splicep(substring1);
- new->trim_right_splicep = Substring_trim_right_splicep(substringN);
+ if ((new->trim_left_splicep = Substring_trim_left_splicep(substring1)) == true) {
+ new->nmatches += new->trim_left;
+ }
+ if ((new->trim_right_splicep = Substring_trim_right_splicep(substringN)) == true) {
+ new->nmatches += new->trim_right;
+ }
debug0(printf("substrings trim_left %d, trim_right %d\n",new->trim_left,new->trim_right));
+ debug0(printf("Setting nmatches to be %d\n",new->nmatches));
/* new->penalties = 0; */
@@ -5355,17 +5442,17 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
} else if (new->circularalias >= 0) {
new->altlocp = false;
debug0(printf("Returning circular %p from Stage3end_new_substrings with found_score %d\n",new,*found_score));
- debug0(printf("Stage3end_new_substrings returning %p\n",new));
+ debug0(printf("Stage3end_new_substrings returning %p\n\n",new));
return new;
} else if ((new->altlocp = altlocp[chrnum]) == false) {
debug0(printf("Returning primary %p from Stage3end_new_substrings with found_score %d\n",new,*found_score));
- debug0(printf("Stage3end_new_substrings returning %p\n",new));
+ debug0(printf("Stage3end_new_substrings returning %p\n\n",new));
return new;
} else {
debug0(printf("Returning altloc %p from Stage3end_new_substrings with found_score %d\n",new,*found_score));
- debug0(printf("Stage3end_new_substrings returning %p\n",new));
+ debug0(printf("Stage3end_new_substrings returning %p\n\n",new));
return new;
}
}
@@ -5424,7 +5511,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
printf("%d..%d with %d nmismatches",
- Substring_querystart(substring),Substring_queryend(substring),Substring_nmismatches_region(substring));
+ Substring_querystart(substring),Substring_queryend(substring),Substring_nmismatches_bothdiff(substring));
if (Substring_ambiguous_p(substring) == true) {
printf(" ambiguous");
}
@@ -5608,8 +5695,8 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
max_nmismatches = 0;
for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
- if (Substring_nmismatches_region(substring) > max_nmismatches) {
- max_nmismatches = Substring_nmismatches_region(substring);
+ if (Substring_nmismatches_bothdiff(substring) > max_nmismatches) {
+ max_nmismatches = Substring_nmismatches_bothdiff(substring);
}
}
@@ -5828,7 +5915,7 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
printf("%d..%d with %d nmismatches",
- Substring_querystart(substring),Substring_queryend(substring),Substring_nmismatches_region(substring));
+ Substring_querystart(substring),Substring_queryend(substring),Substring_nmismatches_bothdiff(substring));
if (Substring_ambiguous_p(substring) == true) {
printf(" ambiguous");
}
@@ -6012,8 +6099,8 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
max_nmismatches = 0;
for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
- if (Substring_nmismatches_region(substring) > max_nmismatches) {
- max_nmismatches = Substring_nmismatches_region(substring);
+ if (Substring_nmismatches_bothdiff(substring) > max_nmismatches) {
+ max_nmismatches = Substring_nmismatches_bothdiff(substring);
}
}
@@ -6798,8 +6885,8 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
new->ntscore = 0;
new->score = 0;
new->nsegments = 1;
- new->nmatches = genomiclength;
new->nmatches_posttrim = genomiclength;
+ new->nmatches = genomiclength;
new->trim_left = 0;
new->trim_right = 0;
@@ -7009,14 +7096,20 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
new->nmatches = Substring_match_length(new->substring1) - new->total_nmismatches;
#else
/* This method is now correct for SNP-tolerant alignment */
- new->nmatches = Substring_nmatches(substring);
new->nmatches_posttrim = Substring_nmatches_posttrim(substring);
#endif
+ new->nmatches = new->nmatches_posttrim;
new->trim_left = Substring_trim_left(substring);
new->trim_right = Substring_trim_right(substring);
- new->trim_left_splicep = Substring_trim_left_splicep(substring);
- new->trim_right_splicep = Substring_trim_right_splicep(substring);
+ if ((new->trim_left_splicep = Substring_trim_left_splicep(substring)) == true) {
+ new->nmatches += new->trim_left;
+ }
+ if ((new->trim_right_splicep = Substring_trim_right_splicep(substring)) == true) {
+ new->nmatches += new->trim_right;
+ }
+ debug0(printf(" trim on left: %d (splicep %d)\n",new->trim_left,new->trim_left_splicep));
+ debug0(printf(" trim on right: %d (splicep %d)\n",new->trim_right,new->trim_right_splicep));
/* new->penalties = 0; */
@@ -7300,16 +7393,20 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
new->nmatches = Substring_match_length(substring1) + Substring_match_length(substring2) - new->total_nmismatches;
#else
/* This method is now correct for SNP-tolerant alignment */
- new->nmatches = Substring_nmatches(substring1) + Substring_nmatches(substring2);
new->nmatches_posttrim = Substring_nmatches_posttrim(substring1) + Substring_nmatches_posttrim(substring2);
/* new->nmatches_posttrim += nindels; -- for use in goodness_cmp procedures */
/* new->nmatches_posttrim -= indel_penalty; -- for use in goodness_cmp procedures */
#endif
+ new->nmatches = new->nmatches_posttrim;
new->trim_left = Substring_trim_left(substring1);
new->trim_right = Substring_trim_right(substring2);
- new->trim_left_splicep = Substring_trim_left_splicep(substring1);
- new->trim_right_splicep = Substring_trim_right_splicep(substring2);
+ if ((new->trim_left_splicep = Substring_trim_left_splicep(substring1)) == true) {
+ new->nmatches += new->trim_left;
+ }
+ if ((new->trim_right_splicep = Substring_trim_right_splicep(substring2)) == true) {
+ new->nmatches += new->trim_right;
+ }
#if 0
#ifdef SCORE_INDELS_EVENTRIM
@@ -7626,15 +7723,19 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
new->nmatches = Substring_match_length(substring1) + Substring_match_length(substring2) - new->total_nmismatches;
#else
/* This method is now correct for SNP-tolerant alignment */
- new->nmatches = Substring_nmatches(substring1) + Substring_nmatches(substring2);
new->nmatches_posttrim = Substring_nmatches_posttrim(substring1) + Substring_nmatches_posttrim(substring2);
/* new->nmatches_posttrim -= indel_penalty; -- for use in goodness_cmp procedures */
#endif
+ new->nmatches = new->nmatches_posttrim;
new->trim_left = Substring_trim_left(substring1);
new->trim_right = Substring_trim_right(substring2);
- new->trim_left_splicep = Substring_trim_left_splicep(substring1);
- new->trim_right_splicep = Substring_trim_right_splicep(substring2);
+ if ((new->trim_left_splicep = Substring_trim_left_splicep(substring1)) == true) {
+ new->nmatches += new->trim_left;
+ }
+ if ((new->trim_right_splicep = Substring_trim_right_splicep(substring2)) == true) {
+ new->nmatches += new->trim_right;
+ }
#if 0
#ifdef SCORE_INDELS_EVENTRIM
@@ -7839,6 +7940,18 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
}
new->plusp = Substring_plusp(donor); /* default value, used if merge_samechr_p is true */
+ } else if (Substring_nmatches_posttrim(donor) < 15 ||
+ Substring_nmatches_posttrim(acceptor) < 15) {
+ /* Not enough evidence to find each end of the translocation */
+ new->substrings_LtoH = (List_T) NULL;
+ new->substrings_1toN = (List_T) NULL;
+ new->substrings_Nto1 = (List_T) NULL;
+ new->junctions_LtoH = (List_T) NULL;
+ new->junctions_1toN = (List_T) NULL;
+ new->junctions_Nto1 = (List_T) NULL;
+ Stage3end_free(&new);
+ return (T) NULL;
+
} else {
new->hittype = TRANSLOC_SPLICE;
new->genestrand = 0;
@@ -8084,8 +8197,8 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
/* new->mapq_loglik = Substring_mapq_loglik(acceptor); */
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(acceptor) + nmismatches_donor;
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(acceptor) + nmismatches_donor; */
- new->nmatches = Substring_nmatches(acceptor);
new->nmatches_posttrim = Substring_nmatches_posttrim(acceptor);
+ new->nmatches = Substring_nmatches(acceptor);
if (favor_ambiguous_p == true) {
new->nmatches += amb_length;
}
@@ -8096,8 +8209,8 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
/* new->mapq_loglik = Substring_mapq_loglik(donor); */
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(donor) + nmismatches_acceptor;
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(donor) + nmismatches_acceptor; */
- new->nmatches = Substring_nmatches(donor);
new->nmatches_posttrim = Substring_nmatches_posttrim(donor);
+ new->nmatches = Substring_nmatches(donor);
if (favor_ambiguous_p == true) {
new->nmatches += amb_length;
}
@@ -8110,8 +8223,8 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
/* new->mapq_loglik = Substring_mapq_loglik(donor) + Substring_mapq_loglik(acceptor); */
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(donor) + Substring_nmismatches_bothdiff(acceptor);
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(donor) + Substring_nmismatches_refdiff(acceptor); */
- new->nmatches = Substring_nmatches(donor) + Substring_nmatches(acceptor);
new->nmatches_posttrim = Substring_nmatches_posttrim(donor) + Substring_nmatches_posttrim(acceptor);
+ new->nmatches = Substring_nmatches(donor) + Substring_nmatches(acceptor);
debug0(printf("New splice has donor %d + acceptor %d matches, sensedir %d\n",
Substring_nmatches(donor),Substring_nmatches(acceptor),new->sensedir));
@@ -8446,10 +8559,10 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->nmismatches_bothdiff += Substring_nmismatches_bothdiff(acceptor);
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(donor) + Substring_nmismatches_refdiff(acceptor) + Substring_nmismatches_refdiff(shortexon); */
- new->nmatches = Substring_nmatches(shortexon);
new->nmatches_posttrim = Substring_nmatches_posttrim(shortexon);
+ new->nmatches = Substring_nmatches(shortexon);
if (donor == NULL) {
- if (favor_ambiguous_p == true) {
+ if (0 && favor_ambiguous_p == true) {
new->nmatches += amb_length_donor;
}
} else {
@@ -8457,7 +8570,7 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->nmatches += Substring_nmatches(donor);
}
if (acceptor == NULL) {
- if (favor_ambiguous_p == true) {
+ if (0 && favor_ambiguous_p == true) {
new->nmatches += amb_length_acceptor;
}
} else {
@@ -9139,9 +9252,9 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(new->substring1); */
/* Adding ambig_end_lengths to nmatches_posttrim would unnecessarily favor long ambig ends when comparing GMAP results */
- new->nmatches = nmatches_posttrim; /* To make addition of ambiguous lengths work, we need to use posttrim, not pretrim */
new->nmatches_posttrim = nmatches_posttrim;
- if (favor_ambiguous_p == true) {
+ new->nmatches = nmatches_posttrim + ambig_end_length_5 + ambig_end_length_3;
+ if (0 && favor_ambiguous_p == true) {
new->nmatches += ambig_end_length_5 + ambig_end_length_3;
}
debug0(printf(" nmatches %d = posttrim %d + ambig_end_length_5 %d + ambig_end_length_3 %d\n",
@@ -9167,8 +9280,8 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
new->gmap_min_splice_prob = min_splice_prob;
- new->trim_left = Pair_querypos(&(pairarray[0])) - ambig_end_length_5;
- if ((new->gmap_start_amb_length = ambig_end_length_5) > 0) {
+ new->trim_left = Pair_querypos(&(pairarray[0])) /*- ambig_end_length_5*/; /* Do not subtract ambig_end_length, so we are equivalent with substrings */
+ if (ambig_end_length_5 > 0) {
new->trim_left_splicep = true;
} else if (novelsplicingp == false) {
new->trim_left_splicep = false;
@@ -9190,8 +9303,8 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
}
}
- new->trim_right = (querylength - 1) - Pair_querypos(&(pairarray[npairs-1])) - ambig_end_length_3;
- if ((new->gmap_end_amb_length = ambig_end_length_3) > 0) {
+ new->trim_right = (querylength - 1) - Pair_querypos(&(pairarray[npairs-1])) /*- ambig_end_length_3*/; /* Do not subtract ambig_end_length, so we are equivalent with substrings */
+ if (ambig_end_length_3 > 0) {
new->trim_right_splicep = true;
} else if (novelsplicingp == false) {
new->trim_right_splicep = false;
@@ -9279,7 +9392,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
return (T) NULL;
} else if (new->circularalias >= 0) {
- debug0(printf("Returning GMAP %p\n",new));
+ debug0(printf("Returning GMAP %p with trims %d and %d (splicep %d and %d)\n",new,new->trim_left,new->trim_right,new->trim_left_splicep,new->trim_right_splicep));
new->altlocp = false;
return new;
@@ -9288,7 +9401,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
return new;
} else {
- debug0(printf("Returning GMAP %p\n",new));
+ debug0(printf("Returning GMAP %p with trims %d and %d (splicep %d and %d)\n",new,new->trim_left,new->trim_right,new->trim_left_splicep,new->trim_right_splicep));
return new;
}
}
@@ -10263,7 +10376,7 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
#endif
} else {
if (hit->nmatches > max_nmatches) {
- max_nmatches = hit->nmatches;
+ max_nmatches = hit->nmatches_posttrim + amb_length(hit);
max_nmatches_posttrim = hit->nmatches_posttrim;
}
#ifdef TERMINAL_SECOND_CLASS
@@ -10314,8 +10427,8 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
#ifdef TERMINAL_SECOND_CLASS
} else if ((hit->hittype == TERMINAL || hit->hittype == GMAP) &&
non_gmap_terminal_p == true) {
- if (hit->nmatches >= max_nmatches) {
- debug4(printf("Keeping a terminal with nmatches %d\n",hit->nmatches));
+ if (hit->nmatches_posttrim + amb_length(hit) >= max_nmatches) {
+ debug4(printf("Keeping a terminal with nmatches_posttrim %d + amb %d\n",hit->nmatches_posttrim,amb_length(hit)));
optimal = List_push(optimal,(void *) hit);
} else {
debug4(printf("Eliminating a terminal where non-terminals are present\n"));
@@ -11394,28 +11507,38 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
}
#endif
- /* Favors ambiguous splices over definitive splices */
- if (hit->nsegments > best_hit->nsegments) {
- if (hit->nmatches_posttrim > best_hit->nmatches_posttrim) {
- /* More segments and strictly more matches */
+ /* Favors definitive splices over ambiguous ones (by using nmatches_posttrim) */
+ if (known_ambiguous_p(hit) == true && known_ambiguous_p(best_hit) == false) {
+ return +1;
+ } else if (known_ambiguous_p(hit) == false && known_ambiguous_p(best_hit) == true) {
+ return -1;
+
+ } else if (hit->nsegments > best_hit->nsegments) {
+ if (hit->nmatches_posttrim >= best_hit->nmatches_posttrim) {
+ /* More segments and same or more matches */
+ debug7(printf("More segments and strictly more matches (posttrim)\n"));
return +1;
} else {
/* More segments, but don't add anything */
+ debug7(printf("More segments but don't add anything\n"));
return -1;
}
} else if (hit->nsegments < best_hit->nsegments) {
- if (hit->nmatches_posttrim >= best_hit->nmatches_posttrim) {
- /* Fewer segments, but same or more matches */
+ if (hit->nmatches_posttrim > best_hit->nmatches_posttrim) {
+ /* Fewer segments, but strictly more matches */
+ debug7(printf("Fewer segments and same or more matches (posttrim)\n"));
return +1;
} else {
- /* Fewer segments and don't add anything */
+ debug7(printf("Fewer segments and don't add anything\n"));
+ /* Fewer segments, and don't add anything */
return -1;
}
} else if (hit->nmatches_posttrim < best_hit->nmatches_posttrim) {
debug7(printf(" => %d loses by nmatches_posttrim\n",k));
return -1;
+
} else if (hit->nmatches_posttrim > best_hit->nmatches_posttrim) {
debug7(printf(" => %d wins by nmatches_posttrim\n",k));
return +1;
@@ -11436,6 +11559,7 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
debug7(printf(" => %d wins by hittype\n",k));
return +1;
+#if 0
} else if (start_amb_length(hit) + end_amb_length(hit) > 0 &&
start_amb_length(best_hit) + end_amb_length(best_hit) == 0) {
debug7(printf(" => %d loses by ambiguity\n",k));
@@ -11444,6 +11568,7 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
start_amb_length(best_hit) + end_amb_length(best_hit) > 0) {
debug7(printf(" => %d wins by ambiguity\n",k));
return +1;
+#endif
} else if (hit->nindels > best_hit->nindels) {
debug7(printf(" => %d loses by nindels\n",k));
@@ -14001,8 +14126,8 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
/* Do not try to resolve ambiguity on inside of concordant ends */
if (hit5->plusp == true && hit3->plusp == true) {
new->dir = +1;
- insertlength1 = (ilength5_low + ilength3_high - 1) + hit5->trim_left + hit3->trim_right + hit5->gmap_start_amb_length + hit3->gmap_end_amb_length;
- insertlength2 = (ilength3_low + ilength5_high - 1) + hit3->trim_left + hit5->trim_right + hit3->gmap_start_amb_length + hit5->gmap_end_amb_length;
+ insertlength1 = (ilength5_low + ilength3_high - 1) + hit5->trim_left + hit3->trim_right /*+ hit5->gmap_start_amb_length + hit3->gmap_end_amb_length*/;
+ insertlength2 = (ilength3_low + ilength5_high - 1) + hit3->trim_left + hit5->trim_right /*+ hit3->gmap_start_amb_length + hit5->gmap_end_amb_length*/;
debug10(printf("insertlength1 is %d, insertlength2 is %d\n",insertlength1,insertlength2));
if (insertlength1 < insertlength2) {
new->insertlength = insertlength1;
@@ -14014,8 +14139,8 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
} else if (hit5->plusp == false && hit3->plusp == false) {
new->dir = -1;
- insertlength1 = (ilength5_low + ilength3_high - 1) + hit3->trim_left + hit5->trim_right + hit3->gmap_start_amb_length + hit5->gmap_end_amb_length;
- insertlength2 = (ilength3_low + ilength5_high - 1) + hit5->trim_left + hit3->trim_right + hit5->gmap_start_amb_length + hit3->gmap_end_amb_length;
+ insertlength1 = (ilength5_low + ilength3_high - 1) + hit3->trim_left + hit5->trim_right /*+ hit3->gmap_start_amb_length + hit5->gmap_end_amb_length*/;
+ insertlength2 = (ilength3_low + ilength5_high - 1) + hit5->trim_left + hit3->trim_right /*+ hit5->gmap_start_amb_length + hit3->gmap_end_amb_length*/;
debug10(printf("insertlength1 is %d, insertlength2 is %d\n",insertlength1,insertlength2));
if (insertlength1 < insertlength2) {
new->insertlength = insertlength1;
@@ -14627,8 +14752,8 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
/* Do not alter score, so the alignmnent terminates at the known splice site */
new->score = hit5->score + hit3->score /* + unresolved_amb_length */;
- new->nmatches = hit5->nmatches + hit3->nmatches - unresolved_amb_length;
new->nmatches_posttrim = hit5->nmatches_posttrim + hit3->nmatches_posttrim;
+ new->nmatches = hit5->nmatches + hit3->nmatches - unresolved_amb_length;
/* new->overlap_known_gene_p = false; -- initialized later when resolving multimappers */
new->tally = -1L;
@@ -14746,8 +14871,7 @@ hitpair_sort_cmp (const void *a, const void *b) {
x->hit5->low - x->hit5->chroffset,x->hit5->high - x->hit5->chroffset,
x->hit3->low - x->hit3->chroffset,x->hit3->high - x->hit3->chroffset,
x->dir,x->hit5->circularalias,x->hit3->circularalias,x->nmatches,x->nmatches_posttrim,
- start_amb_length(x->hit5) + end_amb_length(x->hit5),start_amb_length(x->hit3) + end_amb_length(x->hit3),
- x->hit5->sensedir,x->hit3->sensedir));
+ amb_length(x->hit5),amb_length(x->hit3),x->hit5->sensedir,x->hit3->sensedir));
debug8(printf(" with (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d\n",
Pairtype_string(y->pairtype),hittype_string(y->hit5->hittype),
@@ -14755,8 +14879,7 @@ hitpair_sort_cmp (const void *a, const void *b) {
y->hit5->low - y->hit5->chroffset,y->hit5->high - y->hit5->chroffset,
y->hit3->low - y->hit3->chroffset,y->hit3->high - y->hit3->chroffset,
y->dir,y->hit5->circularalias,y->hit3->circularalias,y->nmatches,y->nmatches_posttrim,
- start_amb_length(y->hit5) + end_amb_length(y->hit5),start_amb_length(y->hit3) + end_amb_length(y->hit3),
- y->hit5->sensedir,y->hit3->sensedir));
+ amb_length(y->hit5),amb_length(y->hit3),y->hit5->sensedir,y->hit3->sensedir));
x_hit5_low = normalize_coord(x->hit5->low,x->hit5->circularalias,x->hit5->chrlength);
@@ -15508,22 +15631,51 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
#endif
- /* Favors ambiguous splices over definitive splices */
- if (hitpair->hit5->nsegments + hitpair->hit3->nsegments > best_hitpair->hit5->nsegments + best_hitpair->hit3->nsegments) {
- if (hitpair->nmatches_posttrim > best_hitpair->nmatches_posttrim) {
- /* More segments and strictly more matches */
+ /* Favors substring ambiguous splices over definitive splices, and
+ favors definitive splices over trimmed ambiguous ones (by using
+ nmatches_posttrim) */
+ if (known_ambiguous_p(hitpair->hit5) == true && known_ambiguous_p(best_hitpair->hit5) == false &&
+ known_ambiguous_p(hitpair->hit3) == known_ambiguous_p(best_hitpair->hit3) &&
+ hitpair->insertlength <= best_hitpair->insertlength) {
+ debug8(printf("Case 1\n"));
+ return +1;
+ } else if (known_ambiguous_p(hitpair->hit5) == false && known_ambiguous_p(best_hitpair->hit5) == true &&
+ known_ambiguous_p(hitpair->hit3) == known_ambiguous_p(best_hitpair->hit3) &&
+ hitpair->insertlength >= best_hitpair->insertlength) {
+ debug8(printf("Case 2\n"));
+ return -1;
+
+ } else if (known_ambiguous_p(hitpair->hit3) == true && known_ambiguous_p(best_hitpair->hit3) == false &&
+ known_ambiguous_p(hitpair->hit5) == known_ambiguous_p(best_hitpair->hit5) &&
+ hitpair->insertlength <= best_hitpair->insertlength) {
+ debug8(printf("Case 3\n"));
+ return +1;
+
+ } else if (known_ambiguous_p(hitpair->hit3) == false && known_ambiguous_p(best_hitpair->hit3) == true &&
+ known_ambiguous_p(hitpair->hit5) == known_ambiguous_p(best_hitpair->hit5) &&
+ hitpair->insertlength > best_hitpair->insertlength) {
+ debug8(printf("Case 4\n"));
+ return -1;
+
+ } else if (hitpair->hit5->nsegments + hitpair->hit3->nsegments > best_hitpair->hit5->nsegments + best_hitpair->hit3->nsegments) {
+ if (hitpair->nmatches_posttrim >= best_hitpair->nmatches_posttrim) {
+ /* More segments and same or more matches */
+ debug8(printf("More segments and strictly more matches (posttrim)\n"));
return +1;
} else {
/* More segments, but don't add anything */
+ debug8(printf("More segments but don't add anything\n"));
return -1;
}
} else if (hitpair->hit5->nsegments + hitpair->hit3->nsegments < best_hitpair->hit5->nsegments + best_hitpair->hit3->nsegments) {
- if (hitpair->nmatches_posttrim >= best_hitpair->nmatches_posttrim) {
- /* Fewer segments, but same or more matches */
+ if (hitpair->nmatches_posttrim > best_hitpair->nmatches_posttrim) {
+ /* Fewer segments, but strictly more matches */
+ debug8(printf("Fewer segments and same or more matches (posttrim)\n"));
return +1;
} else {
/* Fewer segments and don't add anything */
+ debug8(printf("Fewer segments and don't add anything\n"));
return -1;
}
@@ -15597,6 +15749,7 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
return +1;
#endif
+#if 0
} else if (start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5) +
start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3) == 0 &&
start_amb_length(best_hitpair->hit5) + end_amb_length(best_hitpair->hit5) +
@@ -15610,6 +15763,7 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
start_amb_length(best_hitpair->hit3) + end_amb_length(best_hitpair->hit3) == 0) {
debug8(printf(" => wins by ambiguity\n"));
return +1;
+#endif
#if 0
} else if (hitpair->absdifflength < best_hitpair->absdifflength) {
@@ -15773,8 +15927,7 @@ pair_remove_bad_superstretches (bool *keep_p, Stage3pair_T superstretch, List_T
stage3pair->hit5->low - stage3pair->hit5->chroffset,stage3pair->hit5->high - stage3pair->hit5->chroffset,
stage3pair->hit3->low - stage3pair->hit3->chroffset,stage3pair->hit3->high - stage3pair->hit3->chroffset,
stage3pair->dir,stage3pair->nmatches,stage3pair->nmatches_posttrim,
- stage3pair->insertlength,stage3pair->amb_status_inside,
- start_amb_length(stage3pair->hit5)+ end_amb_length(stage3pair->hit5),start_amb_length(stage3pair->hit3) + end_amb_length(stage3pair->hit3));
+ stage3pair->insertlength,stage3pair->amb_status_inside,amb_length(stage3pair->hit5),amb_length(stage3pair->hit3));
if (stage3pair->hit5->hittype == GMAP) {
Pair_dump_comp_array(stage3pair->hit5->pairarray,stage3pair->hit5->npairs);
}
@@ -15789,8 +15942,7 @@ pair_remove_bad_superstretches (bool *keep_p, Stage3pair_T superstretch, List_T
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hitpair->dir,hitpair->nmatches,hitpair->nmatches_posttrim,
- hitpair->insertlength,hitpair->amb_status_inside,
- start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5),start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3));
+ hitpair->insertlength,hitpair->amb_status_inside,amb_length(hitpair->hit5),amb_length(hitpair->hit3));
if (hitpair->hit5->hittype == GMAP) {
Pair_dump_comp_array(hitpair->hit5->pairarray,hitpair->hit5->npairs);
}
@@ -15933,8 +16085,7 @@ pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hitpair->dir,hitpair->hit5->circularalias,hitpair->hit3->circularalias,hitpair->nmatches,hitpair->nmatches_posttrim,
- start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5),start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3),
- hitpair->hit5->sensedir,hitpair->hit3->sensedir);
+ amb_length(hitpair->hit5),amb_length(hitpair->hit3),hitpair->hit5->sensedir,hitpair->hit3->sensedir);
}
);
@@ -16847,19 +16998,19 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
} else {
#ifdef DO_FINAL
- /* Final: based on nmatches. ? leads to indelbreaks and bad introns. But skipping this leads to nearly-identical alignments */
+ /* Final: based on nmatches_posttrim + amb_length */
max_nmatches = 0;
for (p = hitpairlist; p != NULL; p = p->rest) {
hitpair = (Stage3pair_T) p->first;
- debug6(printf("%u..%u|%u..%u types %s and %s, score_eventrim %d+%d, pairlength %d, outerlength %u\n",
+ debug6(printf("%u..%u|%u..%u types %s and %s, score_eventrim %d+%d, nmatches_posttrim %d + amb %d+%d, pairlength %d, outerlength %u\n",
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
- hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,
- hitpair->insertlength,hitpair->outerlength));
+ hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,hitpair->nmatches_posttrim,
+ amb_length(hitpair->hit5),amb_length(hitpair->hit3),hitpair->insertlength,hitpair->outerlength));
- if (hitpair->nmatches > max_nmatches) {
- max_nmatches = hitpair->nmatches;
+ if (hitpair->nmatches_posttrim + amb_length(hitpair->hit5) + amb_length(hitpair->hit3) > max_nmatches) {
+ max_nmatches = hitpair->nmatches_posttrim + amb_length(hitpair->hit5) + amb_length(hitpair->hit3);
}
}
@@ -16910,11 +17061,12 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
optimal = List_push(optimal,hitpair);
#ifdef DO_FINAL
- } else if (hitpair->nmatches < cutoff_level) {
- debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with nmatches %d < cutoff_level %d (finalp %d)\n",
+ } else if (hitpair->nmatches_posttrim + amb_length(hitpair->hit5) + amb_length(hitpair->hit3) < cutoff_level) {
+ debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with nmatches_posttrim %d (%d+%d) + amb %d+%d < cutoff_level %d (finalp %d)\n",
hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
- hitpair->nmatches,cutoff_level,finalp));
+ hitpair->nmatches_posttrim,hitpair->hit5->nmatches_posttrim,hitpair->hit3->nmatches_posttrim,
+ amb_length(hitpair->hit5),amb_length(hitpair->hit3),cutoff_level,finalp));
*eliminatedp = true;
Stage3pair_free(&hitpair);
#else
diff --git a/src/stage3hr.h b/src/stage3hr.h
index 488e865..ec2637a 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 196273 2016-08-12 15:15:06Z twu $ */
+/* $Id: stage3hr.h 197778 2016-09-14 00:44:23Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -104,6 +104,8 @@ Stage3end_best_score_paired (List_T hits);
extern int
Stage3end_nmatches_posttrim (T this);
extern int
+Stage3end_nmatches (T this);
+extern int
Stage3end_nmismatches_whole (T this);
extern int
Stage3end_nmismatches_bothdiff (T this);
@@ -130,10 +132,6 @@ Stage3end_trim_left (T this);
extern int
Stage3end_trim_right (T this);
extern int
-Stage3end_trim_left_raw (T this);
-extern int
-Stage3end_trim_right_raw (T this);
-extern int
Stage3end_total_trim (T this);
extern int
Stage3end_circularpos (T this);
diff --git a/src/substring.c b/src/substring.c
index c91c1b9..87ae86d 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 196404 2016-08-16 14:47:49Z twu $";
+static char rcsid[] = "$Id: substring.c 197774 2016-09-14 00:40:45Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -20,6 +20,7 @@ static char rcsid[] = "$Id: substring.c 196404 2016-08-16 14:47:49Z twu $";
#include "pairdef.h" /* For State_T */
#include "pair.h" /* For Pair_print_gsnap */
#include "comp.h"
+#include "splice.h"
#define TRIM_MATCH_SCORE 1
@@ -250,7 +251,8 @@ struct T {
int nmismatches_bothdiff; /* Over region left after trimming */
int nmismatches_refdiff; /* Over region left after trimming */
/* nsnpdiffs = nmismatches_bothdiff - nmismatches_refdiff */
- int nmatches; /* Over region left after trimming */
+
+ int nmatches; /* Not used anymore. Use nmatches_posttrim instead. */
int trim_left;
int trim_right;
@@ -1820,28 +1822,15 @@ embellish_genomic_sam (char *genomic_diff, char *query, int querystart, int quer
}
-/* Modified from trim_novel_spliceends in stage3.c */
-/* Note: If substring does not extend to ends of query, then region
- beyond querystart and queryend might actually be matching, and not
- mismatches. Could fix in the future. */
static void
substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_length_5, int *ambig_end_length_3,
Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
double *ambig_prob_5, double *ambig_prob_3, int *sensedir,
Univcoord_T chroffset) {
- Univcoord_T genomicpos, start_genomicpos, middle_genomicpos, end_genomicpos;
- Univcoord_T splice_genomepos_5, splice_genomepos_3, splice_genomepos_5_mm, splice_genomepos_3_mm;
- Univcoord_T start, middle, end; /* start to middle has mismatches, while middle to end has none */
- double donor_prob, acceptor_prob;
- double max_prob_5 = 0.0, max_prob_3 = 0.0,
- max_prob_sense_forward_5 = 0.0, max_prob_sense_anti_5 = 0.0,
- max_prob_sense_forward_3 = 0.0, max_prob_sense_anti_3 = 0.0;
- double max_prob_5_mm = 0.0, max_prob_3_mm = 0.0,
- max_prob_sense_forward_5_mm = 0.0, max_prob_sense_anti_5_mm = 0.0,
- max_prob_sense_forward_3_mm = 0.0, max_prob_sense_anti_3_mm = 0.0;
- Splicetype_T splicetype5, splicetype3, splicetype5_mm, splicetype3_mm;
- int splice_sensedir_5, splice_sensedir_3, splice_sensedir_5_mm, splice_sensedir_3_mm;
+ Univcoord_T start5, middle5, end5, start3, middle3, end3;
+ Univcoord_T genomicstart5, genomicend3;
+ bool solve5p, solve3p, plusp;
debug13(printf("\nEntered substring_trim_novel_spliceends with sensedir %d\n",*sensedir));
@@ -1852,744 +1841,79 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
/* start is distal, end is medial */
if (substringN == NULL) {
- /* Skip 3' end*/
- } else if (substringN->plusp == true) {
- middle = substringN->alignend_trim + 1;
- if ((start = middle + END_SPLICESITE_SEARCH) > substringN->genomicend) {
- start = substringN->genomicend;
- }
- if ((end = middle - END_SPLICESITE_SEARCH) < substringN->alignstart_trim + MIN_EXON_LENGTH) {
- end = substringN->alignstart_trim + MIN_EXON_LENGTH;
- }
- debug13(printf("\n1 Set end points for 3' trim to be %u..%u..%u\n",start,middle,end));
-
- } else {
- middle = substringN->alignend_trim - 1;
- if ((start = middle - END_SPLICESITE_SEARCH) < substringN->genomicend) {
- start = substringN->genomicend;
- }
- if ((end = middle + END_SPLICESITE_SEARCH) > substringN->alignstart_trim - MIN_EXON_LENGTH) {
- end = substringN->alignstart_trim - MIN_EXON_LENGTH;
- }
- debug13(printf("\n2 Set end points for 3' trim to be %u..%u..%u\n",start,middle,end));
- }
-
- if (substringN == NULL) {
/* Skip 3' end */
- } else if (*sensedir == SENSE_FORWARD) {
- if (substringN->plusp) {
- splicetype3 = splicetype3_mm = DONOR;
-
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos >= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
- donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
- debug13(printf("3', watson, sense anti %u %u %f mm\n",genomicpos,genomicpos-chroffset,donor_prob));
- if (donor_prob > max_prob_3_mm) {
- max_prob_3_mm = donor_prob;
- splice_genomepos_3_mm = genomicpos;
- }
- genomicpos--;
- }
- while (genomicpos >= end_genomicpos) {
- donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
- debug13(printf("3', watson, sense anti %u %u %f\n",genomicpos,genomicpos-chroffset,donor_prob));
- if (donor_prob > max_prob_3) {
- max_prob_3 = donor_prob;
- splice_genomepos_3 = genomicpos;
- }
- genomicpos--;
- }
- debug13(printf("\n"));
-
- } else {
- splicetype3 = splicetype3_mm = ANTIDONOR;
-
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos <= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
- donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
- debug13(printf("3', crick, sense forward %u %u %f mm\n",genomicpos,genomicpos-chroffset,donor_prob));
- if (donor_prob > max_prob_3_mm) {
- max_prob_3_mm = donor_prob;
- splice_genomepos_3_mm = genomicpos;
- }
- genomicpos++;
- }
- while (genomicpos <= end_genomicpos) {
- donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
- debug13(printf("3', crick, sense forward %u %u %f\n",genomicpos,genomicpos-chroffset,donor_prob));
- if (donor_prob > max_prob_3) {
- max_prob_3 = donor_prob;
- splice_genomepos_3 = genomicpos;
- }
- genomicpos++;
- }
- debug13(printf("\n"));
- }
-
- } else if (*sensedir == SENSE_ANTI) {
- if (substringN->plusp) {
- splicetype3 = splicetype3_mm = ANTIACCEPTOR;
+ solve3p = false;
+ genomicend3 = 0;
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos >= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
- acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */
- debug13(printf("3', watson, sense forward %u %u %f mm\n",genomicpos,genomicpos-chroffset,acceptor_prob));
- if (acceptor_prob > max_prob_3_mm) {
- max_prob_3_mm = acceptor_prob;
- splice_genomepos_3_mm = genomicpos;
- }
- genomicpos--;
- }
- while (genomicpos >= end_genomicpos) {
- acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */
- debug13(printf("3', watson, sense forward %u %u %f\n",genomicpos,genomicpos-chroffset,acceptor_prob));
- if (acceptor_prob > max_prob_3) {
- max_prob_3 = acceptor_prob;
- splice_genomepos_3 = genomicpos;
- }
- genomicpos--;
- }
- debug13(printf("\n"));
+ } else if ((plusp = substringN->plusp) == true) {
+ solve3p = true;
+ genomicend3 = substringN->genomicend;
- } else {
- splicetype3 = splicetype3_mm = ACCEPTOR;
-
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos <= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
- acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */
- debug13(printf("3', crick, sense anti %u %u %f mm\n",genomicpos,genomicpos-chroffset,acceptor_prob));
- if (acceptor_prob > max_prob_3_mm) {
- max_prob_3_mm = acceptor_prob;
- splice_genomepos_3_mm = genomicpos;
- }
- genomicpos++;
- }
- while (genomicpos <= end_genomicpos) {
- acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */
- debug13(printf("3', crick, sense anti %u %u %f\n",genomicpos,genomicpos-chroffset,acceptor_prob));
- if (acceptor_prob > max_prob_3) {
- max_prob_3 = acceptor_prob;
- splice_genomepos_3 = genomicpos;
- }
- genomicpos++;
- }
- debug13(printf("\n"));
+ middle3 = substringN->alignend_trim + 1;
+ if ((start3 = middle3 + END_SPLICESITE_SEARCH) > substringN->genomicend) {
+ start3 = substringN->genomicend;
}
-
+ if ((end3 = middle3 - END_SPLICESITE_SEARCH) < substringN->alignstart_trim + MIN_EXON_LENGTH) {
+ end3 = substringN->alignstart_trim + MIN_EXON_LENGTH;
+ }
+ debug13(printf("\n1 Set end points for 3' trim to be %u..%u..%u\n",
+ start3 - chroffset,middle3 - chroffset,end3 - chroffset));
+
} else {
- if (substringN->plusp) {
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos >= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
- donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
- acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */
- debug13(printf("3', watson, sense null %u %u %f %f mm\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
- if (donor_prob > max_prob_sense_forward_3_mm) {
- max_prob_sense_forward_3_mm = donor_prob;
- if (donor_prob > max_prob_3_mm) {
- max_prob_3_mm = donor_prob;
- splice_genomepos_3_mm = genomicpos;
- /* splice_cdna_direction_3_mm = +1; */
- splice_sensedir_3_mm = SENSE_FORWARD;
- splicetype3_mm = DONOR;
- }
- }
- if (acceptor_prob > max_prob_sense_anti_3_mm) {
- max_prob_sense_anti_3_mm = acceptor_prob;
- if (acceptor_prob > max_prob_3_mm) {
- max_prob_3_mm = acceptor_prob;
- splice_genomepos_3_mm = genomicpos;
- /* splice_cdna_direction_3_mm = -1; */
- splice_sensedir_3_mm = SENSE_ANTI;
- splicetype3_mm = ANTIACCEPTOR;
- }
- }
- genomicpos--;
- }
- while (genomicpos >= end_genomicpos) {
- donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */
- acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */
- debug13(printf("3', watson, sense null %u %u %f %f\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
- if (donor_prob > max_prob_sense_forward_3) {
- max_prob_sense_forward_3 = donor_prob;
- if (donor_prob > max_prob_3) {
- max_prob_3 = donor_prob;
- splice_genomepos_3 = genomicpos;
- /* splice_cdna_direction_3 = +1; */
- splice_sensedir_3 = SENSE_FORWARD;
- splicetype3 = DONOR;
- }
- }
- if (acceptor_prob > max_prob_sense_anti_3) {
- max_prob_sense_anti_3 = acceptor_prob;
- if (acceptor_prob > max_prob_3) {
- max_prob_3 = acceptor_prob;
- splice_genomepos_3 = genomicpos;
- /* splice_cdna_direction_3 = -1; */
- splice_sensedir_3 = SENSE_ANTI;
- splicetype3 = ANTIACCEPTOR;
- }
- }
- genomicpos--;
- }
- debug13(printf("\n"));
+ solve3p = true;
+ genomicend3 = substringN->genomicend;
- } else {
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos <= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
- donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
- acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */
- debug13(printf("3', crick, sense null %u %u %f %f mm\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
- if (donor_prob > max_prob_sense_forward_3_mm) {
- max_prob_sense_forward_3_mm = donor_prob;
- if (donor_prob > max_prob_3_mm) {
- max_prob_3_mm = donor_prob;
- splice_genomepos_3_mm = genomicpos;
- /* splice_cdna_direction_3_mm = +1; */
- splice_sensedir_3_mm = SENSE_FORWARD;
- splicetype3_mm = ANTIDONOR;
- }
- }
- if (acceptor_prob > max_prob_sense_anti_3_mm) {
- max_prob_sense_anti_3_mm = acceptor_prob;
- if (acceptor_prob > max_prob_3_mm) {
- max_prob_3_mm = acceptor_prob;
- splice_genomepos_3_mm = genomicpos;
- /* splice_cdna_direction_3_mm = -1; */
- splice_sensedir_3_mm = SENSE_ANTI;
- splicetype3_mm = ACCEPTOR;
- }
- }
- genomicpos++;
- }
- while (genomicpos <= end_genomicpos) {
- donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */
- acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */
- debug13(printf("3', crick, sense null %u %u %f %f\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
- if (donor_prob > max_prob_sense_forward_3) {
- max_prob_sense_forward_3 = donor_prob;
- if (donor_prob > max_prob_3) {
- max_prob_3 = donor_prob;
- splice_genomepos_3 = genomicpos;
- /* splice_cdna_direction_3 = +1; */
- splice_sensedir_3 = SENSE_FORWARD;
- splicetype3 = ANTIDONOR;
- }
- }
- if (acceptor_prob > max_prob_sense_anti_3) {
- max_prob_sense_anti_3 = acceptor_prob;
- if (acceptor_prob > max_prob_3) {
- max_prob_3 = acceptor_prob;
- splice_genomepos_3 = genomicpos;
- /* splice_cdna_direction_3 = -1; */
- splice_sensedir_3 = SENSE_ANTI;
- splicetype3 = ACCEPTOR;
- }
- }
- genomicpos++;
- }
- debug13(printf("\n"));
+ middle3 = substringN->alignend_trim - 1;
+ if ((start3 = middle3 - END_SPLICESITE_SEARCH) < substringN->genomicend) {
+ start3 = substringN->genomicend;
}
- }
-
- if (substringN == NULL) {
- /* Skip 3' end */
- } else if (*sensedir != SENSE_NULL) {
- if (max_prob_3 > END_SPLICESITE_PROB_MATCH) {
- debug13(printf("Found good splice %s on 3' end at %u with probability %f\n",
- Splicetype_string(splicetype3),splice_genomepos_3-chroffset,max_prob_3));
- if (substringN->plusp) {
- *ambig_end_length_3 = substringN->genomicend - splice_genomepos_3;
- debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,substringN->genomicend,splice_genomepos_3));
- } else {
- *ambig_end_length_3 = splice_genomepos_3 - substringN->genomicend;
- debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3,substringN->genomicend));
- }
- *ambig_splicetype_3 = splicetype3;
- *ambig_prob_3 = max_prob_3;
-
- } else if (max_prob_3_mm > END_SPLICESITE_PROB_MISMATCH) {
- debug13(printf("Found good mismatch splice %s on 3' end at %u with probability %f\n",
- Splicetype_string(splicetype3_mm),splice_genomepos_3_mm-chroffset,max_prob_3_mm));
- if (substringN->plusp) {
- *ambig_end_length_3 = substringN->genomicend - splice_genomepos_3_mm;
- debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,substringN->genomicend,splice_genomepos_3_mm));
- } else {
- *ambig_end_length_3 = splice_genomepos_3_mm - substringN->genomicend;
- debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3_mm,substringN->genomicend));
- }
- *ambig_splicetype_3 = splicetype3_mm;
- *ambig_prob_3 = max_prob_3_mm;
+ if ((end3 = middle3 + END_SPLICESITE_SEARCH) > substringN->alignstart_trim - MIN_EXON_LENGTH) {
+ end3 = substringN->alignstart_trim - MIN_EXON_LENGTH;
}
+ debug13(printf("\n2 Set end points for 3' trim to be %u..%u..%u\n",
+ start3 - chroffset,middle3 - chroffset,end3 - chroffset));
}
-
/* start is distal, end is medial */
if (substring1 == NULL) {
/* Skip 5' end */
- } else if (substring1->plusp == true) {
- middle = substring1->alignstart_trim - 1;
- if ((start = middle - END_SPLICESITE_SEARCH) < substring1->genomicstart) {
- start = substring1->genomicstart;
- }
- if ((end = middle + END_SPLICESITE_SEARCH) > substring1->alignend_trim - MIN_EXON_LENGTH) {
- end = substring1->alignend_trim - MIN_EXON_LENGTH;
- }
- debug13(printf("\n1 Set end points for 5' trim to be %u..%u..%u\n",start,middle,end));
+ solve5p = false;
+ genomicstart5 = 0;
- } else {
- middle = substring1->alignstart_trim + 1;
- if ((start = middle + END_SPLICESITE_SEARCH) > substring1->genomicstart) {
- start = substring1->genomicstart;
- }
- if ((end = middle - END_SPLICESITE_SEARCH) < substring1->alignend_trim + MIN_EXON_LENGTH) {
- end = substring1->alignend_trim + MIN_EXON_LENGTH;
- }
- debug13(printf("\n2 Set end points for 5' trim to be %u..%u..%u\n",start,middle,end));
- }
-
- if (substring1 == NULL) {
- /* Skip 5' end */
- } else if (*sensedir == SENSE_FORWARD) {
- if (substring1->plusp) {
- splicetype5 = splicetype5_mm = ACCEPTOR;
-
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos <= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
- acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
- debug13(printf("5', watson, sense forward %u %u %f mm\n",genomicpos,genomicpos-chroffset,acceptor_prob));
- if (acceptor_prob > max_prob_5_mm) {
- max_prob_5_mm = acceptor_prob;
- splice_genomepos_5_mm = genomicpos;
- }
- genomicpos++;
- }
- while (genomicpos <= end_genomicpos) {
- acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
- debug13(printf("5', watson, sense forward %u %u %f\n",genomicpos,genomicpos-chroffset,acceptor_prob));
- if (acceptor_prob > max_prob_5) {
- max_prob_5 = acceptor_prob;
- splice_genomepos_5 = genomicpos;
- }
- genomicpos++;
- }
- debug13(printf("\n"));
+ } else if ((plusp = substring1->plusp) == true) {
+ solve5p = true;
+ genomicstart5 = substring1->genomicstart;
- } else {
- splicetype5 = splicetype5_mm = ANTIACCEPTOR;
-
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos >= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
- acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
- debug13(printf("5', crick, sense anti %u %u %f mm\n",genomicpos,genomicpos-chroffset,acceptor_prob));
- if (acceptor_prob > max_prob_5_mm) {
- max_prob_5_mm = acceptor_prob;
- splice_genomepos_5_mm = genomicpos;
- }
- genomicpos--;
- }
- while (genomicpos >= end_genomicpos) {
- acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
- debug13(printf("5', crick, sense anti %u %u %f\n",genomicpos,genomicpos-chroffset,acceptor_prob));
- if (acceptor_prob > max_prob_5) {
- max_prob_5 = acceptor_prob;
- splice_genomepos_5 = genomicpos;
- }
- genomicpos--;
- }
- debug13(printf("\n"));
+ middle5 = substring1->alignstart_trim - 1;
+ if ((start5 = middle5 - END_SPLICESITE_SEARCH) < substring1->genomicstart) {
+ start5 = substring1->genomicstart;
}
-
- } else if (*sensedir == SENSE_ANTI) {
- if (substring1->plusp) {
- splicetype5 = splicetype5_mm = ANTIDONOR;
-
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos <= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
- donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */
- debug13(printf("5', watson, sense anti %u %u %f mm\n",genomicpos,genomicpos-chroffset,donor_prob));
- if (donor_prob > max_prob_5_mm) {
- max_prob_5_mm = donor_prob;
- splice_genomepos_5_mm = genomicpos;
- }
- genomicpos++;
- }
- while (genomicpos <= end_genomicpos) {
- donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */
- debug13(printf("5', watson, sense anti %u %u %f\n",genomicpos,genomicpos-chroffset,donor_prob));
- if (donor_prob > max_prob_5) {
- max_prob_5 = donor_prob;
- splice_genomepos_5 = genomicpos;
- }
- genomicpos++;
- }
- debug13(printf("\n"));
-
- } else {
- splicetype5 = splicetype5_mm = DONOR;
-
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos >= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
- donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */
- debug13(printf("5', crick, sense forward %u %u %f mm\n",genomicpos,genomicpos-chroffset,donor_prob));
- if (donor_prob > max_prob_5_mm) {
- max_prob_5_mm = donor_prob;
- splice_genomepos_5_mm = genomicpos;
- }
- genomicpos--;
- }
- while (genomicpos >= end_genomicpos) {
- donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */
- debug13(printf("5', crick, sense forward %u %u %f\n",genomicpos,genomicpos-chroffset,donor_prob));
- if (donor_prob > max_prob_5) {
- max_prob_5 = donor_prob;
- splice_genomepos_5 = genomicpos;
- }
- genomicpos--;
- }
- debug13(printf("\n"));
+ if ((end5 = middle5 + END_SPLICESITE_SEARCH) > substring1->alignend_trim - MIN_EXON_LENGTH) {
+ end5 = substring1->alignend_trim - MIN_EXON_LENGTH;
}
-
+ debug13(printf("\n1 Set end points for 5' trim to be %u..%u..%u\n",
+ start5 - chroffset,middle5 - chroffset,end5 - chroffset));
+
} else {
- if (substring1->plusp) {
- start_genomicpos = start;
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos <= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
- acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
- donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */
- debug13(printf("5', watson, sense null %u %u %f %f mm\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
- if (acceptor_prob > max_prob_sense_forward_5_mm) {
- max_prob_sense_forward_5_mm = acceptor_prob;
- if (acceptor_prob > max_prob_5_mm) {
- max_prob_5_mm = acceptor_prob;
- splice_genomepos_5_mm = genomicpos;
- /* splice_cdna_direction_5_mm = +1; */
- splice_sensedir_5_mm = SENSE_FORWARD;
- splicetype5_mm = ACCEPTOR;
- }
- }
- if (donor_prob > max_prob_sense_anti_5_mm) {
- max_prob_sense_anti_5_mm = donor_prob;
- if (donor_prob > max_prob_5_mm) {
- max_prob_5_mm = donor_prob;
- splice_genomepos_5_mm = genomicpos;
- /* splice_cdna_direction_5_mm = -1; */
- splice_sensedir_5_mm = SENSE_ANTI;
- splicetype5_mm = ANTIDONOR;
- }
- }
- genomicpos++;
- }
- while (genomicpos <= end_genomicpos) {
- acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */
- donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */
- debug13(printf("5', watson, sense null %u %u %f %f\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
- if (acceptor_prob > max_prob_sense_forward_5) {
- max_prob_sense_forward_5 = acceptor_prob;
- if (acceptor_prob > max_prob_5) {
- max_prob_5 = acceptor_prob;
- splice_genomepos_5 = genomicpos;
- /* splice_cdna_direction_5 = +1; */
- splice_sensedir_5 = SENSE_FORWARD;
- splicetype5 = ACCEPTOR;
- }
- }
- if (donor_prob > max_prob_sense_anti_5) {
- max_prob_sense_anti_5 = donor_prob;
- if (donor_prob > max_prob_5) {
- max_prob_5 = donor_prob;
- splice_genomepos_5 = genomicpos;
- /* splice_cdna_direction_5 = -1; */
- splice_sensedir_5 = SENSE_ANTI;
- splicetype5 = ANTIDONOR;
- }
- }
- genomicpos++;
- }
- debug13(printf("\n"));
+ solve5p = true;
+ genomicstart5 = substring1->genomicstart;
- } else {
- start_genomicpos = start; /* check */
- middle_genomicpos = middle;
- end_genomicpos = end;
-
- /* assert(start_genomicpos >= end_genomicpos); */
- genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
- acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
- donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */
- debug13(printf("5', crick, sense null %u %u %f %f mm\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
- if (acceptor_prob > max_prob_sense_forward_5_mm) {
- max_prob_sense_forward_5_mm = acceptor_prob;
- if (acceptor_prob > max_prob_5_mm) {
- max_prob_5_mm = acceptor_prob;
- splice_genomepos_5_mm = genomicpos;
- /* splice_cdna_direction_5_mm = +1; */
- splice_sensedir_5_mm = SENSE_FORWARD;
- splicetype5_mm = ANTIACCEPTOR;
- }
- }
- if (donor_prob > max_prob_sense_anti_5_mm) {
- max_prob_sense_anti_5_mm = donor_prob;
- if (donor_prob > max_prob_5_mm) {
- max_prob_5_mm = donor_prob;
- splice_genomepos_5_mm = genomicpos;
- /* splice_cdna_direction_5_mm = -1; */
- splice_sensedir_5_mm = SENSE_ANTI;
- splicetype5_mm = DONOR;
- }
- }
- genomicpos--;
- }
- while (genomicpos >= end_genomicpos) {
- acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */
- donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */
- debug13(printf("5', crick, sense null %u %u %f %f\n",genomicpos,genomicpos-chroffset,donor_prob,acceptor_prob));
- if (acceptor_prob > max_prob_sense_forward_5) {
- max_prob_sense_forward_5 = acceptor_prob;
- if (acceptor_prob > max_prob_5) {
- max_prob_5 = acceptor_prob;
- splice_genomepos_5 = genomicpos;
- /* splice_cdna_direction_5 = +1; */
- splice_sensedir_5 = SENSE_FORWARD;
- splicetype5 = ANTIACCEPTOR;
- }
- }
- if (donor_prob > max_prob_sense_anti_5) {
- max_prob_sense_anti_5 = donor_prob;
- if (donor_prob > max_prob_5) {
- max_prob_5 = donor_prob;
- splice_genomepos_5 = genomicpos;
- /* splice_cdna_direction_5 = -1; */
- splice_sensedir_5 = SENSE_ANTI;
- splicetype5 = DONOR;
- }
- }
- genomicpos--;
- }
- debug13(printf("\n"));
+ middle5 = substring1->alignstart_trim + 1;
+ if ((start5 = middle5 + END_SPLICESITE_SEARCH) > substring1->genomicstart) {
+ start5 = substring1->genomicstart;
}
- }
-
- if (substring1 == NULL) {
- /* Skip 5' end */
- } else if (*sensedir != SENSE_NULL) {
- if (max_prob_5 > END_SPLICESITE_PROB_MATCH) {
- debug13(printf("Found good splice %s on 5' end at %u with probability %f\n",
- Splicetype_string(splicetype5),splice_genomepos_5-chroffset,max_prob_5));
- if (substring1->plusp) {
- *ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart;
- debug13(printf("1 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5,substring1->genomicstart));
- } else {
- *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5;
- debug13(printf("2 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5));
- }
- *ambig_splicetype_5 = splicetype5;
- *ambig_prob_5 = max_prob_5;
- } else if (max_prob_5_mm > END_SPLICESITE_PROB_MISMATCH) {
- debug13(printf("Found good mismatch splice %s on 5' end at %u with probability %f\n",
- Splicetype_string(splicetype5_mm),splice_genomepos_5_mm-chroffset,max_prob_5_mm));
- if (substring1->plusp) {
- *ambig_end_length_5 = splice_genomepos_5_mm - substring1->genomicstart;
- debug13(printf("3 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5_mm,substring1->genomicstart));
- } else {
- *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5_mm;
- debug13(printf("4 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5_mm));
- }
- *ambig_splicetype_5 = splicetype5_mm;
- *ambig_prob_5 = max_prob_5_mm;
+ if ((end5 = middle5 - END_SPLICESITE_SEARCH) < substring1->alignend_trim + MIN_EXON_LENGTH) {
+ end5 = substring1->alignend_trim + MIN_EXON_LENGTH;
}
+ debug13(printf("\n2 Set end points for 5' trim to be %u..%u..%u\n",
+ start5 - chroffset,middle5 - chroffset,end5 - chroffset));
}
- if (*sensedir == SENSE_NULL) {
- if (max_prob_3 >= END_SPLICESITE_PROB_MATCH || max_prob_5 >= END_SPLICESITE_PROB_MATCH) {
- if (max_prob_3 >= END_SPLICESITE_PROB_MATCH && max_prob_5 >= END_SPLICESITE_PROB_MATCH
- && max_prob_sense_forward_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH
- && max_prob_sense_forward_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH) {
- /* Forward sense wins on both sides */
- if (substringN->plusp) {
- *ambig_end_length_3 = substringN->genomicend - splice_genomepos_3;
- *ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart;
- } else {
- *ambig_end_length_3 = splice_genomepos_3 - substringN->genomicend;
- *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5;
- }
- *ambig_splicetype_3 = splicetype3;
- *ambig_prob_3 = max_prob_3;
- debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
- *ambig_splicetype_5 = splicetype5;
- *ambig_prob_5 = max_prob_5;
- debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
- *sensedir = SENSE_FORWARD; /* = splice_sensedir_3 */
-
- } else if (max_prob_3 >= END_SPLICESITE_PROB_MATCH && max_prob_5 >= END_SPLICESITE_PROB_MATCH
- && max_prob_sense_anti_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH
- && max_prob_sense_anti_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH) {
- /* Anti sense wins on both sides */
- if (substringN->plusp) {
- *ambig_end_length_3 = substringN->genomicend - splice_genomepos_3;
- *ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart;
- } else {
- *ambig_end_length_3 = splice_genomepos_3 - substringN->genomicend;
- *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5;
- }
- *ambig_splicetype_3 = splicetype3;
- *ambig_prob_3 = max_prob_3;
- debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
- *ambig_splicetype_5 = splicetype5;
- *ambig_prob_5 = max_prob_5;
- debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
- *sensedir = SENSE_ANTI; /* = splice_sensedir_3 */
-
- } else if (max_prob_3 > max_prob_5) {
- /* Consider just 3' end */
- debug13(printf("Found good splice %s on 3' end at %u with probability %f\n",
- Splicetype_string(splicetype3),splice_genomepos_3-chroffset,max_prob_3));
- if (substringN->plusp) {
- *ambig_end_length_3 = substringN->genomicend - splice_genomepos_3;
- } else {
- *ambig_end_length_3 = splice_genomepos_3 - substringN->genomicend;
- }
- *ambig_splicetype_3 = splicetype3;
- *ambig_prob_3 = max_prob_3;
- /* *cdna_direction = splice_cdna_direction_3; */
- debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
- if (max_prob_sense_forward_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH
- && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_3;
- } else if (max_prob_sense_anti_3 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH
- && max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_3;
- } else {
- /* Not enough evidence to set sensedir */
- }
-
- } else {
- /* Consider just 5' end */
- debug13(printf("Found good splice %s on 5' end at %u with probability %f\n",
- Splicetype_string(splicetype5),splice_genomepos_5-chroffset,max_prob_5));
- if (substring1->plusp) {
- *ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart;
- debug13(printf("5 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5,substring1->genomicstart));
- } else {
- *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5;
- debug13(printf("6 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5));
- }
- *ambig_splicetype_5 = splicetype5;
- *ambig_prob_5 = max_prob_5;
- /* *cdna_direction = splice_cdna_direction_5; */
- if (max_prob_sense_forward_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH
- && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_5;
- } else if (max_prob_sense_anti_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_forward_5 < END_SPLICESITE_PROB_MATCH
- && max_prob_sense_forward_3 < END_SPLICESITE_PROB_MATCH) {
- *sensedir = splice_sensedir_5;
- } else {
- /* Not enough evidence to set sensedir */
- }
- }
-
- } else if (max_prob_3_mm >= END_SPLICESITE_PROB_MISMATCH || max_prob_5_mm >= END_SPLICESITE_PROB_MISMATCH) {
- if (max_prob_3_mm > max_prob_5_mm) {
- debug13(printf("Found good mismatch splice %s on 3' end at %u with probability %f\n",
- Splicetype_string(splicetype3_mm),splice_genomepos_3_mm-chroffset,max_prob_3_mm));
- if (substringN->plusp) {
- *ambig_end_length_3 = substringN->genomicend - splice_genomepos_3_mm;
- debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,substringN->genomicend,splice_genomepos_3_mm));
- } else {
- *ambig_end_length_3 = splice_genomepos_3_mm - substringN->genomicend;
- debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3_mm,substringN->genomicend));
- }
- *ambig_splicetype_3 = splicetype3_mm;
- *ambig_prob_3 = max_prob_3_mm;
- /* *cdna_direction = splice_cdna_direction_3_mm; */
- if (max_prob_sense_forward_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH
- && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_3_mm;
- } else if (max_prob_sense_anti_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_forward_3_mm < END_SPLICESITE_PROB_MISMATCH
- && max_prob_sense_forward_5_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_3_mm;
- } else {
- /* Not enough evidence to set sensedir */
- }
- } else {
- debug13(printf("Found good mismatch splice %s on 5' end at %u with probability %f\n",
- Splicetype_string(splicetype5_mm),splice_genomepos_5_mm-chroffset,max_prob_5_mm));
- if (substring1->plusp) {
- *ambig_end_length_5 = splice_genomepos_5_mm - substring1->genomicstart;
- debug13(printf("7 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5_mm,substring1->genomicstart));
- } else {
- *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5_mm;
- debug13(printf("8 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5_mm));
- }
- *ambig_splicetype_5 = splicetype5_mm;
- *ambig_prob_5 = max_prob_5_mm;
- /* *cdna_direction = splice_cdna_direction_5_mm; */
- if (max_prob_sense_forward_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH
- && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_5_mm;
- } else if (max_prob_sense_anti_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_forward_5_mm < END_SPLICESITE_PROB_MISMATCH
- && max_prob_sense_forward_3_mm < END_SPLICESITE_PROB_MISMATCH) {
- *sensedir = splice_sensedir_5_mm;
- } else {
- /* Not enough evidence to set sensedir */
- }
- }
- }
- }
+ Splice_trim_novel_spliceends(&(*ambig_end_length_5),&(*ambig_end_length_3),
+ &(*ambig_splicetype_5),&(*ambig_splicetype_3),
+ &(*ambig_prob_5),&(*ambig_prob_3),&(*sensedir),
+ start5,middle5,end5,solve5p,start3,middle3,end3,solve3p,
+ genomicstart5,genomicend3,chroffset,plusp);
debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d, probs %f and %f\n",
*ambig_end_length_5,*ambig_end_length_3,*ambig_prob_5,*ambig_prob_3));
@@ -2597,7 +1921,6 @@ substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_leng
}
-
/* Want querylength and not querylength_adj */
T
Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
@@ -2864,6 +2187,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
&sensedir,chroffset);
if (ambig_end_length_5 > 0) {
+ /* Revise trim_left to be at the splice site */
new->trim_left_splicep = true;
/* new->querystart += (ambig_end_length_5 - new->trim_left); */
/* new->alignstart_trim += (ambig_end_length_5 - new->trim_left); */
@@ -2880,6 +2204,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
}
}
if (ambig_end_length_3 > 0) {
+ /* Revise trim_right to be at the splice site */
new->trim_right_splicep = true;
/* new->queryend -= (ambig_end_length_3 - new->trim_right); */
/* new->alignend_trim -= (ambig_end_length_3 - new->trim_right); */
@@ -2901,6 +2226,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
&sensedir,chroffset);
if (ambig_end_length_5 > 0) {
+ /* Revise trim_left to be at the splice site */
new->trim_left_splicep = true;
/* new->querystart += (ambig_end_length_5 - new->trim_left); */
/* new->alignstart_trim += (ambig_end_length_5 - new->trim_left); */
@@ -2922,6 +2248,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
&sensedir,chroffset);
if (ambig_end_length_3 > 0) {
+ /* Revise trim_right to be at the splice site */
new->trim_right_splicep = true;
/* new->queryend -= (ambig_end_length_3 - new->trim_right); */
/* new->alignend_trim -= (ambig_end_length_3 - new->trim_right); */
@@ -2971,6 +2298,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
&sensedir,chroffset);
if (ambig_end_length_5 > 0) {
+ /* Revise trim_left to be at the splice site */
new->trim_left_splicep = true;
/* new->querystart += (ambig_end_length_5 - new->trim_left); */
/* new->alignstart_trim -= (ambig_end_length_5 - new->trim_left); */
@@ -2987,6 +2315,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
}
}
if (ambig_end_length_3 > 0) {
+ /* Revise trim_right to be at the splice site */
new->trim_right_splicep = true;
/* new->queryend -= (ambig_end_length_3 - new->trim_right); */
/* new->alignend_trim += (ambig_end_length_3 - new->trim_right); */
@@ -3008,6 +2337,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
&sensedir,chroffset);
if (ambig_end_length_5 > 0) {
+ /* Revise trim_left to be at the splice site */
new->trim_left_splicep = true;
/* new->querystart += (ambig_end_length_5 - new->trim_left); */
/* new->alignstart_trim -= (ambig_end_length_5 - new->trim_left); */
@@ -3029,6 +2359,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
&ambig_splicetype_5,&ambig_splicetype_3,&ambig_prob_5,&ambig_prob_3,
&sensedir,chroffset);
if (ambig_end_length_3 > 0) {
+ /* Revise trim_right to be at the splice site */
new->trim_right_splicep = true;
/* new->queryend -= (ambig_end_length_3 - new->trim_right); */
/* new->alignend_trim += (ambig_end_length_3 - new->trim_right); */
@@ -3168,12 +2499,12 @@ Substring_new_ambig_D (int querystart, int queryend, int splice_pos, int queryle
new->genomic_refdiff = (char *) NULL;
if (substring1p == true) {
debug2(printf("substring1p is true, so setting trims to be %d and %d\n",querystart,0));
- new->trim_left = querystart;
+ new->trim_left = querystart; /* trim_left includes the ambiguous part */
new->trim_right = 0;
} else {
debug2(printf("substring1p is false, so setting trims to be %d and %d\n",0,querylength - queryend));
new->trim_left = 0;
- new->trim_right = querylength - queryend;
+ new->trim_right = querylength - queryend; /* trim_right includes the ambiguous part */
}
new->mandatory_trim_left = 0;
new->mandatory_trim_right = 0;
@@ -3276,12 +2607,12 @@ Substring_new_ambig_A (int querystart, int queryend, int splice_pos, int queryle
new->genomic_refdiff = (char *) NULL;
if (substring1p == true) {
debug2(printf("substring1p is true, so setting trims to be %d and %d\n",querystart,0));
- new->trim_left = querystart;
+ new->trim_left = querystart; /* trim_left includes the ambiguous part */
new->trim_right = 0;
} else {
debug2(printf("substring1p is false, so setting trims to be %d and %d\n",0,querylength - queryend));
new->trim_left = 0;
- new->trim_right = querylength - queryend;
+ new->trim_right = querylength - queryend; /* trim_right includes the ambiguous part */
}
new->mandatory_trim_left = 0;
new->mandatory_trim_right = 0;
@@ -3333,7 +2664,7 @@ Substring_set_unambiguous (double *donor_prob, double *acceptor_prob, Univcoord_
T this, int bingoi) {
#ifdef DEBUG2
- printf("Entered Substring_set_unambiguous. plusp %d",this->plusp);
+ printf("Entered Substring_set_unambiguous for %d..%d. plusp %d",this->querystart,this->queryend,this->plusp);
if (this->amb_type == DON) {
printf("type DON\n");
} else {
@@ -3411,14 +2742,22 @@ Substring_compute_mapq (T this, Compress_T query_compress, char *quality_string,
int i;
/* mapq */
+#if 0
mapq_start = this->querystart_orig;
mapq_end = this->queryend_orig;
+#else
+ /* Need trimmed regions to make this calculation equivalent to Pair_compute_mapq */
+ mapq_start = this->querystart;
+ mapq_end = this->queryend;
+#endif
/* It appears from simulated reads that it is better not to trim in
computing MAPQ. The correct mapping then tends to be selected
with a higher MAPQ score. */
/* But if all ends are terminals, then terminal parts should not be
included in MAPQ scoring */
+
+#if 0
if (trim_terminals_p == true) {
if (this->start_endtype == TERM) {
mapq_start += this->trim_left;
@@ -3427,6 +2766,7 @@ Substring_compute_mapq (T this, Compress_T query_compress, char *quality_string,
mapq_end -= this->trim_right;
}
}
+#endif
if (this->exactp == true) {
/* this->mapq_loglik = MAPQ_loglik_exact(quality_string,0,querylength); */
@@ -3732,18 +3072,34 @@ Substring_nmismatches_refdiff (T this) {
}
int
-Substring_nmismatches_region (T this) {
- return this->queryend - this->querystart - this->nmatches;
+Substring_nmatches_posttrim (T this) {
+ if (this->ambiguous_p == true) {
+ return 0;
+ } else {
+ return this->queryend - this->querystart - this->nmismatches_bothdiff;
+ }
}
+/* nmatches_posttrim plus amb_length */
int
Substring_nmatches (T this) {
- return this->nmatches;
-}
+ int amb_length;
-int
-Substring_nmatches_posttrim (T this) {
- return this->queryend - this->querystart - this->nmismatches_bothdiff;
+ if (this->ambiguous_p == false) {
+ /* Add the amb_length part */
+ amb_length = 0;
+ if (this->trim_left_splicep == true) {
+ amb_length += this->trim_left;
+ }
+ if (this->trim_right_splicep == true) {
+ amb_length += this->trim_right;
+ }
+ return amb_length + /*nmatches_posttrim*/ (this->queryend - this->querystart - this->nmismatches_bothdiff);
+
+ } else {
+ /* Include the entire ambiguous substring */
+ return (this->queryend - this->querystart);
+ }
}
@@ -3856,11 +3212,60 @@ Substring_match_length (T this) {
}
}
+/* Mapped and unmapped */
int
-Substring_match_length_amb (T this) {
+Substring_amb_length (T this) {
+ int amb_length;
+
if (this->ambiguous_p == false) {
- return 0;
+ /* For GSNAP algorithm, we represent trimmed ambiguous ends using these fields */
+ amb_length = 0;
+ if (this->trim_left_splicep == true) {
+ amb_length += this->trim_left;
+ }
+ if (this->trim_right_splicep == true) {
+ amb_length += this->trim_right;
+ }
+ return amb_length;
+
+ } else {
+ /* For substrings algorithm, we represent known ambiguous ends with a substring having ambiguous_p == true */
+ return this->queryend - this->querystart;
+ }
+}
+
+
+/* Mapped and unmapped */
+int
+Substring_start_amb_length (T this) {
+ if (this->ambiguous_p == false) {
+ /* For GSNAP algorithm, we represent trimmed ambiguous ends using these fields */
+ if (this->trim_left_splicep == true) {
+ return this->trim_left;
+ } else {
+ return 0;
+ }
+
} else {
+ /* For substrings algorithm, we represent known ambiguous ends with a substring having ambiguous_p == true */
+ return this->queryend - this->querystart;
+ }
+}
+
+
+/* Mapped and unmapped */
+int
+Substring_end_amb_length (T this) {
+ if (this->ambiguous_p == false) {
+ /* For GSNAP algorithm, we represent trimmed ambiguous ends using these fields */
+ if (this->trim_right_splicep == true) {
+ return this->trim_right;
+ } else {
+ return 0;
+ }
+
+ } else {
+ /* For substrings algorithm, we represent known ambiguous ends with a substring having ambiguous_p == true */
return this->queryend - this->querystart;
}
}
@@ -4459,15 +3864,16 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
start_endtype = END;
end_endtype = DON;
- querystart = substring_querystart; /* 0, for an end piece */
queryend = donor_pos;
#if 0
+ querystart = substring_querystart; /* 0, for an end piece */
if (querystart == 0) {
trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
} else {
trim_left_action = PRE_TRIMMED;
}
#else
+ querystart = 0;
trim_left_action = COMPUTE_TRIM;
#endif
trim_right_action = NO_TRIM;
@@ -4477,15 +3883,16 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
end_endtype = END;
querystart = donor_pos;
- queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
#if 0
+ queryend = substring_queryend; /* querylength, for an end piece */
if (queryend == querylength) {
trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
} else {
trim_right_action = PRE_TRIMMED;
}
#else
+ queryend = querylength;
trim_right_action = COMPUTE_TRIM;
#endif
@@ -4503,15 +3910,16 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
start_endtype = END;
end_endtype = DON;
- querystart = substring_querystart; /* 0, for an end piece */
queryend = querylength - donor_pos;
#if 0
+ querystart = substring_querystart; /* 0, for an end piece */
if (querystart == 0) {
trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
} else {
trim_left_action = PRE_TRIMMED;
}
#else
+ querystart = 0;
trim_left_action = COMPUTE_TRIM;
#endif
trim_right_action = NO_TRIM;
@@ -4521,15 +3929,16 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
end_endtype = END;
querystart = querylength - donor_pos;
- queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
#if 0
+ queryend = substring_queryend; /* querylength, for an end piece */
if (queryend == querylength) {
trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
} else {
trim_right_action = PRE_TRIMMED;
}
#else
+ queryend = querylength;
trim_right_action = COMPUTE_TRIM;
#endif
@@ -4608,15 +4017,16 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
end_endtype = END;
querystart = acceptor_pos;
- queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
#if 0
+ queryend = substring_queryend; /* querylength, for an end piece */
if (queryend == querylength) {
trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
} else {
trim_right_action = PRE_TRIMMED;
}
#else
+ queryend = querylength;
trim_right_action = COMPUTE_TRIM;
#endif
@@ -4624,15 +4034,16 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
start_endtype = END;
end_endtype = ACC;
- querystart = substring_querystart; /* 0, for an end piece */
queryend = acceptor_pos;
#if 0
+ querystart = substring_querystart; /* 0, for an end piece */
if (querystart == 0) {
trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
} else {
trim_left_action = PRE_TRIMMED;
}
#else
+ querystart = 0;
trim_left_action = COMPUTE_TRIM;
#endif
trim_right_action = NO_TRIM;
@@ -4652,15 +4063,16 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
end_endtype = END;
querystart = querylength - acceptor_pos;
- queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
#if 0
+ queryend = substring_queryend; /* querylength, for an end piece */
if (queryend == querylength) {
trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
} else {
trim_right_action = PRE_TRIMMED;
}
#else
+ queryend = querylength;
trim_right_action = COMPUTE_TRIM;
#endif
@@ -4668,15 +4080,16 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
start_endtype = END;
end_endtype = ACC;
- querystart = substring_querystart; /* 0, for an end piece */
queryend = querylength - acceptor_pos;
#if 0
+ querystart = substring_querystart; /* 0, for an end piece */
if (querystart == 0) {
trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
} else {
trim_left_action = PRE_TRIMMED;
}
#else
+ querystart = 0;
trim_left_action = COMPUTE_TRIM;
#endif
trim_right_action = NO_TRIM;
diff --git a/src/substring.h b/src/substring.h
index 85356dd..3cbdfc6 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 196273 2016-08-12 15:15:06Z twu $ */
+/* $Id: substring.h 197775 2016-09-14 00:41:46Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -24,7 +24,6 @@
#else
#include "uintlist.h"
#endif
-#include "splicetrie_build.h" /* For Splicetype_T */
typedef enum {GMAP_NOT_APPLICABLE, GMAP_VIA_SUBSTRINGS, GMAP_VIA_SEGMENTS, GMAP_VIA_REGION} GMAP_source_T;
@@ -153,11 +152,9 @@ Substring_nmismatches_bothdiff (T this);
extern int
Substring_nmismatches_refdiff (T this);
extern int
-Substring_nmismatches_region (T this);
+Substring_nmatches_posttrim (T this);
extern int
Substring_nmatches (T this);
-extern int
-Substring_nmatches_posttrim (T this);
extern void
Substring_set_nmismatches_terminal (T this, int nmismatches_whole, int nmismatches_bothdiff);
extern Endtype_T
@@ -196,7 +193,12 @@ Substring_querylength (T this);
extern int
Substring_match_length (T this);
extern int
-Substring_match_length_amb (T this);
+Substring_amb_length (T this);
+extern int
+Substring_start_amb_length (T this);
+extern int
+Substring_end_amb_length (T this);
+
extern int
Substring_match_length_orig (T this);
extern Chrpos_T
diff --git a/src/uniqscan.c b/src/uniqscan.c
index fc13f9a..d5c3fbe 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 196438 2016-08-16 20:23:27Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 197391 2016-09-03 00:43:23Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1062,7 +1062,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metct",/*snps_root*/NULL,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -1071,7 +1071,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metga",/*snps_root*/NULL,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -1087,7 +1087,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1096,7 +1096,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1112,7 +1112,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1121,7 +1121,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1132,7 +1132,7 @@ main (int argc, char *argv[]) {
genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP\n",fileroot,IDX_FILESUFFIX);
exit(9);
}
@@ -1165,7 +1165,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metct",snps_root,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -1173,7 +1173,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metga",snps_root,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -1189,7 +1189,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1197,7 +1197,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1213,7 +1213,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1221,7 +1221,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false)) == NULL) {
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1231,7 +1231,7 @@ main (int argc, char *argv[]) {
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
required_index1part,required_interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- /*sharedp*/false,/*multiple_sequences_p*/false);
+ /*sharedp*/false,/*multiple_sequences_p*/false,/*unload_shared_memory_p*/false);
if (indexdb == NULL) {
fprintf(stderr,"Cannot find snps index file for %s in directory %s\n",snps_root,snpsdir);
exit(9);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit mailing list