[med-svn] [gmap] 01/03: Imported Upstream version 2015-09-29
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Wed Oct 14 08:53:41 UTC 2015
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit fda1ac5c2c051957053d2487cc9ad186d503a857
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Wed Oct 14 09:32:07 2015 +0200
Imported Upstream version 2015-09-29
---
ChangeLog | 93 ++
Makefile.in | 2 +-
TODO | 3 -
VERSION | 2 +-
configure | 24 +-
src/ChangeLog | 0
src/Makefile.in | 2 +-
src/chimera.c | 75 +-
src/chimera.h | 5 +-
src/dynprog_genome.c | 4254 +++++++++++++++++++++++++------------------------
src/genome.c | 4 +-
src/gmap.c | 85 +-
src/gsnap.c | 87 +-
src/inbuffer.c | 8 +-
src/oligoindex_hr.c | 55 +-
src/outbuffer.c | 4 +-
src/pair.c | 76 +-
src/pair.h | 5 +-
src/samprint.c | 164 +-
src/samprint.h | 5 +-
src/sarray-read.c | 59 +-
src/splice.c | 10 +-
src/stage1.c | 28 +-
src/stage1hr.c | 408 +++--
src/stage1hr.h | 9 +-
src/stage3.c | 78 +-
src/stage3hr.c | 221 ++-
src/stage3hr.h | 10 +-
src/substring.c | 11 +-
src/substring.h | 5 +-
src/uniqscan.c | 14 +-
util/gmap_build.pl.in | 35 +-
32 files changed, 3304 insertions(+), 2537 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index bcab131..c721c9a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,96 @@
+2015-09-30 twu
+
+ * VERSION: Updated version number
+
+ * stage1hr.c: In converting segments to GMAP, ensuring that the pairs are
+ monotonic in genomepos
+
+ * inbuffer.c: Initializing variables
+
+2015-09-28 twu
+
+ * oligoindex_hr.c, public-2015-07-23, src: Merged revision 175543 from trunk
+ to handle the case where left_plus_length < indexsize
+
+ * VERSION: Updated version number
+
+ * stage1hr.c: Putting debugging statement inside debug macro
+
+2015-09-22 twu
+
+ * dynprog_genome.c, pair.c, pair.h, public-2015-07-23, src, stage1hr.c,
+ stage3.c, stage3hr.c: Merged revisions 174475 through 174480 from trunk to
+ improve alignments involving microexons, splices at ends, bridging intron
+ gaps, and multiple segments
+
+ * VERSION, public-2015-07-23, src, stage1hr.c, stage3hr.c: Merged revision
+ 174117 from trunk to fix calls to Genome_get_segment_blocks_left
+
+2015-09-12 twu
+
+ * splice.c: Fixed FREEA statements
+
+ * gmap_build.pl.in, util: Merged revision 173888 from trunk to allow for
+ spaces in destination directory
+
+ * gsnap.c, src, stage1hr.c, stage1hr.h, stage3hr.c, stage3hr.h, substring.c,
+ substring.h, uniqscan.c: Merged revisions 173889 through 173894 from trunk
+ to add --min-coverage and remove --terminal-threshold and
+ reject_trimlength. Changed criteria for running find_terminals.
+
+2015-09-01 twu
+
+ * VERSION, chimera.c, gmap.c, public-2015-07-23, src: Merged revisions
+ 173188 and 173189 from trunk to set dinucleotides for out-of-bound
+ chimeric breakpoints and to set some uninitialized variables
+
+ * stage3hr.c: Merged revision 173165 from trunk to favor non-zero sensedirs
+ when sorting results
+
+ * splice.c: Merged revision 173164 from trunk to fix variable names for
+ FREEA
+
+ * oligoindex_hr.c: Merged revision 173163 from trunk to initialize some
+ return variables when exiting trimming procedure early
+
+ * chimera.c, chimera.h, gmap.c, pair.c: Merged revision 173162 from trunk to
+ fix issues when chimeras extend to beginning or end of chromosomes
+
+ * dynprog_genome.c: Merged revision 173161 from trunk to fix bridging intron
+ gaps when no probabilities are found
+
+2015-08-31 twu
+
+ * gmap.c, outbuffer.c, src, stage1.c, stage3.c: Merged revision 173034 to
+ not use alloca for array of Batch_T objects
+
+2015-08-27 twu
+
+ * stage3.c: Applied revision 172740 from trunk to change criterion for
+ evaluating splice neighborhood to allow for short ends
+
+ * sarray-read.c: Applied revision 172478 from trunk to use
+ max_mismatches_allowed from original call to suffix array algorithm, and
+ not allowing it to be unlimited
+
+ * stage3hr.c: Applied part of revision 172472 from trunk to call
+ Genome_get_segment_blocks_left with chroffset and not chrhigh
+
+ * genome.c: Applied revision 172476 from trunk to set end of genomealt
+ string to be NULL
+
+ * dynprog_genome.c: Applied revision 172475 from trunk to change loop end
+ condition to avoid accessing uninitialized variables
+
+ * gsnap.c, samprint.c, samprint.h: Applied revisions 171145 and 171803 from
+ trunk to add flags --add-paired-nomappers and
+ --paired-flag-means-concordant
+
+ * oligoindex_hr.c: Applied revision 170792 from trunk to restore missing
+ line in counting of 9-mers
+
+ * public-2015-07-23: Created release branch for 2015-07-23
+
2015-07-23 twu
* VERSION: Updated version number
diff --git a/Makefile.in b/Makefile.in
index 651b3b2..9dadef1 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -37,7 +37,7 @@ target_triplet = @target@
subdir = .
DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in $(top_srcdir)/configure AUTHORS COPYING \
- ChangeLog INSTALL NEWS TODO config/compile config/config.guess \
+ ChangeLog INSTALL NEWS config/compile config/config.guess \
config/config.sub config/depcomp config/install-sh \
config/ltmain.sh config/missing
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
diff --git a/TODO b/TODO
deleted file mode 100644
index c5a7bd7..0000000
--- a/TODO
+++ /dev/null
@@ -1,3 +0,0 @@
-
-Add flag that allows for splitting afterwards.
-
diff --git a/VERSION b/VERSION
index b93269f..0189fb2 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2015-07-23
\ No newline at end of file
+2015-09-29
\ No newline at end of file
diff --git a/configure b/configure
index 9e9988e..9df6267 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2015-07-23.
+# Generated by GNU Autoconf 2.63 for gmap 2015-09-29.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2015-07-23'
-PACKAGE_STRING='gmap 2015-07-23'
+PACKAGE_VERSION='2015-09-29'
+PACKAGE_STRING='gmap 2015-09-29'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -1513,7 +1513,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2015-07-23 to adapt to many kinds of systems.
+\`configure' configures gmap 2015-09-29 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1584,7 +1584,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2015-07-23:";;
+ short | recursive ) echo "Configuration of gmap 2015-09-29:";;
esac
cat <<\_ACEOF
@@ -1721,7 +1721,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2015-07-23
+gmap configure 2015-09-29
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1735,7 +1735,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2015-07-23, which was
+It was created by gmap $as_me 2015-09-29, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2105,8 +2105,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2015-07-23" >&5
-$as_echo "2015-07-23" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2015-09-29" >&5
+$as_echo "2015-09-29" >&6; }
### Read defaults
@@ -4172,7 +4172,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2015-07-23'
+ VERSION='2015-09-29'
cat >>confdefs.h <<_ACEOF
@@ -26591,7 +26591,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2015-07-23, which was
+This file was extended by gmap $as_me 2015-09-29, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -26654,7 +26654,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2015-07-23
+gmap config.status 2015-09-29
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/src/ChangeLog b/src/ChangeLog
deleted file mode 100644
index e69de29..0000000
diff --git a/src/Makefile.in b/src/Makefile.in
index 708e7f3..7e8d342 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -42,7 +42,7 @@ bin_PROGRAMS = gmap$(EXEEXT) gmapl$(EXEEXT) get-genome$(EXEEXT) \
cmetindex$(EXEEXT) atoiindex$(EXEEXT) sam_sort$(EXEEXT)
subdir = src
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
- $(srcdir)/config.h.in ChangeLog
+ $(srcdir)/config.h.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/ltoptions.m4 \
diff --git a/src/chimera.c b/src/chimera.c
index 740019e..5a9a110 100644
--- a/src/chimera.c
+++ b/src/chimera.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: chimera.c 162196 2015-03-27 21:42:43Z twu $";
+static char rcsid[] = "$Id: chimera.c 173190 2015-09-01 18:59:44Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -626,7 +626,8 @@ static char *complCode = COMPLEMENT_UC;
/* Called if Chimera_find_exonexon fails */
int
Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char *acceptor2, char *acceptor1,
- Stage3_T left_part, Stage3_T right_part, int queryntlength, Genome_T genome) {
+ Stage3_T left_part, Stage3_T right_part, int queryntlength, Genome_T genome,
+ Chrpos_T left_chrlength, Chrpos_T right_chrlength) {
int chimerapos = 0, breakpoint;
int *matrix_sub1, *matrix_sub2, pos, score, bestscore;
bool *gapp_sub1, *gapp_sub2;
@@ -682,7 +683,7 @@ Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char
debug(
printf("%d = %d + %d",score,matrix_sub1[pos],matrix_sub2[pos+1]);
if (pos >= chimerapos && pos <= *chimeraequivpos) {
- printf(" ** ");
+ printf(" ** chimerapos %d, chimeraequivpos %d",chimerapos,*chimeraequivpos);
}
);
@@ -690,6 +691,7 @@ Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char
}
debug(printf("\n"));
}
+ debug(printf("chimerapos %d, chimeraequivpos %d\n",chimerapos,*chimeraequivpos));
#if 0
*five_score = matrix_sub1[*chimerapos] /* - 0 */;
@@ -702,29 +704,54 @@ Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char
FREE(gapp_sub1);
FREE(matrix_sub1);
- breakpoint = (chimerapos + (*chimeraequivpos))/2;
-
- if (Stage3_watsonp(left_part) == true) {
- left = Stage3_genomicpos(left_part,breakpoint,/*headp*/false);
- *donor1 = Genome_get_char(genome,left+1);
- *donor2 = Genome_get_char(genome,left+2);
+ if (chimerapos == 0) {
+ /* Never found a breakpoint */
+ return -1;
} else {
- left = Stage3_genomicpos(left_part,breakpoint,/*headp*/false);
- *donor1 = complCode[(int) Genome_get_char(genome,left-1)];
- *donor2 = complCode[(int) Genome_get_char(genome,left-2)];
- }
+ breakpoint = (chimerapos + (*chimeraequivpos))/2;
- if (Stage3_watsonp(right_part) == true) {
- left = Stage3_genomicpos(right_part,breakpoint+1,/*headp*/true);
- *acceptor2 = Genome_get_char(genome,left-2);
- *acceptor1 = Genome_get_char(genome,left-1);
- } else {
- left = Stage3_genomicpos(right_part,breakpoint+1,/*headp*/true);
- *acceptor2 = complCode[(int) Genome_get_char(genome,left+2)];
- *acceptor1 = complCode[(int) Genome_get_char(genome,left+1)];
- }
+ if (Stage3_watsonp(left_part) == true) {
+ if ((left = Stage3_genomicpos(left_part,breakpoint,/*headp*/false)) >= left_chrlength - 2) {
+ debug(printf("left %u >= left_chrlength %u - 2, so not finding donor dinucleotides\n",left,left_chrlength));
+ *donor1 = *donor2 = 'N';
+ } else {
+ debug(printf("left %u < left_chrlength %u - 2, so okay\n",left,left_chrlength));
+ *donor1 = Genome_get_char(genome,left+1);
+ *donor2 = Genome_get_char(genome,left+2);
+ }
+ } else {
+ if ((left = Stage3_genomicpos(left_part,breakpoint,/*headp*/false)) < 2) {
+ debug(printf("left %u < 2, so not finding donor dinucleotides\n",left));
+ *donor1 = *donor2 = 'N';
+ } else {
+ debug(printf("left %u >= 2, so okay\n",left));
+ *donor1 = complCode[(int) Genome_get_char(genome,left-1)];
+ *donor2 = complCode[(int) Genome_get_char(genome,left-2)];
+ }
+ }
+
+ if (Stage3_watsonp(right_part) == true) {
+ if ((left = Stage3_genomicpos(right_part,breakpoint+1,/*headp*/true)) < 2) {
+ debug(printf("left %u < 2, so not finding acceptor dinucleotides\n",left));
+ *acceptor1 = *acceptor2 = 'N';
+ } else {
+ debug(printf("left %u >= 2, so okay\n",left));
+ *acceptor2 = Genome_get_char(genome,left-2);
+ *acceptor1 = Genome_get_char(genome,left-1);
+ }
+ } else {
+ if ((left = Stage3_genomicpos(right_part,breakpoint+1,/*headp*/true)) >= right_chrlength - 2) {
+ debug(printf("left %u >= right_chrlength %u - 2, so not finding acceptor dinucleotides\n",left,right_chrlength));
+ *acceptor1 = *acceptor2 = 'N';
+ } else {
+ debug(printf("left %u <right_chrlength %u - 2, so okay\n",left,right_chrlength));
+ *acceptor2 = complCode[(int) Genome_get_char(genome,left+2)];
+ *acceptor1 = complCode[(int) Genome_get_char(genome,left+1)];
+ }
+ }
- return chimerapos;
+ return chimerapos;
+ }
}
@@ -1043,6 +1070,7 @@ Chimera_find_exonexon (int *found_cdna_direction, int *try_cdna_direction,
if (breakpoint_end < breakpoint_start) {
debug2(printf("Breakpoints do not make sense, so not computing\n"));
+ *found_cdna_direction = *try_cdna_direction = 0;
return -1;
}
@@ -1127,4 +1155,3 @@ Chimera_find_exonexon (int *found_cdna_direction, int *try_cdna_direction,
}
-
diff --git a/src/chimera.h b/src/chimera.h
index 2eb0a36..6aa94f3 100644
--- a/src/chimera.h
+++ b/src/chimera.h
@@ -1,4 +1,4 @@
-/* $Id: chimera.h 156811 2015-01-15 20:51:29Z twu $ */
+/* $Id: chimera.h 173168 2015-09-01 18:20:01Z twu $ */
#ifndef CHIMERA_INCLUDED
#define CHIMERA_INCLUDED
@@ -54,7 +54,8 @@ Chimera_bestpath (int *five_score, int *three_score, int *chimerapos, int *chime
int queryntlength, int chimera_slop, bool localp);
extern int
Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char *acceptor2, char *acceptor1,
- Stage3_T left_part, Stage3_T right_part, int queryntlength, Genome_T genome);
+ Stage3_T left_part, Stage3_T right_part, int queryntlength, Genome_T genome,
+ Chrpos_T left_chrlength, Chrpos_T right_chrlength);
#if 0
extern void
diff --git a/src/dynprog_genome.c b/src/dynprog_genome.c
index 8d2c9dc..d9ff04f 100644
--- a/src/dynprog_genome.c
+++ b/src/dynprog_genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_genome.c 170390 2015-07-23 01:29:31Z twu $";
+static char rcsid[] = "$Id: dynprog_genome.c 174482 2015-09-22 00:58:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -82,17 +82,30 @@ static char rcsid[] = "$Id: dynprog_genome.c 170390 2015-07-23 01:29:31Z twu $";
#endif
+#define USE_SCOREI 1
+#define USE_WEAK_SCOREI 1
+
#define PROB_CEILING 0.85
#define PROB_FLOOR 0.75
#define PROB_BAD 0.50
/* Prefer alternate intron to other non-canonicals, but don't
introduce mismatches or gaps to identify */
+#ifdef USE_WEAK_SCOREI
+#define CANONICAL_INTRON 6
+#define GCAG_INTRON 4
+#define ATAC_INTRON 2
+#define FINAL_GCAG_INTRON 4 /* Amount above regular should approximately
+ match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */
+#define FINAL_ATAC_INTRON 2
+#else
#define GCAG_INTRON 15
#define ATAC_INTRON 12
#define FINAL_GCAG_INTRON 20 /* Amount above regular should approximately
match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */
#define FINAL_ATAC_INTRON 12
+#endif
+
/* Don't want to make too high, otherwise we will harm evaluation of
dual introns vs. single intron */
@@ -211,6 +224,10 @@ intron_score (int *introntype, int leftdi, int rightdi, int cdna_direction, int
bool finalp) {
int scoreI;
+#ifdef USE_WEAK_SCOREI
+ canonical_reward = CANONICAL_INTRON;
+#endif
+
#ifdef PMAP
if ((*introntype = leftdi & rightdi) == NONINTRON) {
scoreI = 0.0;
@@ -534,49 +551,274 @@ get_known_splicesites (int *left_known, int *right_known, int glengthL, int glen
#if defined(HAVE_SSE4_1) || defined(HAVE_SSE2)
-static bool
-bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *bestcR,
- int *best_introntype, double *left_prob, double *right_prob,
- Score8_T **matrixL_upper, Score8_T **matrixL_lower,
- Score8_T **matrixR_upper, Score8_T **matrixR_lower,
- Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
- Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
- char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
- int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
- int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
- double defect_rate, int canonical_reward, int leftoffset, int rightoffset,
- Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
- bool halfp, bool finalp, bool jump_late_p) {
- bool result;
- int bestscore = NEG_INFINITY_8, score, scoreL, scoreR, scoreI;
-#if 0
- int bestscoreI = NEG_INFINITY_8;
-#endif
- int bestscore_with_prob = NEG_INFINITY_8;
+
+/* Returns finalscore */
+static int
+bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
+ int *best_introntype,
+ Score8_T **matrixL_upper, Score8_T **matrixL_lower,
+ Score8_T **matrixR_upper, Score8_T **matrixR_lower,
+ Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
+ Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
+ int *left_known, int *right_known,
+ int rlength, int glengthL, int glengthR,
+ int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
+ int leftoffset, int rightoffset,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool jump_late_p) {
int rL, rR, cL, cR;
- int bestrL_with_prob, bestrR_with_prob, bestcL_with_prob, bestcR_with_prob;
int cloL, chighL;
int cloR, chighR;
- char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
- int *leftdi, *rightdi, introntype;
- int *left_known, *right_known;
- double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob, bestprob_trunc;
- Univcoord_T splicesitepos, splicesitepos1, splicesitepos2;
+ int bestscore = NEG_INFINITY_8, score, scoreL, scoreR;
+ Univcoord_T splicesitepos1, splicesitepos2;
bool bestp;
- debug(printf("Running bridge_intron_gap_8_ud\n"));
+ for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
+ debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
+ if ((cloL = rL - lbandL) < 1) {
+ cloL = 1;
+ }
+ if ((chighL = rL + ubandL) > glengthL-1) {
+ chighL = glengthL-1;
+ }
+
+ if ((cloR = rR - lbandR) < 1) {
+ cloR = 1;
+ }
+ if ((chighR = rR + ubandR) > glengthR-1) {
+ chighR = glengthR-1;
+ }
- if (glengthL+1 <= 0) {
- fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
- abort();
- }
+ /* Test indels on left and right */
+ for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
+ /* The following check limits genomic inserts (horizontal) and
+ multiple cDNA inserts (vertical). */
+ if (left_known[cL] > 0) {
+ scoreL = (int) matrixL_lower[rL][cL];
+ if (directionsL_lower_nogap[rL][cL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
- if (glengthR+1 <= 0) {
- fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
- abort();
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
+ if (right_known[cR] > 0) {
+ scoreR = (int) matrixR_lower[rR][cR];
+ if (directionsR_lower_nogap[rR][cR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ if ((score = scoreL + scoreR) > bestscore ||
+ (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
+ bestp = false;
+ if (watsonp == true) {
+ splicesitepos1 = leftoffset + cL;
+ splicesitepos2 = rightoffset - cR + 1;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
+ bestp = true;
+ }
+ } else {
+ splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
+ splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
+ bestp = true;
+ }
+ }
+ if (bestp == true) {
+ debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
+ cL,cR,scoreL,scoreR,score));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ } else {
+ debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
+ cL,cR,scoreL,scoreR,score));
+ }
+ }
+ }
+ }
+
+ for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
+ if (right_known[cR] > 0) {
+ scoreR = (int) matrixR_upper[cR][rR];
+ if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ if ((score = scoreL + scoreR) > bestscore ||
+ (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
+ bestp = false;
+ if (watsonp == true) {
+ splicesitepos1 = leftoffset + cL;
+ splicesitepos2 = rightoffset - cR + 1;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
+ bestp = true;
+ }
+ } else {
+ splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
+ splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
+ bestp = true;
+ }
+ }
+ if (bestp == true) {
+ debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
+ cL,cR,scoreL,scoreR,score));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ } else {
+ debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
+ cL,cR,scoreL,scoreR,score));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (/* at main diagonal*/; cL < chighL; cL++) {
+ /* The following check limits genomic inserts (horizontal) and
+ multiple cDNA inserts (vertical). */
+ if (left_known[cL] > 0) {
+ scoreL = (int) matrixL_upper[cL][rL];
+ if (directionsL_upper_nogap[cL][rL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
+
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
+ if (right_known[cR] > 0) {
+ scoreR = (int) matrixR_lower[rR][cR];
+ if (directionsR_lower_nogap[rR][cR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ if ((score = scoreL + scoreR) > bestscore ||
+ (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
+ bestp = false;
+ if (watsonp == true) {
+ splicesitepos1 = leftoffset + cL;
+ splicesitepos2 = rightoffset - cR + 1;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
+ bestp = true;
+ }
+ } else {
+ splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
+ splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
+ bestp = true;
+ }
+ }
+ if (bestp == true) {
+ debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
+ cL,cR,scoreL,scoreR,score));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ } else {
+ debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
+ cL,cR,scoreL,scoreR,score));
+ }
+ }
+ }
+ }
+
+ for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
+ if (right_known[cR] > 0) {
+ scoreR = (int) matrixR_upper[cR][rR];
+ if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ if ((score = scoreL + scoreR) > bestscore ||
+ (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
+ bestp = false;
+ if (watsonp == true) {
+ splicesitepos1 = leftoffset + cL;
+ splicesitepos2 = rightoffset - cR + 1;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
+ bestp = true;
+ }
+ } else {
+ splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
+ splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
+ bestp = true;
+ }
+ }
+ if (bestp == true) {
+ debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
+ cL,cR,scoreL,scoreR,score));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ } else {
+ debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
+ cL,cR,scoreL,scoreR,score));
+ }
+ }
+ }
+ }
+ }
+ }
}
+ *best_introntype = NONINTRON;
+ return (int) bestscore;
+}
+
+
+/* Returns finalscore */
+static int
+bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
+ Score8_T **matrixL_upper, Score8_T **matrixL_lower,
+ Score8_T **matrixR_upper, Score8_T **matrixR_lower,
+ Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
+ Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
+ char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
+ int goffsetL, int rev_goffsetR, int *left_known, int *right_known,
+ int rlength, int glengthL, int glengthR,
+ int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
+ int canonical_reward, int leftoffset, int rightoffset,
+ Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool halfp, bool finalp) {
+ int rL, rR, cL, cR;
+ int bestrL_with_prob, bestrR_with_prob, bestcL_with_prob, bestcR_with_prob;
+ int cloL, chighL;
+ int cloR, chighR;
+ int introntype;
+ int bestscore = NEG_INFINITY_8, score, scoreL, scoreR, scoreI;
+ int bestscore_with_prob = NEG_INFINITY_8;
+ double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob, bestprob_trunc;
+ Univcoord_T splicesitepos;
+ char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
+ int *leftdi, *rightdi;
+ bool use_prob_p;
+
+
/* Read dinucleotides */
leftdi = (int *) MALLOCA((glengthL+1) * sizeof(int));
rightdi = (int *) MALLOCA((glengthR+1) * sizeof(int));
@@ -629,535 +871,279 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
- left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
- right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
- get_known_splicesites(left_known,right_known,glengthL,glengthR,
- /*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
- cdna_direction,watsonp,chrnum,chroffset,chrhigh);
-
- /* Perform computations */
-#if 0
- /* Bands already computed during dynamic programming */
-#if 1
- /* Allows unlimited indel lengths */
- ubandL = glengthL - rlength + extraband_paired;
- lbandL = extraband_paired;
-
- ubandR = glengthR - rlength + extraband_paired;
- lbandR = extraband_paired;
-#else
- /* Limit indels to 3 bp around splice sites. Doesn't work on PacBio reads. */
- ubandL = 3;
- lbandL = 3;
-
- ubandR = 3;
- lbandR = 3;
-#endif
-#endif
+ left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
+ right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
- if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
- /* Constrain to given introns */
- for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
- debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
- if ((cloL = rL - lbandL) < 1) {
- cloL = 1;
- }
- if ((chighL = rL + ubandL) > glengthL-1) {
- chighL = glengthL-1;
+ debug3(printf("watsonp is %d. cdna_direction is %d\n",watsonp,cdna_direction));
+ if (watsonp == true) {
+ if (cdna_direction > 0) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chroffset + leftoffset + cL;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
+ debug3(printf("left donor probability at cL %d is %f\n",cL,left_probabilities[cL]));
+ }
}
- if ((cloR = rR - lbandR) < 1) {
- cloR = 1;
- }
- if ((chighR = rR + ubandR) > glengthR-1) {
- chighR = glengthR-1;
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chroffset + rightoffset - cR + 1;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
+ debug3(printf("right acceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
+ }
}
- /* Test indels on left and right */
- for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
- /* The following check limits genomic inserts (horizontal) and
- multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
- scoreL = (int) matrixL_lower[rL][cL];
- if (directionsL_lower_nogap[rL][cL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
- scoreR = (int) matrixR_lower[rR][cR];
- if (directionsR_lower_nogap[rR][cR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- if ((score = scoreL + scoreR) > bestscore ||
- (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
- bestp = false;
- if (watsonp == true) {
- splicesitepos1 = leftoffset + cL;
- splicesitepos2 = rightoffset - cR + 1;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
- bestp = true;
- }
- } else {
- splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
- splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
- bestp = true;
- }
- }
- if (bestp == true) {
- debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
- cL,cR,scoreL,scoreR,score));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- } else {
- debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
- cL,cR,scoreL,scoreR,score));
- }
- }
- }
- }
-
- for (/* at main diagonal*/; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
- scoreR = (int) matrixR_upper[cR][rR];
- if (directionsR_upper_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- if ((score = scoreL + scoreR) > bestscore ||
- (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
- bestp = false;
- if (watsonp == true) {
- splicesitepos1 = leftoffset + cL;
- splicesitepos2 = rightoffset - cR + 1;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
- bestp = true;
- }
- } else {
- splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
- splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
- bestp = true;
- }
- }
- if (bestp == true) {
- debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
- cL,cR,scoreL,scoreR,score));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- } else {
- debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
- cL,cR,scoreL,scoreR,score));
- }
- }
- }
- }
+ } else {
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chroffset + leftoffset + cL;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
+ debug3(printf("left antiacceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
}
}
- for (/* at main diagonal*/; cL <= chighL; cL++) {
- /* The following check limits genomic inserts (horizontal) and
- multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
- scoreL = (int) matrixL_upper[cL][rL];
- if (directionsL_upper_nogap[cL][rL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
- scoreR = (int) matrixR_lower[rR][cR];
- if (directionsR_lower_nogap[rR][cR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- if ((score = scoreL + scoreR) > bestscore ||
- (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
- bestp = false;
- if (watsonp == true) {
- splicesitepos1 = leftoffset + cL;
- splicesitepos2 = rightoffset - cR + 1;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
- bestp = true;
- }
- } else {
- splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
- splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
- bestp = true;
- }
- }
- if (bestp == true) {
- debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
- cL,cR,scoreL,scoreR,score));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- } else {
- debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
- cL,cR,scoreL,scoreR,score));
- }
- }
- }
- }
-
- for (/* at main diagonal*/; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
- scoreR = (int) matrixR_upper[cR][rR];
- if (directionsR_upper_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- if ((score = scoreL + scoreR) > bestscore ||
- (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
- bestp = false;
- if (watsonp == true) {
- splicesitepos1 = leftoffset + cL;
- splicesitepos2 = rightoffset - cR + 1;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
- bestp = true;
- }
- } else {
- splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
- splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
- bestp = true;
- }
- }
- if (bestp == true) {
- debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
- cL,cR,scoreL,scoreR,score));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- } else {
- debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
- cL,cR,scoreL,scoreR,score));
- }
- }
- }
- }
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chroffset + rightoffset - cR + 1;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
+ debug3(printf("right antidonor probability at cR %d is %f\n",cR,right_probabilities[cR]));
}
}
}
- *finalscore = (int) bestscore;
- *best_introntype = NONINTRON;
-
} else {
- left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
- right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
-
- if (watsonp == true) {
- if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chroffset + leftoffset + cL;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
- }
- }
-
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chroffset + rightoffset - cR + 1;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
- }
- }
-
- } else {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chroffset + leftoffset + cL;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
- }
+ if (cdna_direction > 0) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chrhigh - leftoffset - cL + 1;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
+ debug3(printf("left antidonor probability at cL %d is %f\n",cL,left_probabilities[cL]));
}
+ }
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chroffset + rightoffset - cR + 1;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
- }
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chrhigh - rightoffset + cR;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
+ debug3(printf("right antiacceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
}
}
} else {
- if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chrhigh - leftoffset - cL + 1;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
- }
- }
-
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chrhigh - rightoffset + cR;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
- }
- }
-
- } else {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chrhigh - leftoffset - cL + 1;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
- }
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chrhigh - leftoffset - cL + 1;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
+ debug3(printf("left acceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
}
+ }
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chrhigh - rightoffset + cR;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
- }
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chrhigh - rightoffset + cR;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
+ debug3(printf("right donor probability at cR %d is %f\n",cR,right_probabilities[cR]));
}
}
}
+ }
- /* Search using probs and without simultaneously */
- bestscore = NEG_INFINITY_8;
- bestprob = bestprob_trunc = 0.0;
- for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
- debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
- if ((cloL = rL - lbandL) < 1) {
- cloL = 1;
- }
- if ((chighL = rL + ubandL) > glengthL-1) {
- chighL = glengthL-1;
- }
-
- if ((cloR = rR - lbandR) < 1) {
- cloR = 1;
- }
- if ((chighR = rR + ubandR) > glengthR-1) {
- chighR = glengthR-1;
- }
+ /* Search using probs and without simultaneously */
+ bestscore = NEG_INFINITY_8;
+ bestprob = bestprob_trunc = 0.0;
+ for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
+ debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
+ if ((cloL = rL - lbandL) < 1) {
+ cloL = 1;
+ }
+ if ((chighL = rL + ubandL) > glengthL-1) {
+ chighL = glengthL-1;
+ }
-#ifdef ALLOW_DUAL_INDELS
- fprintf(stderr,"Dual indels not implemented\n");
- abort();
- /* Test indels on left and right */
- for (cL = cloL; cL <= chighL; cL++) {
- /* The following check limits genomic inserts (horizontal) and
- multiple cDNA inserts (vertical). */
- if (1) {
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
- scoreL = (int) matrixL[cL][rL];
- if (directionsL_nogap[cL][rL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
+ if ((cloR = rR - lbandR) < 1) {
+ cloR = 1;
+ }
+ if ((chighR = rR + ubandR) > glengthR-1) {
+ chighR = glengthR-1;
+ }
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (1) {
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
- scoreR = (int) matrixR[cR][rR];
- if (directionsR_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
-
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+ debug3(printf("A. Test no indels\n"));
+ cL = rL;
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
+ } else {
+ probL_trunc = probL;
+ }
+ scoreL = (int) matrixL_upper[cL][rL];
+
+ cR = rR;
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
+ } else {
+ probR_trunc = probR;
+ }
+ scoreR = (int) matrixR_upper[cR][rR];
+
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
+
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
+ }
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
+ }
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
- }
- }
- }
- }
+ debug3(printf("B. Test indel on right\n"));
+ /* Test indel on right */
+ cL = rL;
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
+ } else {
+ probL_trunc = probL;
+ }
+ scoreL = (int) matrixL_upper[cL][rL];
+ if (directionsL_upper_nogap[cL][rL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
-#else
- /* Test indel on right */
- cL = rL;
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
} else {
- probL_trunc = probL;
+ probR_trunc = probR;
}
- scoreL = (int) matrixL_upper[cL][rL];
- if (directionsL_upper_nogap[cL][rL] != DIAG) {
+ scoreR = (int) matrixR_lower[rR][cR];
+ if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreR -= 1;
}
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
- scoreR = (int) matrixR_lower[rR][cR];
- if (directionsR_lower_nogap[rR][cR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -1165,71 +1151,78 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
}
+ }
- for (/*at main diagonal*/; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
- scoreR = (int) matrixR_upper[cR][rR];
- if (directionsR_upper_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
+ debug3(printf("Skip main diagonal\n"));
+ for (/*skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
+ } else {
+ probR_trunc = probR;
+ }
+ scoreR = (int) matrixR_upper[cR][rR];
+ if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -1237,89 +1230,95 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
}
+ }
+ debug3(printf("C. Test indel on left\n"));
+ /* Test indel on left */
+ cR = rR;
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
+ } else {
+ probR_trunc = probR;
+ }
+ scoreR = (int) matrixR_upper[cR][rR];
+ if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
- /* Test indel on left */
- cR = rR;
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
} else {
- probR_trunc = probR;
+ probL_trunc = probL;
}
- scoreR = (int) matrixR_upper[cR][rR];
- if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ scoreL = (int) matrixL_lower[rL][cL];
+ if (directionsL_lower_nogap[rL][cL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreL -= 1;
}
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
- scoreL = (int) matrixL_lower[rL][cL];
- if (directionsL_lower_nogap[rL][cL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
-
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -1327,71 +1326,78 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
}
+ }
- for (/*at main diagonal*/; cL <= chighL && cL < rightoffset-leftoffset-cR; cL++) {
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
- scoreL = (int) matrixL_upper[cL][rL];
- if (directionsL_upper_nogap[cL][rL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
+ debug3(printf("Skip main diagonal\n"));
+ for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
+ } else {
+ probL_trunc = probL;
+ }
+ scoreL = (int) matrixL_upper[cL][rL];
+ if (directionsL_upper_nogap[cL][rL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
-
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
+
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -1399,45 +1405,122 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
}
-#endif
-
}
+ }
+
+ if (bestprob > 2*PROB_CEILING) {
+ /* Probability is good with best alignment, so take that */
+ debug3(printf("Best alignment has good probability\n"));
+ use_prob_p = true;
+ } else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
+ /* Probability-based solution is bad, so use alignment */
+ debug3(printf("Probability-based solution is bad\n"));
+ use_prob_p = false;
+ } else if (bestscore_with_prob < bestscore - 9) {
+ debug3(printf("Probability-based solution requires very bad alignment, because bestscore_with_prob %d < bestscore %d - 9\n",
+ bestscore_with_prob,bestscore));
+ use_prob_p = false;
+ } else {
+ use_prob_p = true;
+ }
- debug(printf("SIMD 8. bestscore %d (bestprob %f) vs bestscore_with_prob %d (bestprob_trunc %f, actually %f and %f)\n",
+ if (use_prob_p == true) {
+ /* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
+ debug3(printf("Switch to probability-based solution\n"));
+ debug3(printf("SIMD 8. bestscore %d (bestprob %f) vs bestscore_with_prob %d (bestprob_trunc %f, actually %f and %f)\n",
bestscore,bestprob,bestscore_with_prob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
- if (bestprob > 2*PROB_CEILING) {
- /* Probability is good with best alignment, so take that */
- debug(printf("Best alignment has good probability\n"));
- } else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
- /* Probability-based solution is bad, so use alignment */
- debug(printf("Probability-based solution is bad\n"));
- } else if (bestscore_with_prob < bestscore - 9) {
- debug(printf("Probability-based solution requires very bad alignment\n"));
- } else {
- /* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
- debug(printf("Switch to probability-based solution\n"));
- *bestcL = bestcL_with_prob;
- *bestcR = bestcR_with_prob;
- *bestrL = bestrL_with_prob;
- *bestrR = bestrR_with_prob;
- bestscore = bestscore_with_prob;
- }
+ *bestcL = bestcL_with_prob;
+ *bestcR = bestcR_with_prob;
+ *bestrL = bestrL_with_prob;
+ *bestrR = bestrR_with_prob;
+ bestscore = bestscore_with_prob;
+ }
- scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],
- cdna_direction,canonical_reward,finalp);
+ FREEA(rightdi);
+ FREEA(leftdi);
+ FREEA(left_probabilities);
+ FREEA(right_probabilities);
+
+ if (halfp == true) {
+ scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],cdna_direction,canonical_reward,finalp);
+ return (int) (bestscore - scoreI/2);
+ } else {
+ return (int) bestscore;
+ }
+}
- if (halfp == true) {
- *finalscore = (int) (bestscore - scoreI/2);
- } else {
- *finalscore = (int) bestscore;
- }
- FREEA(left_probabilities);
- FREEA(right_probabilities);
+static int
+bridge_intron_gap_8_ud (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
+ int *best_introntype, double *left_prob, double *right_prob,
+ Score8_T **matrixL_upper, Score8_T **matrixL_lower,
+ Score8_T **matrixR_upper, Score8_T **matrixR_lower,
+ Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
+ Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
+ char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
+ int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
+ int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
+ double defect_rate, int canonical_reward, int leftoffset, int rightoffset,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool halfp, bool finalp, bool jump_late_p) {
+ int finalscore;
+ int *left_known, *right_known;
+
+ debug(printf("Running bridge_intron_gap_8_ud\n"));
+
+ if (glengthL+1 <= 0) {
+ fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
+ abort();
}
+ if (glengthR+1 <= 0) {
+ fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
+ abort();
+ }
+ left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
+ right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
+ get_known_splicesites(left_known,right_known,glengthL,glengthR,
+ /*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
+ cdna_direction,watsonp,chrnum,chroffset,chrhigh);
+
+ if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
+ /* Constrain to given introns */
+ finalscore = bridge_intron_gap_8_intron_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),&(*best_introntype),
+ matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
+ directionsL_upper_nogap,directionsL_lower_nogap,
+ directionsR_upper_nogap,directionsR_lower_nogap,
+ left_known,right_known,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
+ leftoffset,rightoffset,chrnum,chroffset,chrhigh,jump_late_p);
+ } else {
+ finalscore = bridge_intron_gap_8_site_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),
+ matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
+ directionsL_upper_nogap,directionsL_lower_nogap,
+ directionsR_upper_nogap,directionsR_lower_nogap,
+ gsequenceL,gsequenceL_alt,rev_gsequenceR,rev_gsequenceR_alt,goffsetL,rev_goffsetR,
+ left_known,right_known,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
+ canonical_reward,leftoffset,rightoffset,
+ chroffset,chrhigh,halfp,finalp);
+ }
+
+
+#if 0
/* Determine if result meets given constraints */
if (*finalscore < 0) {
result = false;
@@ -1457,72 +1540,291 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
/* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
result = true;
}
+#endif
-
- if (/*finalp == true &&*/ result == true) {
- get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
- left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
- cdna_direction,watsonp);
- }
-
- debug3(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
- *finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob));
- debug(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
- *finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob));
+ get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
+ left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
+ cdna_direction,watsonp);
FREEA(right_known);
FREEA(left_known);
- FREEA(rightdi);
- FREEA(leftdi);
- return result;
+#if defined(DEBUG) || defined(DEBUG3)
+ printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
+ finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob);
+#endif
+
+ return finalscore;
}
#endif
#if defined(HAVE_SSE2)
-static bool
-bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *bestcR,
- int *best_introntype, double *left_prob, double *right_prob,
- Score16_T **matrixL_upper, Score16_T **matrixL_lower,
- Score16_T **matrixR_upper, Score16_T **matrixR_lower,
- Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
- Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
- char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
- int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
- int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
- double defect_rate, int canonical_reward, int leftoffset, int rightoffset,
- Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
- bool halfp, bool finalp, bool jump_late_p) {
- bool result;
- int bestscore = NEG_INFINITY_16, score, scoreL, scoreR, scoreI;
-#if 0
- int bestscoreI = NEG_INFINITY_16;
-#endif
- int bestscore_with_prob = NEG_INFINITY_16;
+static int
+bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
+ int *best_introntype,
+ Score16_T **matrixL_upper, Score16_T **matrixL_lower,
+ Score16_T **matrixR_upper, Score16_T **matrixR_lower,
+ Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
+ Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
+ int *left_known, int *right_known,
+ int rlength, int glengthL, int glengthR,
+ int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
+ int leftoffset, int rightoffset,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool jump_late_p) {
+ int rL, rR, cL, cR;
+ int cloL, chighL;
+ int cloR, chighR;
+ int bestscore = NEG_INFINITY_16, score, scoreL, scoreR;
+ Univcoord_T splicesitepos1, splicesitepos2;
+ bool bestp;
+
+
+ for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
+ debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
+ if ((cloL = rL - lbandL) < 1) {
+ cloL = 1;
+ }
+ if ((chighL = rL + ubandL) > glengthL-1) {
+ chighL = glengthL-1;
+ }
+
+ if ((cloR = rR - lbandR) < 1) {
+ cloR = 1;
+ }
+ if ((chighR = rR + ubandR) > glengthR-1) {
+ chighR = glengthR-1;
+ }
+
+ /* Test indels on left and right */
+ for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
+ /* The following check limits genomic inserts (horizontal) and
+ multiple cDNA inserts (vertical). */
+ if (left_known[cL] > 0) {
+ scoreL = (int) matrixL_lower[rL][cL];
+ if (directionsL_lower_nogap[rL][cL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
+
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
+ if (right_known[cR] > 0) {
+ scoreR = (int) matrixR_lower[rR][cR];
+ if (directionsR_lower_nogap[rR][cR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ if ((score = scoreL + scoreR) > bestscore ||
+ (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
+ bestp = false;
+ if (watsonp == true) {
+ splicesitepos1 = leftoffset + cL;
+ splicesitepos2 = rightoffset - cR + 1;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
+ bestp = true;
+ }
+ } else {
+ splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
+ splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
+ bestp = true;
+ }
+ }
+ if (bestp == true) {
+ debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
+ cL,cR,scoreL,scoreR,score));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ } else {
+ debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
+ cL,cR,scoreL,scoreR,score));
+ }
+ }
+ }
+ }
+
+ for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
+ if (right_known[cR] > 0) {
+ scoreR = (int) matrixR_upper[cR][rR];
+ if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ if ((score = scoreL + scoreR) > bestscore ||
+ (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
+ bestp = false;
+ if (watsonp == true) {
+ splicesitepos1 = leftoffset + cL;
+ splicesitepos2 = rightoffset - cR + 1;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
+ bestp = true;
+ }
+ } else {
+ splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
+ splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
+ bestp = true;
+ }
+ }
+ if (bestp == true) {
+ debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
+ cL,cR,scoreL,scoreR,score));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ } else {
+ debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
+ cL,cR,scoreL,scoreR,score));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (/* at main diagonal*/; cL < chighL; cL++) {
+ /* The following check limits genomic inserts (horizontal) and
+ multiple cDNA inserts (vertical). */
+ if (left_known[cL] > 0) {
+ scoreL = (int) matrixL_upper[cL][rL];
+ if (directionsL_upper_nogap[cL][rL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
+
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
+ if (right_known[cR] > 0) {
+ scoreR = (int) matrixR_lower[rR][cR];
+ if (directionsR_lower_nogap[rR][cR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ if ((score = scoreL + scoreR) > bestscore ||
+ (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
+ bestp = false;
+ if (watsonp == true) {
+ splicesitepos1 = leftoffset + cL;
+ splicesitepos2 = rightoffset - cR + 1;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
+ bestp = true;
+ }
+ } else {
+ splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
+ splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
+ bestp = true;
+ }
+ }
+ if (bestp == true) {
+ debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
+ cL,cR,scoreL,scoreR,score));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ } else {
+ debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
+ cL,cR,scoreL,scoreR,score));
+ }
+ }
+ }
+ }
+
+ for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
+ if (right_known[cR] > 0) {
+ scoreR = (int) matrixR_upper[cR][rR];
+ if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ if ((score = scoreL + scoreR) > bestscore ||
+ (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
+ bestp = false;
+ if (watsonp == true) {
+ splicesitepos1 = leftoffset + cL;
+ splicesitepos2 = rightoffset - cR + 1;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
+ bestp = true;
+ }
+ } else {
+ splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
+ splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
+ bestp = true;
+ }
+ }
+ if (bestp == true) {
+ debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
+ cL,cR,scoreL,scoreR,score));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ } else {
+ debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
+ cL,cR,scoreL,scoreR,score));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ *best_introntype = NONINTRON;
+ return (int) bestscore;
+}
+
+
+/* Returns finalscore */
+static int
+bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
+ Score16_T **matrixL_upper, Score16_T **matrixL_lower,
+ Score16_T **matrixR_upper, Score16_T **matrixR_lower,
+ Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
+ Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
+ char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
+ int goffsetL, int rev_goffsetR, int *left_known, int *right_known,
+ int rlength, int glengthL, int glengthR,
+ int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
+ int canonical_reward, int leftoffset, int rightoffset,
+ Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool halfp, bool finalp) {
int rL, rR, cL, cR;
int bestrL_with_prob, bestrR_with_prob, bestcL_with_prob, bestcR_with_prob;
int cloL, chighL;
int cloR, chighR;
- char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
- int *leftdi, *rightdi, introntype;
- int *left_known, *right_known;
+ int introntype;
+ int bestscore = NEG_INFINITY_16, score, scoreL, scoreR, scoreI;
+ int bestscore_with_prob = NEG_INFINITY_16;
double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob, bestprob_trunc;
- Univcoord_T splicesitepos, splicesitepos1, splicesitepos2;
- bool bestp;
-
-
- debug(printf("Running bridge_intron_gap_16_ud\n"));
-
- if (glengthL+1 <= 0) {
- fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
- abort();
- }
+ Univcoord_T splicesitepos;
+ char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
+ int *leftdi, *rightdi;
+ bool use_prob_p;
- if (glengthR+1 <= 0) {
- fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
- abort();
- }
/* Read dinucleotides */
leftdi = (int *) MALLOCA((glengthL+1) * sizeof(int));
@@ -1576,535 +1878,278 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
- left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
- right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
- get_known_splicesites(left_known,right_known,glengthL,glengthR,
- /*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
- cdna_direction,watsonp,chrnum,chroffset,chrhigh);
-
- /* Perform computations */
-#if 0
- /* Bands already computed for dynamic programming */
-#if 1
- /* Allows unlimited indel lengths */
- ubandL = glengthL - rlength + extraband_paired;
- lbandL = extraband_paired;
-
- ubandR = glengthR - rlength + extraband_paired;
- lbandR = extraband_paired;
-#else
- /* Limit indels to 3 bp around splice sites. Doesn't work on PacBio reads. */
- ubandL = 3;
- lbandL = 3;
-
- ubandR = 3;
- lbandR = 3;
-#endif
-#endif
+ left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
+ right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
- if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
- /* Constrain to given introns */
- for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
- debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
- if ((cloL = rL - lbandL) < 1) {
- cloL = 1;
- }
- if ((chighL = rL + ubandL) > glengthL-1) {
- chighL = glengthL-1;
+ debug3(printf("watsonp is %d. cdna_direction is %d\n",watsonp,cdna_direction));
+ if (watsonp == true) {
+ if (cdna_direction > 0) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chroffset + leftoffset + cL;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
+ debug3(printf("left donor probability at cL %d is %f\n",cL,left_probabilities[cL]));
+ }
}
- if ((cloR = rR - lbandR) < 1) {
- cloR = 1;
- }
- if ((chighR = rR + ubandR) > glengthR-1) {
- chighR = glengthR-1;
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chroffset + rightoffset - cR + 1;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
+ debug3(printf("right acceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
+ }
}
- /* Test indels on left and right */
- for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
- /* The following check limits genomic inserts (horizontal) and
- multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
- scoreL = (int) matrixL_lower[rL][cL];
- if (directionsL_lower_nogap[rL][cL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
- scoreR = (int) matrixR_lower[rR][cR];
- if (directionsR_lower_nogap[rR][cR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- if ((score = scoreL + scoreR) > bestscore ||
- (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
- bestp = false;
- if (watsonp == true) {
- splicesitepos1 = leftoffset + cL;
- splicesitepos2 = rightoffset - cR + 1;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
- bestp = true;
- }
- } else {
- splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
- splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
- bestp = true;
- }
- }
- if (bestp == true) {
- debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
- cL,cR,scoreL,scoreR,score));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- } else {
- debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
- cL,cR,scoreL,scoreR,score));
- }
- }
- }
- }
-
- for (/* at main diagonal*/; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
- scoreR = (int) matrixR_upper[cR][rR];
- if (directionsR_upper_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- if ((score = scoreL + scoreR) > bestscore ||
- (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
- bestp = false;
- if (watsonp == true) {
- splicesitepos1 = leftoffset + cL;
- splicesitepos2 = rightoffset - cR + 1;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
- bestp = true;
- }
- } else {
- splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
- splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
- bestp = true;
- }
- }
- if (bestp == true) {
- debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
- cL,cR,scoreL,scoreR,score));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- } else {
- debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
- cL,cR,scoreL,scoreR,score));
- }
- }
- }
- }
+ } else {
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chroffset + leftoffset + cL;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
+ debug3(printf("left antiacceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
}
}
- for (/* at main diagonal*/; cL <= chighL; cL++) {
- /* The following check limits genomic inserts (horizontal) and
- multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
- scoreL = (int) matrixL_upper[cL][rL];
- if (directionsL_upper_nogap[cL][rL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
- scoreR = (int) matrixR_lower[rR][cR];
- if (directionsR_lower_nogap[rR][cR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- if ((score = scoreL + scoreR) > bestscore ||
- (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
- bestp = false;
- if (watsonp == true) {
- splicesitepos1 = leftoffset + cL;
- splicesitepos2 = rightoffset - cR + 1;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
- bestp = true;
- }
- } else {
- splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
- splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
- bestp = true;
- }
- }
- if (bestp == true) {
- debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
- cL,cR,scoreL,scoreR,score));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- } else {
- debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
- cL,cR,scoreL,scoreR,score));
- }
- }
- }
- }
-
- for (/* at main diagonal*/; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
- scoreR = (int) matrixR_upper[cR][rR];
- if (directionsR_upper_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- if ((score = scoreL + scoreR) > bestscore ||
- (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
- bestp = false;
- if (watsonp == true) {
- splicesitepos1 = leftoffset + cL;
- splicesitepos2 = rightoffset - cR + 1;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
- bestp = true;
- }
- } else {
- splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
- splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
- bestp = true;
- }
- }
- if (bestp == true) {
- debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
- cL,cR,scoreL,scoreR,score));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- } else {
- debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
- cL,cR,scoreL,scoreR,score));
- }
- }
- }
- }
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chroffset + rightoffset - cR + 1;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
+ debug3(printf("right antidonor probability at cR %d is %f\n",cR,right_probabilities[cR]));
}
}
}
- *finalscore = (int) bestscore;
- *best_introntype = NONINTRON;
-
} else {
- left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
- right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
-
- if (watsonp == true) {
- if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chroffset + leftoffset + cL;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
- }
- }
-
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chroffset + rightoffset - cR + 1;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
- }
- }
-
- } else {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chroffset + leftoffset + cL;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
- }
+ if (cdna_direction > 0) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chrhigh - leftoffset - cL + 1;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
+ debug3(printf("left antidonor probability at cL %d is %f\n",cL,left_probabilities[cL]));
}
+ }
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chroffset + rightoffset - cR + 1;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
- }
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chrhigh - rightoffset + cR;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
+ debug3(printf("right antiacceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
}
}
} else {
- if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chrhigh - leftoffset - cL + 1;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
- }
- }
-
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chrhigh - rightoffset + cR;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
- }
- }
-
- } else {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chrhigh - leftoffset - cL + 1;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
- }
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chrhigh - leftoffset - cL + 1;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
+ debug3(printf("left acceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
}
+ }
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chrhigh - rightoffset + cR;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
- }
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chrhigh - rightoffset + cR;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
+ debug3(printf("right donor probability at cR %d is %f\n",cR,right_probabilities[cR]));
}
}
}
+ }
- /* Search using probs and without simultaneously */
- bestscore = NEG_INFINITY_16;
- bestprob = bestprob_trunc = 0.0;
- for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
- debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
- if ((cloL = rL - lbandL) < 1) {
- cloL = 1;
- }
- if ((chighL = rL + ubandL) > glengthL-1) {
- chighL = glengthL-1;
- }
-
- if ((cloR = rR - lbandR) < 1) {
- cloR = 1;
- }
- if ((chighR = rR + ubandR) > glengthR-1) {
- chighR = glengthR-1;
- }
-
-#ifdef ALLOW_DUAL_INDELS
- fprintf(stderr,"Dual indels not implemented\n");
- abort();
- /* Test indels on left and right */
- for (cL = cloL; cL <= chighL; cL++) {
- /* The following check limits genomic inserts (horizontal) and
- multiple cDNA inserts (vertical). */
- if (1) {
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
- scoreL = (int) matrixL[cL][rL];
- if (directionsL_nogap[cL][rL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (1) {
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
- scoreR = (int) matrixR[cR][rR];
- if (directionsR_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
+ /* Search using probs and without simultaneously */
+ bestscore = NEG_INFINITY_16;
+ bestprob = bestprob_trunc = 0.0;
+ for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
+ debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
+ if ((cloL = rL - lbandL) < 1) {
+ cloL = 1;
+ }
+ if ((chighL = rL + ubandL) > glengthL-1) {
+ chighL = glengthL-1;
+ }
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+ if ((cloR = rR - lbandR) < 1) {
+ cloR = 1;
+ }
+ if ((chighR = rR + ubandR) > glengthR-1) {
+ chighR = glengthR-1;
+ }
+ debug3(printf("A. Test no indels\n"));
+ cL = rL;
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
+ } else {
+ probL_trunc = probL;
+ }
+ scoreL = (int) matrixL_upper[cL][rL];
+
+ cR = rR;
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
+ } else {
+ probR_trunc = probR;
+ }
+ scoreR = (int) matrixR_upper[cR][rR];
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
+
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
+
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
+ }
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
+ }
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
- }
- }
- }
- }
+ debug3(printf("B. Test indel on right\n"));
+ /* Test indel on right */
+ cL = rL;
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
+ } else {
+ probL_trunc = probL;
+ }
+ scoreL = (int) matrixL_upper[cL][rL];
+ if (directionsL_upper_nogap[cL][rL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
-#else
- /* Test indel on right */
- cL = rL;
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
} else {
- probL_trunc = probL;
+ probR_trunc = probR;
}
- scoreL = (int) matrixL_upper[cL][rL];
- if (directionsL_upper_nogap[cL][rL] != DIAG) {
+ scoreR = (int) matrixR_lower[rR][cR];
+ if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreR -= 1;
}
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
- scoreR = (int) matrixR_lower[rR][cR];
- if (directionsR_lower_nogap[rR][cR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -2112,71 +2157,78 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
}
+ }
- for (/*at main diagonal*/; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
- scoreR = (int) matrixR_upper[cR][rR];
- if (directionsR_upper_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
+ debug3(printf("Skip main diagonal\n"));
+ for (/*Skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
+ } else {
+ probR_trunc = probR;
+ }
+ scoreR = (int) matrixR_upper[cR][rR];
+ if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -2184,89 +2236,96 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
}
+ }
- /* Test indel on left */
- cR = rR;
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
+ debug3(printf("C. Test indel on left\n"));
+ /* Test indel on left */
+ cR = rR;
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
+ } else {
+ probR_trunc = probR;
+ }
+ scoreR = (int) matrixR_upper[cR][rR];
+ if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
} else {
- probR_trunc = probR;
+ probL_trunc = probL;
}
- scoreR = (int) matrixR_upper[cR][rR];
- if (directionsR_upper_nogap[cR][rR] != DIAG) {
+ scoreL = (int) matrixL_lower[rL][cL];
+ if (directionsL_lower_nogap[rL][cL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreL -= 1;
}
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
- scoreL = (int) matrixL_lower[rL][cL];
- if (directionsL_lower_nogap[rL][cL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
-
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -2274,71 +2333,78 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
}
+ }
- for (/*at main diagonal*/; cL <= chighL && cL < rightoffset-leftoffset-cR; cL++) {
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
- scoreL = (int) matrixL_upper[cL][rL];
- if (directionsL_upper_nogap[cL][rL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
+ debug3(printf("Skip main diagonal\n"));
+ for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
+ } else {
+ probL_trunc = probL;
+ }
+ scoreL = (int) matrixL_upper[cL][rL];
+ if (directionsL_upper_nogap[cL][rL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
-
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
+
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
-
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -2346,45 +2412,123 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
}
-#endif
-
}
+ }
+
+ if (bestprob > 2*PROB_CEILING) {
+ /* Probability is good with best alignment, so take that */
+ debug(printf("Best alignment has good probability\n"));
+ use_prob_p = true;
+ } else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
+ /* Probability-based solution is bad, so use alignment */
+ debug(printf("Probability-based solution is bad\n"));
+ use_prob_p = false;
+ } else if (bestscore_with_prob < bestscore - 9) {
+ debug(printf("Probability-based solution requires very bad alignment\n"));
+ use_prob_p = false;
+ } else {
+ use_prob_p = true;
+ }
+ if (use_prob_p == true) {
+ /* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
+ debug(printf("Switch to probability-based solution\n"));
debug(printf("SIMD 16. bestscore %d (bestprob %f) vs bestscore_with_prob %d (bestprob_trunc %f, actually %f and %f)\n",
bestscore,bestprob,bestscore_with_prob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
- if (bestprob > 2*PROB_CEILING) {
- /* Probability is good with best alignment, so take that */
- debug(printf("Best alignment has good probability\n"));
- } else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
- /* Probability-based solution is bad, so use alignment */
- debug(printf("Probability-based solution is bad\n"));
- } else if (bestscore_with_prob < bestscore - 9) {
- debug(printf("Probability-based solution requires very bad alignment\n"));
- } else {
- /* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
- debug(printf("Switch to probability-based solution\n"));
- *bestcL = bestcL_with_prob;
- *bestcR = bestcR_with_prob;
- *bestrL = bestrL_with_prob;
- *bestrR = bestrR_with_prob;
- bestscore = bestscore_with_prob;
- }
+ *bestcL = bestcL_with_prob;
+ *bestcR = bestcR_with_prob;
+ *bestrL = bestrL_with_prob;
+ *bestrR = bestrR_with_prob;
+ bestscore = bestscore_with_prob;
+ }
- scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],
- cdna_direction,canonical_reward,finalp);
+ FREEA(rightdi);
+ FREEA(leftdi);
+ FREEA(left_probabilities);
+ FREEA(right_probabilities);
- if (halfp == true) {
- *finalscore = (int) (bestscore - scoreI/2);
- } else {
- *finalscore = (int) bestscore;
- }
+ if (halfp == true) {
+ scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],cdna_direction,canonical_reward,finalp);
+ return (int) (bestscore - scoreI/2);
+ } else {
+ return (int) bestscore;
+ }
+}
+
+
+
+static int
+bridge_intron_gap_16_ud (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
+ int *best_introntype, double *left_prob, double *right_prob,
+ Score16_T **matrixL_upper, Score16_T **matrixL_lower,
+ Score16_T **matrixR_upper, Score16_T **matrixR_lower,
+ Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
+ Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
+ char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
+ int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
+ int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
+ double defect_rate, int canonical_reward, int leftoffset, int rightoffset,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool halfp, bool finalp, bool jump_late_p) {
+ int finalscore;
+ int *left_known, *right_known;
+
+ debug(printf("Running bridge_intron_gap_16_ud\n"));
+
+ if (glengthL+1 <= 0) {
+ fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
+ abort();
+ }
+
+ if (glengthR+1 <= 0) {
+ fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
+ abort();
+ }
+
+ left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
+ right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
+ get_known_splicesites(left_known,right_known,glengthL,glengthR,
+ /*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
+ cdna_direction,watsonp,chrnum,chroffset,chrhigh);
+
+ if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
+ /* Constrain to given introns */
+ finalscore = bridge_intron_gap_16_intron_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),&(*best_introntype),
+ matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
+ directionsL_upper_nogap,directionsL_lower_nogap,
+ directionsR_upper_nogap,directionsR_lower_nogap,
+ left_known,right_known,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
+ leftoffset,rightoffset,chrnum,chroffset,chrhigh,jump_late_p);
- FREEA(left_probabilities);
- FREEA(right_probabilities);
+ } else {
+ finalscore = bridge_intron_gap_16_site_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),
+ matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
+ directionsL_upper_nogap,directionsL_lower_nogap,
+ directionsR_upper_nogap,directionsR_lower_nogap,
+ gsequenceL,gsequenceL_alt,rev_gsequenceR,rev_gsequenceR_alt,goffsetL,rev_goffsetR,
+ left_known,right_known,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
+ canonical_reward,leftoffset,rightoffset,
+ chroffset,chrhigh,halfp,finalp);
}
+#if 0
/* Determine if result meets given constraints */
if (*finalscore < 0) {
result = false;
@@ -2404,67 +2548,149 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
/* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
result = true;
}
+#endif
-
- if (/*finalp == true &&*/ result == true) {
- get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
- left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
- cdna_direction,watsonp);
- }
-
- debug3(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
- *finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob));
- debug(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
- *finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob));
+ get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
+ left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
+ cdna_direction,watsonp);
FREEA(right_known);
FREEA(left_known);
- FREEA(rightdi);
- FREEA(leftdi);
- return result;
+#if defined(DEBUG) || defined(DEBUG3)
+ printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
+ finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob);
+#endif
+
+ return finalscore;
}
#endif
+
#ifndef HAVE_SSE2
-static bool
-bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *bestcR,
- int *best_introntype, double *left_prob, double *right_prob,
- Score32_T **matrixL, Score32_T **matrixR,
- Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
- char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
- int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
- int cdna_direction, bool watsonp, int extraband_paired, double defect_rate, int canonical_reward,
- int leftoffset, int rightoffset,
- Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
- bool halfp, bool finalp, bool jump_late_p) {
- bool result;
- int bestscore = NEG_INFINITY_32, score, scoreL, scoreR, scoreI;
- /* int bestscoreI = NEG_INFINITY_32; */
- int bestscore_with_prob = NEG_INFINITY_32;
+static int
+bridge_intron_gap_intron_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
+ int *best_introntype,
+ Score32_T **matrixL, Score32_T **matrixR,
+ Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
+ int *left_known, int *right_known,
+ int rlength, int glengthL, int glengthR,
+ int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
+ int leftoffset, int rightoffset,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool jump_late_p) {
int rL, rR, cL, cR;
- int bestrL_with_prob, bestrR_with_prob, bestcL_with_prob, bestcR_with_prob;
- int lbandL, ubandL, cloL, chighL;
- int lbandR, ubandR, cloR, chighR;
- char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
- int *leftdi, *rightdi, introntype;
- int *left_known, *right_known;
- double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob, bestprob_trunc;
- Univcoord_T splicesitepos, splicesitepos1, splicesitepos2;
+ int cloL, chighL;
+ int cloR, chighR;
+ int bestscore = NEG_INFINITY_32, score, scoreL, scoreR;
+ Univcoord_T splicesitepos1, splicesitepos2;
bool bestp;
- debug(printf("Running bridge_intron_gap\n"));
- if (glengthL+1 <= 0) {
- fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
- abort();
- }
+ for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
+ debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
+ if ((cloL = rL - lbandL) < 1) {
+ cloL = 1;
+ }
+ if ((chighL = rL + ubandL) > glengthL-1) {
+ chighL = glengthL-1;
+ }
- if (glengthR+1 <= 0) {
- fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
- abort();
+ if ((cloR = rR - lbandR) < 1) {
+ cloR = 1;
+ }
+ if ((chighR = rR + ubandR) > glengthR-1) {
+ chighR = glengthR-1;
+ }
+
+ /* Test indels on left and right */
+ for (cL = cloL; cL < chighL; cL++) {
+ /* The following check limits genomic inserts (horizontal) and
+ multiple cDNA inserts (vertical). */
+ if (left_known[cL] > 0) {
+ scoreL = (int) matrixL[cL][rL];
+ if (directionsL_nogap[cL][rL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
+
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cR = cloR; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
+ if (right_known[cR] > 0) {
+ scoreR = (int) matrixR[cR][rR];
+ if (directionsR_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ if ((score = scoreL + scoreR) > bestscore ||
+ (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
+ bestp = false;
+ if (watsonp == true) {
+ splicesitepos1 = leftoffset + cL;
+ splicesitepos2 = rightoffset - cR + 1;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
+ bestp = true;
+ }
+ } else {
+ splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
+ splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
+ if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
+ splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
+ bestp = true;
+ }
+ }
+ if (bestp == true) {
+ debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
+ cL,cR,scoreL,scoreR,score));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ } else {
+ debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
+ cL,cR,scoreL,scoreR,score));
+ }
+ }
+ }
+ }
+ }
+ }
}
+ *best_introntype = NONINTRON;
+ return (int) bestscore;
+}
+
+
+/* Returns finalscore */
+static int
+bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
+ Score32_T **matrixL, Score32_T **matrixR,
+ Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
+ char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
+ int goffsetL, int rev_goffsetR, int *left_known, int *right_known,
+ int rlength, int glengthL, int glengthR,
+ int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
+ int canonical_reward, int leftoffset, int rightoffset,
+ Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool halfp, bool finalp) {
+ int rL, rR, cL, cR;
+ int bestrL_with_prob, bestrR_with_prob, bestcL_with_prob, bestcR_with_prob;
+ int cloL, chighL;
+ int cloR, chighR;
+ int introntype;
+ int bestscore = NEG_INFINITY_32, score, scoreL, scoreR, scoreI;
+ int bestscore_with_prob = NEG_INFINITY_32;
+ double *left_probabilities, *right_probabilities, probL, probR, probL_trunc, probR_trunc, bestprob, bestprob_trunc;
+ Univcoord_T splicesitepos;
+ char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
+ int *leftdi, *rightdi;
+ bool use_prob_p;
+
+
/* Read dinucleotides */
leftdi = (int *) MALLOCA((glengthL+1) * sizeof(int));
rightdi = (int *) MALLOCA((glengthR+1) * sizeof(int));
@@ -2506,401 +2732,227 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
} else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
rightdi[cR] = RIGHT_AC;
#ifndef PMAP
- } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
- rightdi[cR] = RIGHT_GC;
- } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
- rightdi[cR] = RIGHT_AT;
-#endif
- } else {
- rightdi[cR] = 0x00;
- }
- }
- rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
-
- left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
- right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
- get_known_splicesites(left_known,right_known,glengthL,glengthR,
- /*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
- cdna_direction,watsonp,chrnum,chroffset,chrhigh);
-
- /* Perform computations */
-#if 1
- /* Allows unlimited indel lengths */
- ubandL = glengthL - rlength + extraband_paired;
- lbandL = extraband_paired;
-
- ubandR = glengthR - rlength + extraband_paired;
- lbandR = extraband_paired;
-#else
- /* Limit indels to 3 bp around splice sites. Doesn't work on PacBio reads. */
- ubandL = 3;
- lbandL = 3;
-
- ubandR = 3;
- lbandR = 3;
-#endif
-
-
- if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
- /* Constrain to given introns */
- for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
- debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
- if ((cloL = rL - lbandL) < 1) {
- cloL = 1;
- }
- if ((chighL = rL + ubandL) > glengthL-1) {
- chighL = glengthL-1;
- }
-
- if ((cloR = rR - lbandR) < 1) {
- cloR = 1;
- }
- if ((chighR = rR + ubandR) > glengthR-1) {
- chighR = glengthR-1;
- }
-
- /* Test indels on left and right */
- for (cL = cloL; cL <= chighL; cL++) {
- /* The following check limits genomic inserts (horizontal) and
- multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
- scoreL = (int) matrixL[cL][rL];
- if (directionsL_nogap[cL][rL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
- scoreR = (int) matrixR[cR][rR];
- if (directionsR_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- if ((score = scoreL + scoreR) > bestscore ||
- (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
- bestp = false;
- if (watsonp == true) {
- splicesitepos1 = leftoffset + cL;
- splicesitepos2 = rightoffset - cR + 1;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
- bestp = true;
- }
- } else {
- splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
- splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
- if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
- splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
- bestp = true;
- }
- }
- if (bestp == true) {
- debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
- cL,cR,scoreL,scoreR,score));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- } else {
- debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
- cL,cR,scoreL,scoreR,score));
- }
- }
- }
- }
- }
- }
+ } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
+ rightdi[cR] = RIGHT_GC;
+ } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
+ rightdi[cR] = RIGHT_AT;
+#endif
+ } else {
+ rightdi[cR] = 0x00;
}
+ }
+ rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
- *finalscore = (int) bestscore;
- *best_introntype = NONINTRON;
-
- } else {
- left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
- right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
-
- if (watsonp == true) {
- if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chroffset + leftoffset + cL;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
- }
- }
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chroffset + rightoffset - cR + 1;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
- }
- }
+ left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
+ right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
- } else {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chroffset + leftoffset + cL;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
- }
+ debug3(printf("watsonp is %d. cdna_direction is %d\n",watsonp,cdna_direction));
+ if (watsonp == true) {
+ if (cdna_direction > 0) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chroffset + leftoffset + cL;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
+ debug3(printf("left donor probability at cL %d is %f\n",cL,left_probabilities[cL]));
}
+ }
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chroffset + rightoffset - cR + 1;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
- }
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chroffset + rightoffset - cR + 1;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
+ debug3(printf("right acceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
}
}
} else {
- if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chrhigh - leftoffset - cL + 1;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
- }
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chroffset + leftoffset + cL;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
+ debug3(printf("left antiacceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
}
+ }
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chrhigh - rightoffset + cR;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
- }
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chroffset + rightoffset - cR + 1;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
+ debug3(printf("right antidonor probability at cR %d is %f\n",cR,right_probabilities[cR]));
}
+ }
+ }
- } else {
- for (cL = 0; cL < glengthL - 1; cL++) {
- splicesitepos = chrhigh - leftoffset - cL + 1;
- if (left_known[cL]) {
- left_probabilities[cL] = 1.0;
- } else {
- left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
- }
+ } else {
+ if (cdna_direction > 0) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chrhigh - leftoffset - cL + 1;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
+ debug3(printf("left antidonor probability at cL %d is %f\n",cL,left_probabilities[cL]));
}
+ }
- for (cR = 0; cR < glengthR - 1; cR++) {
- splicesitepos = chrhigh - rightoffset + cR;
- if (right_known[cR]) {
- right_probabilities[cR] = 1.0;
- } else {
- right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
- }
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chrhigh - rightoffset + cR;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
+ debug3(printf("right antiacceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
}
}
- }
- /* Search using probs and without simultaneously */
- bestscore = NEG_INFINITY_16;
- bestprob = bestprob_trunc = 0.0;
- for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
- debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
- if ((cloL = rL - lbandL) < 1) {
- cloL = 1;
- }
- if ((chighL = rL + ubandL) > glengthL-1) {
- chighL = glengthL-1;
+ } else {
+ for (cL = 0; cL < glengthL - 1; cL++) {
+ splicesitepos = chrhigh - leftoffset - cL + 1;
+ if (left_known[cL]) {
+ left_probabilities[cL] = 1.0;
+ } else {
+ left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
+ debug3(printf("left acceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
+ }
}
- if ((cloR = rR - lbandR) < 1) {
- cloR = 1;
- }
- if ((chighR = rR + ubandR) > glengthR-1) {
- chighR = glengthR-1;
+ for (cR = 0; cR < glengthR - 1; cR++) {
+ splicesitepos = chrhigh - rightoffset + cR;
+ if (right_known[cR]) {
+ right_probabilities[cR] = 1.0;
+ } else {
+ right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
+ debug3(printf("right donor probability at cR %d is %f\n",cR,right_probabilities[cR]));
+ }
}
+ }
+ }
-#ifdef ALLOW_DUAL_INDELS
- /* Test indels on left and right */
- for (cL = cloL; cL <= chighL; cL++) {
- /* The following check limits genomic inserts (horizontal) and
- multiple cDNA inserts (vertical). */
- if (1) {
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
- scoreL = (int) matrixL[cL][rL];
- if (directionsL_nogap[cL][rL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (1) {
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
- scoreR = (int) matrixR[cR][rR];
- if (directionsR_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
-
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
-
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
+ /* Search using probs and without simultaneously */
+ bestscore = NEG_INFINITY_32;
+ bestprob = bestprob_trunc = 0.0;
+ for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
+ debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
+ if ((cloL = rL - lbandL) < 1) {
+ cloL = 1;
+ }
+ if ((chighL = rL + ubandL) > glengthL-1) {
+ chighL = glengthL-1;
+ }
+ if ((cloR = rR - lbandR) < 1) {
+ cloR = 1;
+ }
+ if ((chighR = rR + ubandR) > glengthR-1) {
+ chighR = glengthR-1;
+ }
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ debug3(printf("A. Test no indels\n"));
+ cL = rL;
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
+ } else {
+ probL_trunc = probL;
+ }
+ scoreL = (int) matrixL[cL][rL];
+
+ cR = rR;
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
+ } else {
+ probR_trunc = probR;
+ }
+ scoreR = (int) matrixR[cR][rR];
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
+ debug3(printf("B. Test indel on right\n"));
+ /* Test indel on right */
+ cL = rL;
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
+ } else {
+ probL_trunc = probL;
+ }
+ scoreL = (int) matrixL[cL][rL];
+ if (directionsL_nogap[cL][rL] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreL -= 1;
+ }
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
- }
- }
- }
- }
-#else
- /* Test indel on right */
- cL = rL;
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cR = cloR; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
} else {
- probL_trunc = probL;
+ probR_trunc = probR;
}
- scoreL = (int) matrixL[cL][rL];
- if (directionsL_nogap[cL][rL] != DIAG) {
+ scoreR = (int) matrixR[cR][rR];
+ if (directionsR_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreR -= 1;
}
-
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cR = cloR; cR <= chighR && cR < rightoffset-leftoffset-cL; cR++) {
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
- } else {
- probR_trunc = probR;
- }
- scoreR = (int) matrixR[cR][rR];
- if (directionsR_nogap[cR][rR] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreR -= 1;
- }
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -2908,88 +2960,95 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
+
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
}
+ }
- /* Test indel on left */
- cR = rR;
- probR = right_probabilities[cR];
- if (probR > PROB_CEILING) {
- probR_trunc = PROB_CEILING;
- } else if (probR < PROB_FLOOR) {
- probR_trunc = PROB_FLOOR;
+ debug3(printf("C. Test indel on left\n"));
+ /* Test indel on left */
+ cR = rR;
+ probR = right_probabilities[cR];
+ if (probR > PROB_CEILING) {
+ probR_trunc = PROB_CEILING;
+ } else if (probR < PROB_FLOOR) {
+ probR_trunc = PROB_FLOOR;
+ } else {
+ probR_trunc = probR;
+ }
+ scoreR = (int) matrixR[cR][rR];
+ if (directionsR_nogap[cR][rR] != DIAG) {
+ /* Favor gaps away from intron if possible */
+ scoreR -= 1;
+ }
+
+ /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
+ for (cL = cloL; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
+ probL = left_probabilities[cL];
+ if (probL > PROB_CEILING) {
+ probL_trunc = PROB_CEILING;
+ } else if (probL < PROB_FLOOR) {
+ probL_trunc = PROB_FLOOR;
} else {
- probR_trunc = probR;
+ probL_trunc = probL;
}
- scoreR = (int) matrixR[cR][rR];
- if (directionsR_nogap[cR][rR] != DIAG) {
+ scoreL = (int) matrixL[cL][rL];
+ if (directionsL_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreL -= 1;
}
- /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
- for (cL = cloL; cL <= chighL && cL < rightoffset-leftoffset-cR; cL++) {
- probL = left_probabilities[cL];
- if (probL > PROB_CEILING) {
- probL_trunc = PROB_CEILING;
- } else if (probL < PROB_FLOOR) {
- probL_trunc = PROB_FLOOR;
- } else {
- probL_trunc = probL;
- }
- scoreL = (int) matrixL[cL][rL];
- if (directionsL_nogap[cL][rL] != DIAG) {
- /* Favor gaps away from intron if possible */
- scoreL -= 1;
- }
-
- scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],
- cdna_direction,canonical_reward,finalp);
+#ifdef USE_SCOREI
+ scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
+#else
+ scoreI = 0;
+#endif
- if ((score = scoreL + scoreI + scoreR) > bestscore) {
- debug3(printf("No prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- bestscore = score;
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- } else if (score == bestscore && probL + probR > bestprob) {
- debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
- cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
- debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
- *bestrL = rL;
- *bestrR = rR;
- *bestcL = cL;
- *bestcR = cR;
- bestprob = probL + probR;
- }
-
- if (probL_trunc + probR_trunc < bestprob_trunc) {
- debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- } else if (probL_trunc + probR_trunc == bestprob_trunc) {
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
-
- if (scoreL + scoreI + scoreR > bestscore_with_prob) {
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
- bestprob_trunc = probL_trunc + probR_trunc;
- bestcL_with_prob = cL;
- bestcR_with_prob = cR;
- bestrL_with_prob = rL;
- bestrR_with_prob = rR;
- bestscore_with_prob = scoreL + scoreI + scoreR;
- }
+ if ((score = scoreL + scoreI + scoreR) > bestscore) {
+ debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ bestscore = score;
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else if (score == bestscore && probL + probR > bestprob) {
+ debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
+ *bestrL = rL;
+ *bestrR = rR;
+ *bestcL = cL;
+ *bestcR = cR;
+ bestprob = probL + probR;
+ } else {
+ debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
+ cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+ }
- } else {
- /* probL_trunc + probR_trunc > bestprob_trunc */
- debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
- cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+ if (probL_trunc + probR_trunc < bestprob_trunc) {
+ debug3a(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
- debug3(printf(" (bestscore %d)\n",scoreL+scoreI+scoreR));
+ } else if (probL_trunc + probR_trunc == bestprob_trunc) {
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ if (scoreL + scoreI + scoreR > bestscore_with_prob) {
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
bestprob_trunc = probL_trunc + probR_trunc;
bestcL_with_prob = cL;
bestcR_with_prob = cR;
@@ -2997,45 +3056,133 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
bestrR_with_prob = rR;
bestscore_with_prob = scoreL + scoreI + scoreR;
}
- }
-#endif
+ } else {
+ /* probL_trunc + probR_trunc > bestprob_trunc */
+ debug3(printf("At %d left to %d right, prob is %f + %f = %f\n",
+ cL,cR,probL_trunc,probR_trunc,probL_trunc+probR_trunc));
+
+ debug3(printf(" (bestscore %d)\n",scoreL+scoreR));
+ bestprob_trunc = probL_trunc + probR_trunc;
+ bestcL_with_prob = cL;
+ bestcR_with_prob = cR;
+ bestrL_with_prob = rL;
+ bestrR_with_prob = rR;
+ bestscore_with_prob = scoreL + scoreI + scoreR;
+ }
}
+ }
+
+ if (bestprob > 2*PROB_CEILING) {
+ /* Probability is good with best alignment, so take that */
+ debug(printf("Best alignment has good probability\n"));
+ use_prob_p = true;
+ } else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
+ /* Probability-based solution is bad, so use alignment */
+ debug(printf("Probability-based solution is bad\n"));
+ use_prob_p = false;
+ } else if (bestscore_with_prob < bestscore - 9) {
+ debug(printf("Probability-based solution requires very bad alignment\n"));
+ use_prob_p = false;
+ } else {
+ use_prob_p = true;
+ }
+ if (use_prob_p == true) {
+ /* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
+ debug(printf("Switch to probability-based solution\n"));
debug(printf("Non-SIMD. bestscore %d (bestprob %f) vs bestscore_with_prob %d (bestprob_trunc %f, actually %f and %f)\n",
bestscore,bestprob,bestscore_with_prob,bestprob_trunc,left_probabilities[bestcL_with_prob],right_probabilities[bestcR_with_prob]));
- if (bestprob > 2*PROB_CEILING) {
- /* Probability is good with best alignment, so take that */
- debug(printf("Best alignment has good probability\n"));
- } else if (left_probabilities[bestcL_with_prob] < PROB_CEILING && right_probabilities[bestcR_with_prob] < PROB_CEILING) {
- /* Probability-based solution is bad, so use alignment */
- debug(printf("Probability-based solution is bad\n"));
- } else if (bestscore_with_prob < bestscore - 9) {
- debug(printf("Probability-based solution requires very bad alignment\n"));
- } else {
- /* Best alignment yields bad probability, and probability-based alignment yields good probability, so switch */
- debug(printf("Switch to probability-based solution\n"));
- *bestcL = bestcL_with_prob;
- *bestcR = bestcR_with_prob;
- *bestrL = bestrL_with_prob;
- *bestrR = bestrR_with_prob;
- bestscore = bestscore_with_prob;
- }
+ *bestcL = bestcL_with_prob;
+ *bestcR = bestcR_with_prob;
+ *bestrL = bestrL_with_prob;
+ *bestrR = bestrR_with_prob;
+ bestscore = bestscore_with_prob;
+ }
- scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],
- cdna_direction,canonical_reward,finalp);
- if (halfp == true) {
- *finalscore = (int) (bestscore - scoreI/2);
- } else {
- *finalscore = (int) bestscore;
- }
+ FREEA(rightdi);
+ FREEA(leftdi);
+ FREEA(left_probabilities);
+ FREEA(right_probabilities);
+
+ if (halfp == true) {
+ scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],cdna_direction,canonical_reward,finalp);
+ return (int) (bestscore - scoreI/2);
+ } else {
+ return (int) bestscore;
+ }
+}
+
+
+static int
+bridge_intron_gap (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
+ int *best_introntype, double *left_prob, double *right_prob,
+ Score32_T **matrixL, Score32_T **matrixR,
+ Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
+ char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
+ int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
+ int cdna_direction, bool watsonp, int extraband_paired, double defect_rate, int canonical_reward,
+ int leftoffset, int rightoffset,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ bool halfp, bool finalp, bool jump_late_p) {
+ int finalscore;
+ int *left_known, *right_known;
+ int ubandL, lbandL, ubandR, lbandR;
+
+
+ if (glengthL+1 <= 0) {
+ fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
+ abort();
+ }
+
+ if (glengthR+1 <= 0) {
+ fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
+ abort();
+ }
+
+#if 1
+ /* Allows unlimited indel lengths */
+ ubandL = glengthL - rlength + extraband_paired;
+ lbandL = extraband_paired;
+
+ ubandR = glengthR - rlength + extraband_paired;
+ lbandR = extraband_paired;
+#else
+ /* Limit indels to 3 bp around splice sites. Doesn't work on PacBio reads. */
+ ubandL = 3;
+ lbandL = 3;
+
+ ubandR = 3;
+ lbandR = 3;
+#endif
+
+ left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
+ right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
+ get_known_splicesites(left_known,right_known,glengthL,glengthR,
+ /*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
+ cdna_direction,watsonp,chrnum,chroffset,chrhigh);
- FREEA(left_probabilities);
- FREEA(right_probabilities);
+ if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
+ /* Constrain to given introns */
+ finalscore = bridge_intron_gap_intron_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),&(*best_introntype),
+ matrixL,matrixR,directionsL_nogap,directionsR_nogap,
+ left_known,right_known,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
+ leftoffset,rightoffset,chrnum,chroffset,chrhigh,jump_late_p);
+
+ } else {
+ finalscore = bridge_intron_gap_site_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),
+ matrixL,matrixR,directionsL_nogap,directionsR_nogap,
+ gsequenceL,gsequenceL_alt,rev_gsequenceR,rev_gsequenceR_alt,goffsetL,rev_goffsetR,
+ left_known,right_known,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
+ canonical_reward,leftoffset,rightoffset,
+ chroffset,chrhigh,halfp,finalp);
}
+#if 0
/* Determine if result meets given constraints */
if (*finalscore < 0) {
result = false;
@@ -3055,25 +3202,21 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
/* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
result = true;
}
+#endif
-
- if (/*finalp == true &&*/ result == true) {
- get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
- left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
- cdna_direction,watsonp);
- }
-
- debug3(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
- *finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob));
- debug(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
- *finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob));
+ get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
+ left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
+ cdna_direction,watsonp);
FREEA(right_known);
FREEA(left_known);
- FREEA(rightdi);
- FREEA(leftdi);
- return result;
+#if defined(DEBUG) || defined(DEBUG3)
+ printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
+ finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob);
+#endif
+
+ return finalscore;
}
#endif
@@ -3564,6 +3707,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#if defined(HAVE_SSE4_1) || defined(HAVE_SSE2)
if (use8p == true) {
Dynprog_compute_bands(&lbandL,&ubandL,rlength,glengthL,extraband_paired,/*widebandp*/true);
+ debug3(printf("Computing matrix8L_upper\n"));
matrix8L_upper = Dynprog_simd_8_upper(&directions8L_upper_nogap,&directions8L_upper_Egap,dynprogL,
rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
#ifdef DEBUG14
@@ -3571,6 +3715,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#endif
mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
+ debug3(printf("Computing matrix8L_lower\n"));
matrix8L_lower = Dynprog_simd_8_lower(&directions8L_lower_nogap,&directions8L_lower_Egap,dynprogL,
rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
#ifdef DEBUG14
@@ -3580,6 +3725,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
Dynprog_compute_bands(&lbandR,&ubandR,rlength,glengthR,extraband_paired,/*widebandp*/true);
+ debug3(printf("Computing matrix8R_upper\n"));
matrix8R_upper = Dynprog_simd_8_upper(&directions8R_upper_nogap,&directions8R_upper_Egap,dynprogR,
rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
rlength,glengthR,
@@ -3588,6 +3734,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#endif
mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
+ debug3(printf("Computing matrix8R_lower\n"));
matrix8R_lower = Dynprog_simd_8_lower(&directions8R_lower_nogap,&directions8R_lower_Egap,dynprogR,
rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
rlength,glengthR,
@@ -3596,16 +3743,16 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#endif
mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
- if (bridge_intron_gap_8_ud(&(*finalscore),&bestrL,&bestrR,&bestcL,&bestcR,
- &(*introntype),&(*left_prob),&(*right_prob),
- matrix8L_upper,matrix8L_lower,matrix8R_upper,matrix8R_lower,
- directions8L_upper_nogap,directions8L_lower_nogap,
- directions8R_upper_nogap,directions8R_lower_nogap,
- gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
- goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
- cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,defect_rate,
- canonical_reward,goffsetL,rev_goffsetR,
- chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p) == false) {
+ if ((*finalscore = bridge_intron_gap_8_ud(&bestrL,&bestrR,&bestcL,&bestcR,
+ &(*introntype),&(*left_prob),&(*right_prob),
+ matrix8L_upper,matrix8L_lower,matrix8R_upper,matrix8R_lower,
+ directions8L_upper_nogap,directions8L_lower_nogap,
+ directions8R_upper_nogap,directions8R_lower_nogap,
+ gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
+ goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,defect_rate,
+ canonical_reward,goffsetL,rev_goffsetR,
+ chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
FREEA(rev_gsequenceR_alt);
FREEA(rev_gsequenceR);
FREEA(gsequenceL_alt);
@@ -3645,6 +3792,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
debug(printf("Pushing a gap with genomejump %d, introntype %s, prob %f and %f\n",
(*new_rightgenomepos)-(*new_leftgenomepos)-1,
Intron_type_string(*introntype),*left_prob,*right_prob));
+
#ifndef NOGAPHOLDER
pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/(rev_roffset-bestrR) - (roffset+bestrL) + 1,
/*genomejump*/(*new_rightgenomepos)-(*new_leftgenomepos)-1,
@@ -3682,12 +3830,19 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
debug(printf("End of dynprog genome gap\n"));
*dynprogindex += (*dynprogindex > 0 ? +1 : -1);
- return List_reverse(pairs);
+ debug3(Pair_dump_list(pairs,true));
+ debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
+ if (Pair_maxnegscore(pairs) < -10) {
+ return (List_T) NULL;
+ } else {
+ return List_reverse(pairs);
+ }
}
} else {
/* Use 16-mers */
Dynprog_compute_bands(&lbandL,&ubandL,rlength,glengthL,extraband_paired,/*widebandp*/true);
+ debug3(printf("Computing matrix16L_upper\n"));
matrix16L_upper = Dynprog_simd_16_upper(&directions16L_upper_nogap,&directions16L_upper_Egap,dynprogL,
rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
#ifdef DEBUG14
@@ -3695,6 +3850,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#endif
mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
+ debug3(printf("Computing matrix16L_lower\n"));
matrix16L_lower = Dynprog_simd_16_lower(&directions16L_lower_nogap,&directions16L_lower_Egap,dynprogL,
rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
#ifdef DEBUG14
@@ -3703,6 +3859,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
mismatchtype,open,extend,lbandL,jump_late_p,/*revp*/false);
Dynprog_compute_bands(&lbandR,&ubandR,rlength,glengthR,extraband_paired,/*widebandp*/true);
+ debug3(printf("Computing matrix16R_upper\n"));
matrix16R_upper = Dynprog_simd_16_upper(&directions16R_upper_nogap,&directions16R_upper_Egap,dynprogR,
rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
rlength,glengthR,
@@ -3711,6 +3868,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#endif
mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
+ debug3(printf("Computing matrix16R_lower\n"));
matrix16R_lower = Dynprog_simd_16_lower(&directions16R_lower_nogap,&directions16R_lower_Egap,dynprogR,
rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
rlength,glengthR,
@@ -3719,16 +3877,16 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#endif
mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
- if (bridge_intron_gap_16_ud(&(*finalscore),&bestrL,&bestrR,&bestcL,&bestcR,
- &(*introntype),&(*left_prob),&(*right_prob),
- matrix16L_upper,matrix16L_lower,matrix16R_upper,matrix16R_lower,
- directions16L_upper_nogap,directions16L_lower_nogap,
- directions16R_upper_nogap,directions16R_lower_nogap,
- gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
- goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
- cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,defect_rate,
- canonical_reward,goffsetL,rev_goffsetR,
- chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p) == false) {
+ if ((*finalscore = bridge_intron_gap_16_ud(&bestrL,&bestrR,&bestcL,&bestcR,
+ &(*introntype),&(*left_prob),&(*right_prob),
+ matrix16L_upper,matrix16L_lower,matrix16R_upper,matrix16R_lower,
+ directions16L_upper_nogap,directions16L_lower_nogap,
+ directions16R_upper_nogap,directions16R_lower_nogap,
+ gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
+ goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,defect_rate,
+ canonical_reward,goffsetL,rev_goffsetR,
+ chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
FREEA(rev_gsequenceR_alt);
FREEA(rev_gsequenceR);
@@ -3806,7 +3964,13 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
debug(printf("End of dynprog genome gap\n"));
*dynprogindex += (*dynprogindex > 0 ? +1 : -1);
- return List_reverse(pairs);
+ debug3(Pair_dump_list(pairs,true));
+ debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
+ if (Pair_maxnegscore(pairs) < -10) {
+ return (List_T) NULL;
+ } else {
+ return List_reverse(pairs);
+ }
}
}
@@ -3814,6 +3978,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#else
/* Non-SIMD methods */
Dynprog_compute_bands(&lbandL,&ubandL,rlength,glengthL,extraband_paired,/*widebandp*/true);
+ debug3(printf("Computing matrixL\n"));
matrixL = Dynprog_standard(&directionsL_nogap,&directionsL_Egap,&directionsL_Fgap,dynprogL,
rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
goffsetL,chroffset,chrhigh,watsonp,
@@ -3821,20 +3986,21 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
jump_late_p,/*revp*/false,/*saturation*/NEG_INFINITY_INT);
Dynprog_compute_bands(&lbandR,&ubandR,rlength,glengthR,extraband_paired,/*widebandp*/true);
+ debug3(printf("Computing matrixR\n"));
matrixR = Dynprog_standard(&directionsR_nogap,&directionsR_Egap,&directionsR_Fgap,dynprogR,
rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
rlength,glengthR,rev_goffsetR,chroffset,chrhigh,watsonp,
mismatchtype,open,extend,lbandL,ubandR,
/*for revp true*/!jump_late_p,/*revp*/true,/*saturation*/NEG_INFINITY_INT);
- if (bridge_intron_gap(&(*finalscore),&bestrL,&bestrR,&bestcL,&bestcR,
- &(*introntype),&(*left_prob),&(*right_prob),
- matrixL,matrixR,directionsL_nogap,directionsR_nogap,
- gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
- goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
- cdna_direction,watsonp,extraband_paired,defect_rate,
- canonical_reward,goffsetL,rev_goffsetR,
- chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p) == false) {
+ if ((*finalscore = bridge_intron_gap(&bestrL,&bestrR,&bestcL,&bestcR,
+ &(*introntype),&(*left_prob),&(*right_prob),
+ matrixL,matrixR,directionsL_nogap,directionsR_nogap,
+ gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
+ goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,extraband_paired,defect_rate,
+ canonical_reward,goffsetL,rev_goffsetR,
+ chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
FREEA(gsequenceL_alt);
FREEA(rev_gsequenceR_alt);
@@ -3895,7 +4061,13 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
debug(printf("End of dynprog genome gap\n"));
*dynprogindex += (*dynprogindex > 0 ? +1 : -1);
- return List_reverse(pairs);
+ debug3(Pair_dump_list(pairs,true));
+ debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
+ if (Pair_maxnegscore(pairs) < -10) {
+ return (List_T) NULL;
+ } else {
+ return List_reverse(pairs);
+ }
}
#endif
diff --git a/src/genome.c b/src/genome.c
index ffdd265..f3a0bbc 100644
--- a/src/genome.c
+++ b/src/genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome.c 168395 2015-06-26 17:13:13Z twu $";
+static char rcsid[] = "$Id: genome.c 172736 2015-08-27 16:36:31Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -11113,6 +11113,7 @@ Genome_get_segment_blocks_right (char *segment, char *segmentalt, Univcoord_T le
if (genomealt_blocks == genome_blocks) {
strncpy(segmentalt,segment,length);
+ segmentalt[length] = '\0';
} else {
for (i = length - 1; i >= length - out_of_bounds; i--) {
segmentalt[i] = '*';
@@ -11158,6 +11159,7 @@ Genome_get_segment_blocks_left (char *segment, char *segmentalt, Univcoord_T rig
if (genomealt_blocks == genome_blocks) {
strncpy(segmentalt,segment,length);
+ segmentalt[length] = '\0';
} else {
for (i = 0; i < out_of_bounds; i++) {
segmentalt[i] = '*';
diff --git a/src/gmap.c b/src/gmap.c
index 74a9046..655682f 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 168166 2015-06-24 03:57:10Z twu $";
+static char rcsid[] = "$Id: gmap.c 173190 2015-09-01 18:59:44Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -950,7 +950,7 @@ stage3array_from_list (int *npaths, int *first_absmq, int *second_absmq, List_T
}
}
- array1 = (Stage3_T *) MALLOC((*npaths) * sizeof(Stage3_T)); /* Return value */
+ array1 = (Stage3_T *) MALLOC_OUT((*npaths) * sizeof(Stage3_T)); /* Return value */
j = 0;
for (i = 0; i < norig; i++) {
x = array0[i];
@@ -2131,6 +2131,8 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
int queryjump;
int genomejump;
bool max_extend_p;
+ Chrpos_T left_chrlength, right_chrlength;
+ Univcoord_T chroffset, chrhigh;
if (Stage3_queryend(from) < Stage3_querystart(to)) {
/* Gap exists between the two parts */
@@ -2243,20 +2245,34 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
*cdna_direction = found_cdna_direction;
debug2(printf("Exon-exon boundary found at %d, which is breakpoint. Comp = %c\n",
*exonexonpos,comp));
+ return breakpoint;
+
} else {
- *chimerapos = Chimera_find_breakpoint(&(*chimeraequivpos),&(*donor1),&(*donor2),&(*acceptor2),&(*acceptor1),
- from,to,queryntlength,genome);
- *donor_prob = *acceptor_prob = 0.0;
-
- debug2(printf("Chimera_find_breakpoint returns boundary at %d..%d (switch can occur at %d..%d)\n",
- *chimerapos,*chimeraequivpos,(*chimerapos)-1,*chimeraequivpos));
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&left_chrlength,chromosome_iit,Stage3_chrnum(from),circular_typeint);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&right_chrlength,chromosome_iit,Stage3_chrnum(to),circular_typeint);
+
+ if ((*chimerapos = Chimera_find_breakpoint(&(*chimeraequivpos),&(*donor1),&(*donor2),&(*acceptor2),&(*acceptor1),
+ from,to,queryntlength,genome,left_chrlength,right_chrlength)) < 0) {
+ /* TODO: Allow finding a breakpoint for DNA-Seq, which needs no donor or acceptor nucleotides */
+ debug2(printf("Chimera_find_breakpoint returns no value\n"));
+ *donor_prob = *acceptor_prob = 0.0;
+ *donor_watsonp = *acceptor_watsonp = true;
+ *cdna_direction = 0;
+ return -1;
- breakpoint = ((*chimerapos) + (*chimeraequivpos))/2;
- *cdna_direction = try_cdna_direction;
- debug2(printf("Exon-exon boundary not found, but setting breakpoint to be %d\n",breakpoint));
+ } else {
+ *donor_prob = *acceptor_prob = 0.0;
+ *donor_watsonp = *acceptor_watsonp = true;
+
+ debug2(printf("Chimera_find_breakpoint returns boundary at %d..%d (switch can occur at %d..%d)\n",
+ *chimerapos,*chimeraequivpos,(*chimerapos)-1,*chimeraequivpos));
+
+ breakpoint = ((*chimerapos) + (*chimeraequivpos))/2;
+ *cdna_direction = try_cdna_direction;
+ debug2(printf("Exon-exon boundary not found, but setting breakpoint to be %d\n",breakpoint));
+ return breakpoint;
+ }
}
-
- return breakpoint;
}
@@ -2925,15 +2941,18 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
if (chimeric_goodness < max_single_goodness + penalty) {
debug2(printf("chimeric goodness not good enough relative to max_single_goodness %d and penalty %d\n",
max_single_goodness,penalty));
- } else {
- breakpoint = find_breakpoint(&chimera_cdna_direction,&chimerapos,&chimeraequivpos,&exonexonpos,
- &donor1,&donor2,&acceptor2,&acceptor1,
- &donor_watsonp,&acceptor_watsonp,&donor_prob,&acceptor_prob,from,to,
+
+ } else if ((breakpoint = find_breakpoint(&chimera_cdna_direction,&chimerapos,&chimeraequivpos,&exonexonpos,
+ &donor1,&donor2,&acceptor2,&acceptor1,
+ &donor_watsonp,&acceptor_watsonp,&donor_prob,&acceptor_prob,from,to,
#ifdef PMAP
- queryntseq,
+ queryntseq,
#endif
- queryseq,queryuc,queryntlength,
- genomecomp,genomecomp_alt,chromosome_iit,pairpool);
+ queryseq,queryuc,queryntlength,
+ genomecomp,genomecomp_alt,chromosome_iit,pairpool)) < 0) {
+ debug2(printf("find_breakpoint returns no value\n"));
+
+ } else {
debug2(printf("find_breakpoint returns %d\n",breakpoint));
/* Check to see if we can merge chimeric parts */
@@ -4076,6 +4095,7 @@ worker_mpi_process (int worker_id, Inbuffer_T inbuffer) {
double worker_runtime;
#ifdef MEMUSAGE
+ Sequence_T queryseq;
long int memusage_constant = 0, memusage, max_memusage;
char procname[12];
char acc[100+1], comma0[20], comma1[20], comma2[20], comma3[20], comma4[20], comma5[20];
@@ -4211,8 +4231,8 @@ worker_mpi_process (int worker_id, Inbuffer_T inbuffer) {
#ifdef MEMUSAGE
/* Copy acc before we free the request */
- queryseq1 = Request_queryseq1(request);
- strncpy(acc,Shortread_accession(queryseq1),100);
+ queryseq = Request_queryseq(request);
+ strncpy(acc,Sequence_accession(queryseq),100);
acc[100] = '\0';
#endif
@@ -4294,7 +4314,7 @@ single_thread () {
double worker_runtime;
#ifdef MEMUSAGE
- long int memusage_constant = 0;
+ long int memusage, memusage_constant = 0;
char acc[100+1], comma0[20], comma1[20], comma2[20], comma3[20], comma4[20], comma5[20];
#endif
@@ -4342,8 +4362,8 @@ single_thread () {
}
#ifdef MEMUSAGE
- queryseq1 = Request_queryseq1(request);
- fprintf(stderr,"Single thread starting %s\n",Shortread_accession(queryseq1));
+ queryseq = Request_queryseq(request);
+ fprintf(stderr,"Single thread starting %s\n",Sequence_accession(queryseq));
Mem_usage_reset_stack_max();
Mem_usage_reset_heap_max();
#endif
@@ -4387,10 +4407,10 @@ single_thread () {
}
#ifdef MEMUSAGE
- /* Copy acc before we free the request */
- queryseq1 = Request_queryseq1(request);
- strncpy(acc,Shortread_accession(queryseq1),100);
- acc[100] = '\0';
+ /* Copy acc before we free the request */
+ queryseq = Request_queryseq(request);
+ strncpy(acc,Sequence_accession(queryseq),100);
+ acc[100] = '\0';
#endif
Request_free(&request);
@@ -4566,8 +4586,8 @@ worker_thread (void *data) {
#ifdef MEMUSAGE
/* Copy acc before we free the request */
- queryseq1 = Request_queryseq1(request);
- strncpy(acc,Shortread_accession(queryseq1),100);
+ queryseq = Request_queryseq(request);
+ strncpy(acc,Sequence_accession(queryseq),100);
acc[100] = '\0';
#endif
@@ -6606,6 +6626,8 @@ main (int argc, char *argv[]) {
Inbuffer_free(&inbuffer); /* Also closes inputs */
}
+ Outbuffer_close_files(); /* All ranks have to close the files */
+
#else
/* Single CPU or Pthreads version */
runtime = Stopwatch_stop(stopwatch);
@@ -6619,6 +6641,7 @@ main (int argc, char *argv[]) {
Outbuffer_free(&outbuffer);
Inbuffer_free(&inbuffer); /* Also closes inputs */
+ Outbuffer_close_files();
#endif
#ifdef PMAP
diff --git a/src/gsnap.c b/src/gsnap.c
index faf7281..4660222 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 168165 2015-06-24 03:56:57Z twu $";
+static char rcsid[] = "$Id: gsnap.c 173896 2015-09-12 00:11:40Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -300,10 +300,7 @@ static int subopt_levels = 0;
1, then treated as a fraction of the querylength. Else, treated as
an integer */
static double user_maxlevel_float = -1.0;
-
-static int terminal_threshold = 2;
-static int reject_trimlength = 1000;
-static bool user_reject_trimlength_p = false;
+static double user_mincoverage_float = -1.0;
/* Really have only one indel penalty */
static int indel_penalty_middle = 2;
@@ -413,6 +410,8 @@ static bool user_quality_shift = false;
static int quality_shift = 0; /* For printing, may want -31 */
static bool exception_raise_p = true;
+static bool add_paired_nomappers_p = false;
+static bool paired_flag_means_concordant_p = false;
static bool quiet_if_excessive_p = false;
static int maxpaths_search = 1000;
static int maxpaths_report = 100;
@@ -510,8 +509,7 @@ static struct option long_options[] = {
{"find-dna-chimeras", required_argument, 0, 0}, /* find_dna_chimeras */
{"max-mismatches", required_argument, 0, 'm'}, /* user_maxlevel_float */
- {"terminal-threshold", required_argument, 0, 0}, /* terminal_threshold */
- {"reject-trimlength", required_argument, 0, 0}, /* reject_trimlength, user_reject_trimlength_p */
+ {"min-coverage", required_argument, 0, 0}, /* user_mincoverage_float */
#if 0
{"indel-penalty-middle", required_argument, 0, 'i'}, /* indel_penalty_middle */
@@ -590,6 +588,8 @@ static struct option long_options[] = {
{"noexceptions", no_argument, 0, '0'}, /* exception_raise_p */
{"maxsearch", required_argument, 0, 0}, /* maxpaths_search */
{"npaths", required_argument, 0, 'n'}, /* maxpaths_report */
+ {"add-paired-nomappers", no_argument, 0, 0}, /* add_paired_nomappers_p */
+ {"paired-flag-means-concordant", required_argument, 0, 0}, /* paired_flag_means_concordant_p */
{"quiet-if-excessive", no_argument, 0, 'Q'}, /* quiet_if_excessive_p */
{"ordered", no_argument, 0, 'O'}, /* orderedp */
{"clip-overlap", no_argument, 0, 0}, /* clip_overlap_p */
@@ -862,7 +862,7 @@ process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
if (queryseq2 == NULL) {
stage3array = Stage1_single_read(&npaths,&first_absmq,&second_absmq,
queryseq1,indexdb,indexdb2,indexdb_size_threshold,
- floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,user_mincoverage_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -880,7 +880,7 @@ process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
&stage3array5,&npaths5,&first_absmq5,&second_absmq5,
&stage3array3,&npaths3,&first_absmq3,&second_absmq3,
queryseq1,queryseq2,indexdb,indexdb2,indexdb_size_threshold,
- floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,user_mincoverage_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -921,7 +921,7 @@ process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
&stage3array5,&npaths5,&first_absmq5,&second_absmq5,
&stage3array3,&npaths3,&first_absmq3,&second_absmq3,
queryseq1,queryseq2,indexdb,indexdb2,indexdb_size_threshold,
- floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,user_mincoverage_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -1863,19 +1863,23 @@ parse_command_line (int argc, char *argv[], int optind) {
} else if (!strcmp(long_name,"pairdev")) {
pairlength_deviation = atoi(check_valid_int(optarg));
+ } else if (!strcmp(long_name,"min-coverage")) {
+ user_mincoverage_float = atof(check_valid_float_or_int(optarg));
+ if (user_mincoverage_float > 1.0 && user_mincoverage_float != rint(user_mincoverage_float)) {
+ fprintf(stderr,"Cannot specify fractional value %f for --min-coverage except between 0.0 and 1.0\n",user_mincoverage_float);
+ return 9;
+ } else if (user_mincoverage_float > 0.10 && user_mincoverage_float < 1.0) {
+ fprintf(stderr,"Your value %f for --max-mismatches implies more than 10%% mismatches, which does not make sense\n",
+ user_mincoverage_float);
+ return 9;
+ }
+
} else if (!strcmp(long_name,"indel-endlength")) {
min_indel_end_matches = atoi(check_valid_int(optarg));
if (min_indel_end_matches > 14) {
allow_end_indels_p = false;
}
- } else if (!strcmp(long_name,"terminal-threshold")) {
- terminal_threshold = atoi(check_valid_int(optarg));
-
- } else if (!strcmp(long_name,"reject-trimlength")) {
- reject_trimlength = atoi(check_valid_int(optarg));
- user_reject_trimlength_p = true;
-
} else if (!strcmp(long_name,"antistranded-penalty")) {
antistranded_penalty = atoi(check_valid_int(optarg));
@@ -1929,6 +1933,19 @@ parse_command_line (int argc, char *argv[], int optind) {
} else if (!strcmp(long_name,"no-sam-headers")) {
sam_headers_p = false;
+ } else if (!strcmp(long_name,"add-paired-nomappers")) {
+ add_paired_nomappers_p = true;
+
+ } else if (!strcmp(long_name,"paired-flag-means-concordant")) {
+ if (!strcmp(optarg,"1")) {
+ paired_flag_means_concordant_p = true;
+ } else if (!strcmp(optarg,"0")) {
+ paired_flag_means_concordant_p = false; /* Default */
+ } else {
+ fprintf(stderr,"--paired-flag-means-concordant flag must be 0 or 1\n");
+ return 9;
+ }
+
} else if (!strcmp(long_name,"sam-headers-batch")) {
sam_headers_batch = atoi(check_valid_int(optarg));
@@ -3218,7 +3235,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
Splice_setup(min_shortend);
Indel_setup(min_indel_end_matches,indel_penalty_middle);
Stage1hr_setup(use_sarray_p,use_only_sarray_p,index1part,index1interval,spansize,chromosome_iit,nchromosomes,
- genomecomp,genomecomp_alt,mode,maxpaths_search,terminal_threshold,reject_trimlength,
+ genomecomp,genomecomp_alt,mode,maxpaths_search,
splicesites,splicetypes,splicedists,nsplicesites,
novelsplicingp,knownsplicingp,find_dna_chimeras_p,distances_observed_p,
subopt_levels,max_middle_insertions,max_middle_deletions,
@@ -3234,17 +3251,17 @@ worker_setup (char *genomesubdir, char *fileroot) {
splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,trim_mismatch_score,
novelsplicingp,knownsplicingp,output_sam_p,mode,
- Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
- reject_trimlength);
+ Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false));
Stage3hr_setup(invert_first_p,invert_second_p,genomecomp,chromosome_iit,nchromosomes,circular_typeint,
genes_iit,genes_divint_crosstable,
tally_iit,tally_divint_crosstable,runlength_iit,runlength_divint_crosstable,
- reject_trimlength,distances_observed_p,pairmax,
+ distances_observed_p,pairmax,
expected_pairlength,pairlength_deviation,
localsplicing_penalty,indel_penalty_middle,antistranded_penalty,
favor_multiexon_p,gmap_min_nconsecutive,index1part,index1interval,novelsplicingp,
merge_samechr_p,circularp,failedinput_root,print_m8_p,want_random_p);
- SAM_setup(quiet_if_excessive_p,maxpaths_report,failedinput_root,fastq_format_p,hide_soft_clips_p,
+ SAM_setup(add_paired_nomappers_p,paired_flag_means_concordant_p,
+ quiet_if_excessive_p,maxpaths_report,failedinput_root,fastq_format_p,hide_soft_clips_p,
clip_overlap_p,merge_overlap_p,sam_multiple_primaries_p,
force_xs_direction_p,md_lowercase_variant_p,snps_iit,chromosome_iit,genomecomp);
Output_setup(chromosome_iit,nofailsp,failsonlyp,quiet_if_excessive_p,maxpaths_report,
@@ -3975,6 +3992,10 @@ is still designed to be fast.\n\
of mismatches (including indel and splicing penalties)\n\
For RNA-Seq, you may need to increase this value slightly\n\
to align reads extending past the ends of an exon.\n\
+ --min-coverage=FLOAT Minimum coverage required for an alignment.\n\
+ If specified between 0.0 and 1.0, then treated as a fraction\n\
+ of each read length. Otherwise, treated as an integral number\n\
+ of base pairs. Default value is 0.0.\n\
--query-unk-mismatch=INT Whether to count unknown (N) characters in the query as a mismatch\n\
(0=no (default), 1=yes)\n\
--genome-unk-mismatch=INT Whether to count unknown (N) characters in the genome as a mismatch\n\
@@ -4001,24 +4022,6 @@ is still designed to be fast.\n\
#endif
fprintf(stdout,"\
- --terminal-threshold=INT Threshold for computing a terminal alignment (from one end of the\n\
- read to the best possible position at the other end) (default %d)\n\
- For example, if this value is 2, then if GSNAP finds an exact or\n\
- 1-mismatch alignment, it will not try to find a terminal alignment.\n\
- To turn off the computation of terminal alignments, set this to a\n\
- high value, greater than the value for --max-mismatches. However,\n\
- note hat terminal alignments are needed to help the GMAP algorithm\n\
- find some alignments. Therefore, to avoid getting terminal alignments\n\
- in the output, you should generally set --terminal-output-minlength\n\
- instead of this parameter.\n\
-",terminal_threshold);
- fprintf(stdout,"\
- --reject-trimlength=INT\n\
- Do not print alignments where amount trimmed on both ends totals more than\n\
- this amount (default %d). Note that ambiguous splicing does not count\n\
- as a trim.\n\
-",reject_trimlength);
- fprintf(stdout,"\
-i, --indel-penalty=INT Penalty for an indel (default %d).\n\
Counts against mismatches allowed. To find indels, make\n\
indel-penalty less than or equal to max-mismatches.\n\
@@ -4329,6 +4332,10 @@ is still designed to be fast.\n\
fprintf(stdout,"Options for SAM output\n");
fprintf(stdout,"\
--no-sam-headers Do not print headers beginning with '@'\n\
+ --add-paired-nomappers Add nomapper lines as needed to make all paired-end results alternate\n\
+ between first end and second end\n\
+ --paired-flag-means-concordant=INT Whether the paired bit in the SAM flags means concordant only (1)\n\
+ or paired plus concordant (0, default)\n\
--sam-headers-batch=INT Print headers only for this batch, as specified by -q\n\
--sam-use-0M Insert 0M in CIGAR between adjacent insertions and deletions\n\
Required by Picard, but can cause errors in other tools\n\
diff --git a/src/inbuffer.c b/src/inbuffer.c
index 92f2390..e3c2f85 100644
--- a/src/inbuffer.c
+++ b/src/inbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: inbuffer.c 160102 2015-03-03 21:04:01Z twu $";
+static char rcsid[] = "$Id: inbuffer.c 175728 2015-09-30 15:08:16Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -343,7 +343,7 @@ fill_buffer_master (T this) {
int nextchar_end;
bool donep;
#if 0
- int nchars1, nchars2; /* Doesn't need to be saved as a field in Inbuffer_T. */
+ int nchars1 = 0, nchars2 = 0; /* Doesn't need to be saved as a field in Inbuffer_T. */
#endif
/* Need to receive nextchar_end because of the difference between
@@ -519,7 +519,7 @@ fill_buffer_slave (T this) {
int offset_start_1, offset_end_1, offset_start_2, offset_end_2;
int nextchar_end;
#if 0
- int nchars1, nchars2; /* Doesn't need to be saved as a field in Inbuffer_T. */
+ int nchars1 = 0, nchars2 = 0; /* Doesn't need to be saved as a field in Inbuffer_T. */
#endif
/* Need to receive nextchar_end because of the difference between
@@ -710,7 +710,7 @@ fill_buffer (T this) {
unsigned int nread = 0;
Shortread_T queryseq1, queryseq2;
bool skipp;
- int nchars1, nchars2; /* Returned only because MPI master needs it. Doesn't need to be saved as a field in Inbuffer_T. */
+ int nchars1 = 0, nchars2 = 0; /* Returned only because MPI master needs it. Doesn't need to be saved as a field in Inbuffer_T. */
while (nread < this->nspaces &&
(queryseq1 = Shortread_read(&this->nextchar,&nchars1,&nchars2,&queryseq2,
diff --git a/src/oligoindex_hr.c b/src/oligoindex_hr.c
index c541ce9..3c0b489 100644
--- a/src/oligoindex_hr.c
+++ b/src/oligoindex_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: oligoindex_hr.c 167575 2015-06-15 17:26:24Z twu $";
+static char rcsid[] = "$Id: oligoindex_hr.c 175547 2015-09-28 21:31:01Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8851,6 +8851,7 @@ count_9mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T lo
while (pos >= startdiscard && pos >= 16) {
masked = low_rev >> (46 - 2*pos);
masked &= MASK9;
+ counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
pos--;
}
@@ -16153,7 +16154,11 @@ count_positions_fwd_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
debug(printf("Starting count_positions_fwd_std\n"));
- left_plus_length -= indexsize;
+ if (left_plus_length < indexsize) {
+ left_plus_length = 0;
+ } else {
+ left_plus_length -= indexsize;
+ }
startptr = left/32U*3;
ptr = endptr = left_plus_length/32U*3;
@@ -16496,7 +16501,11 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
debug(printf("Starting count_positions_fwd_simd\n"));
- left_plus_length -= indexsize;
+ if (left_plus_length < indexsize) {
+ left_plus_length = 0;
+ } else {
+ left_plus_length -= indexsize;
+ }
startptr = left/32U*3;
ptr = endptr = left_plus_length/32U*3;
@@ -17274,7 +17283,11 @@ store_positions_fwd_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
low, high, nextlow;
- left_plus_length -= indexsize;
+ if (left_plus_length < indexsize) {
+ left_plus_length = 0;
+ } else {
+ left_plus_length -= indexsize;
+ }
chrpos += (left_plus_length - left); /* We are starting from the right */
startptr = left/32U*3;
@@ -17616,7 +17629,11 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
debug(printf("Starting store_positions_fwd_simd\n"));
- left_plus_length -= indexsize;
+ if (left_plus_length < indexsize) {
+ left_plus_length = 0;
+ } else {
+ left_plus_length -= indexsize;
+ }
chrpos += (left_plus_length - left); /* We are starting from the right */
startptr = left/32U*3;
@@ -24883,7 +24900,11 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
debug(printf("Starting count_positions_rev_std\n"));
- left_plus_length -= indexsize;
+ if (left_plus_length < indexsize) {
+ left_plus_length = 0;
+ } else {
+ left_plus_length -= indexsize;
+ }
ptr = startptr = left/32U*3;
endptr = left_plus_length/32U*3;
@@ -25190,7 +25211,11 @@ count_positions_rev_simd (Count_T *counts, int indexsize,
debug(printf("Starting count_positions_rev_simd\n"));
- left_plus_length -= indexsize;
+ if (left_plus_length < indexsize) {
+ left_plus_length = 0;
+ } else {
+ left_plus_length -= indexsize;
+ }
ptr = startptr = left/32U*3;
endptr = left_plus_length/32U*3;
@@ -25789,7 +25814,11 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
low, high, nextlow;
- left_plus_length -= indexsize;
+ if (left_plus_length < indexsize) {
+ left_plus_length = 0;
+ } else {
+ left_plus_length -= indexsize;
+ }
chrpos += (left_plus_length - left); /* We are starting from the right */
ptr = startptr = left/32U*3;
@@ -26096,7 +26125,11 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
- left_plus_length -= indexsize;
+ if (left_plus_length < indexsize) {
+ left_plus_length = 0;
+ } else {
+ left_plus_length -= indexsize;
+ }
chrpos += (left_plus_length - left); /* We are starting from the right */
ptr = startptr = left/32U*3;
@@ -27244,6 +27277,8 @@ Oligoindex_set_inquery (int *badoligos, int *repoligos, int *trimoligos, int *tr
if (querylength <= indexsize) {
*badoligos = 0;
+ *repoligos = 0;
+ *trimoligos = 0;
*trim_start = 0;
*trim_end = querylength;
return 1.0;
@@ -27292,6 +27327,8 @@ Oligoindex_set_inquery (int *badoligos, int *repoligos, int *trimoligos, int *tr
if (trimp == false) {
*badoligos = (querylength + 1 - indexsize) - noligos;
+ *repoligos = 0;
+ *trimoligos = 0;
*trim_start = 0;
*trim_end = querylength;
return 1.0;
diff --git a/src/outbuffer.c b/src/outbuffer.c
index 5583a71..a02fc2e 100644
--- a/src/outbuffer.c
+++ b/src/outbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: outbuffer.c 162092 2015-03-26 18:30:31Z twu $";
+static char rcsid[] = "$Id: outbuffer.c 173039 2015-08-31 19:12:10Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -370,7 +370,7 @@ Outbuffer_setup (int argc_in, char **argv_in, int optind_in,
void
Outbuffer_cleanup () {
- FREE(outputs);
+ FREE_KEEP(outputs); /* Matches CALLOC_KEEP in Outbuffer_setup */
return;
}
diff --git a/src/pair.c b/src/pair.c
index 6cc0bf1..60b6e26 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 166981 2015-06-06 15:53:43Z twu $";
+static char rcsid[] = "$Id: pair.c 174482 2015-09-22 00:58:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -120,6 +120,15 @@ static char rcsid[] = "$Id: pair.c 166981 2015-06-06 15:53:43Z twu $";
#define debug10(x)
#endif
+/* maxnegscore */
+#ifdef DEBUG11
+#define debug11(x) x
+#else
+#define debug11(x)
+#endif
+
+
+
#define TRIM_MATCH_SCORE 1
@@ -7618,44 +7627,51 @@ Pair_matchscores (int *matchscores, struct T *ptr, int npairs, int querylength)
}
-#if 0
-/* Called only by chop_ends_by_changepoint in stage3.c, which is not used anymore */
-int *
-Pair_matchscores_list (int *nmatches, int *ntotal, int *length, List_T pairs) {
- int *matchscores;
+int
+Pair_maxnegscore (List_T pairs) {
+ int maxnegscore = 0, prevhigh = 0, score = 0;
T this;
- List_T p;
- int i = 0;
-
- matchscores = (int *) CALLOC(List_length(pairs),sizeof(int));
- *nmatches = *ntotal = *length = 0;
+ List_T p = pairs;
- for (p = pairs; p != NULL; p = p->rest) {
+ while (p != NULL) {
this = p->first;
+ debug11(Pair_dump_one(this,/*zerobasedp*/true));
+
if (this->gapp) {
- matchscores[i++] = 0; /* Count as mismatch; make evidence support the gap */
- (*ntotal) += 1;
+ /* Skip */
+ p = p->rest;
+
} else if (this->comp == MISMATCH_COMP) {
- matchscores[i++] = 0; /* For mismatch */
- (*ntotal) += 1;
-#ifndef PMAP
- } else if (this->comp == AMBIGUOUS_COMP) {
- matchscores[i++] = 0; /* For cases involving 'N' */
- (*ntotal) += 1;
-#endif
+ score += MISMATCH;
+ if (score - prevhigh < maxnegscore) {
+ maxnegscore = score - prevhigh;
+ }
+ p = p->rest;
+
} else if (this->comp == INDEL_COMP) {
- matchscores[i++] = -1; /* Ignore indels */
+ score += QOPEN + QINDEL;
+ p = p->rest;
+ while (p != NULL && ((T) p->first)->comp == INDEL_COMP) {
+ score += QINDEL;
+ p = p->rest;
+ }
+ if (score - prevhigh < maxnegscore) {
+ maxnegscore = score - prevhigh;
+ }
+
} else {
- matchscores[i++] = 1; /* For match */
- (*nmatches) += 1;
- (*ntotal) += 1;
+ score += MATCH;
+ if (score > prevhigh) {
+ prevhigh = score;
+ }
+ p = p->rest;
}
- (*length) += 1;
+
+ debug11(printf(" score %d, prevhigh %d, maxnegscore %d\n",score,prevhigh,maxnegscore));
}
- return matchscores;
+ return maxnegscore;
}
-#endif
void
@@ -7683,7 +7699,7 @@ Pair_pathscores (bool *gapp, int *pathscores, struct T *ptr, int npairs,
for (querypos = 0; querypos < querystart; querypos++) {
gapp[querypos] = true;
}
- for (querypos = queryend + pre_extension_slop; querypos < querylength; querypos++) {
+ for (querypos = queryend + 1 + pre_extension_slop; querypos < querylength; querypos++) {
gapp[querypos] = true;
}
} else {
@@ -7691,7 +7707,7 @@ Pair_pathscores (bool *gapp, int *pathscores, struct T *ptr, int npairs,
for (querypos = 0; querypos < querystart - pre_extension_slop; querypos++) {
gapp[querypos] = true;
}
- for (querypos = queryend; querypos < querylength; querypos++) {
+ for (querypos = queryend + 1; querypos < querylength; querypos++) {
gapp[querypos] = true;
}
}
diff --git a/src/pair.h b/src/pair.h
index 5513bd7..2bd8bf7 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 166783 2015-06-02 17:58:02Z twu $ */
+/* $Id: pair.h 174482 2015-09-22 00:58:39Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -307,6 +307,9 @@ Pair_fracidentity_bounded (int *matches, int *unknowns, int *mismatches,
int cdna_direction, int minpos, int maxpos);
extern void
Pair_matchscores (int *matchscores, struct T *ptr, int npairs, int querylength);
+extern int
+Pair_maxnegscore (List_T pairs);
+
extern void
Pair_pathscores (bool *gapp, int *pathscores, struct T *ptr, int npairs,
diff --git a/src/samprint.c b/src/samprint.c
index c78372e..19a2ca6 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 166973 2015-06-05 20:27:15Z twu $";
+static char rcsid[] = "$Id: samprint.c 172734 2015-08-27 16:35:15Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -60,6 +60,8 @@ static char rcsid[] = "$Id: samprint.c 166973 2015-06-05 20:27:15Z twu $";
#endif
+static bool add_paired_nomappers_p;
+static bool paired_flag_means_concordant_p;
static bool quiet_if_excessive_p;
static int maxpaths_report;
static char *failedinput_root;
@@ -78,11 +80,14 @@ static Univ_IIT_T chromosome_iit;
static Genome_T genome;
void
-SAM_setup (bool quiet_if_excessive_p_in, int maxpaths_report_in,
+SAM_setup (bool add_paired_nomappers_p_in, bool paired_flag_means_concordant_p_in,
+ bool quiet_if_excessive_p_in, int maxpaths_report_in,
char *failedinput_root_in, bool fastq_format_p_in, bool hide_soft_clips_p_in,
bool clip_overlap_p_in, bool merge_overlap_p_in, bool sam_multiple_primaries_p_in,
bool force_xs_direction_p_in, bool md_lowercase_variant_p_in, IIT_T snps_iit_in,
Univ_IIT_T chromosome_iit_in, Genome_T genome_in) {
+ add_paired_nomappers_p = add_paired_nomappers_p_in;
+ paired_flag_means_concordant_p = paired_flag_means_concordant_p_in;
quiet_if_excessive_p = quiet_if_excessive_p_in;
failedinput_root = failedinput_root_in;
fastq_format_p = fastq_format_p_in;
@@ -164,8 +169,12 @@ SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
} else if (resulttype == PAIRED_UNIQ || resulttype == PAIRED_MULT) {
/* Note: We are counting PAIRED_UNIQ and PAIRED_MULT as "paired" mappings.
However, we are no longer counting UNPAIRED_UNIQ as a "paired" mapping. */
- debug(printf("PAIRED_MAPPING %d\n",PAIRED_MAPPING));
- flag |= PAIRED_MAPPING;
+ if (paired_flag_means_concordant_p == true) {
+ /* Don't turn on paired flag */
+ } else {
+ debug(printf("PAIRED_MAPPING %d\n",PAIRED_MAPPING));
+ flag |= PAIRED_MAPPING;
+ }
if (0 && Stage3end_chrnum(mate) == 0) {
/* Splice without a direction. But want the effective plusp anyway. */
@@ -4141,6 +4150,18 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
resulttype,/*first_read_p*/true,/*npaths_mate*/1,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
+ if (add_paired_nomappers_p == true) {
+ /* matching nomapper for second end */
+ SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/hit5,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/false,/*npaths*/1,/*npaths_mate*/1,/*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+
+ /* matching nomapper for first end */
+ SAM_print_nomapping(fp,abbrev,queryseq1,/*mate*/hit3,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/true,/*npaths*/1,/*npaths_mate*/1,/*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
+
/* print second end */
/* Stage3end_eval_and_sort(stage3array2,npaths2,maxpaths_report,queryseq2); */
SAM_print(fp,fp_failedinput_2,abbrev,hit3,/*mate*/hit5,acc1,acc2,/*pathnum*/1,/*npaths*/1,
@@ -4216,12 +4237,24 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
+ if (add_paired_nomappers_p == true) {
+ /* matching nomapper for second end */
+ SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/false,/*npaths*/1,/*npaths_mate*/npaths2,
+ /*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
+
} else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
- /* Just printing one end as nomapping */
- SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
- /*mate_chrpos*/chrpos3,
- quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ if (add_paired_nomappers_p == true) {
+ /* Handle nomappers with each mapped mate */
+ } else {
+ /* Just printing one end as nomapping */
+ SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
+ /*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
} else {
for (pathnum = 1; pathnum <= npaths1 && pathnum <= maxpaths_report; pathnum++) {
@@ -4230,7 +4263,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,Shortread_fulllength(queryseq1),
/*first_read_p*/true);
- SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
@@ -4238,6 +4271,14 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
/*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
+
+ if (add_paired_nomappers_p == true) {
+ /* matching nomapper for second end */
+ SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/false,/*npaths*/npaths2,/*npaths_mate*/npaths1,
+ /*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
}
}
@@ -4261,7 +4302,15 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
/*first_read_p*/false);
- SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
+ if (add_paired_nomappers_p == true) {
+ /* matching nomapper for first end */
+ SAM_print_nomapping(fp,abbrev,queryseq1,/*mate*/stage3,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/true,/*npaths*/npaths1,/*npaths_mate*/npaths2,
+ /*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
+
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
@@ -4271,11 +4320,15 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
invert_second_p,invert_first_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
- /* Just printing one end as nomapping */
- SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
- /*mate_chrpos*/chrpos5,
- quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
+ if (add_paired_nomappers_p == true) {
+ /* Handle nomappers with each mapped mate */
+ } else {
+ /* Just printing one end as nomapping */
+ SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
+ /*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
+ }
} else {
for (pathnum = 1; pathnum <= npaths2 && pathnum <= maxpaths_report; pathnum++) {
@@ -4284,6 +4337,13 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
/*first_read_p*/false);
+ if (add_paired_nomappers_p == true) {
+ /* matching nomapper for first end */
+ SAM_print_nomapping(fp,abbrev,queryseq1,/*mate*/stage3,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/true,/*npaths*/npaths1,/*npaths_mate*/npaths2,
+ /*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
Stage3end_mapq_score(stage3),chromosome_iit,
@@ -4368,10 +4428,14 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
if (npaths1 == 0) {
/* just printing one end as nomapping */
/* mate should be non-NULL here */
- SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
- /*mate_chrpos*/chrpos3,
- quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ if (add_paired_nomappers_p == true) {
+ /* Handle nomappers with each mapped mate */
+ } else {
+ SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
+ /*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
} else if (npaths1 == 1) {
/* mate should be NULL here */
@@ -4389,14 +4453,24 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
/*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
+ if (add_paired_nomappers_p == true) {
+ /* matching nomapper for second end */
+ SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/false,/*npaths*/npaths2,/*npaths_mate*/npaths1,/*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
} else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
/* Just printing one end as nomapping */
/* mate should be NULL here */
- SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
- /*mate_chrpos*/chrpos3,
- quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ if (add_paired_nomappers_p == true) {
+ /* Handle nomappers with each mapped mate */
+ } else {
+ SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
+ /*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
} else {
/* mate should be NULL here */
@@ -4414,6 +4488,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
/*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
+ if (add_paired_nomappers_p == true) {
+ /* matching nomapper for second end */
+ SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/false,/*npaths*/npaths2,/*npaths_mate*/npaths1,/*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
}
}
@@ -4434,10 +4514,14 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
if (npaths2 == 0) {
/* Just printing one end as nomapping */
/* mate should be non-NULL here */
- SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
- /*mate_chrpos*/chrpos5,
- quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
+ if (add_paired_nomappers_p == true) {
+ /* Handle nomappers with each mapped mate */
+ } else {
+ SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
+ /*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
+ }
} else if (npaths2 == 1) {
/* mate should be NULL here */
@@ -4447,6 +4531,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
/*first_read_p*/false);
+ if (add_paired_nomappers_p == true) {
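+ /* matching nomapper for first end. NOTE(review): this call passes queryseq2 with first_read_p true, while the other first-end nomapper calls in SAM_print_paired pass queryseq1 -- confirm which is intended */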
+ SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/true,/*npaths*/npaths1,/*npaths_mate*/npaths2,/*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
Stage3end_mapq_score(stage3),chromosome_iit,
@@ -4459,10 +4549,14 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
} else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
/* Just printing one end as nomapping */
/* mate should be NULL here */
- SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
- /*mate_chrpos*/chrpos5,
- quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
+ if (add_paired_nomappers_p == true) {
+ /* Handle nomappers with each mapped mate */
+ } else {
+ SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
+ /*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
+ }
} else {
/* mate should be NULL here */
@@ -4472,6 +4566,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
/*first_read_p*/false);
+ if (add_paired_nomappers_p == true) {
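+ /* matching nomapper for first end. NOTE(review): this call passes queryseq2 with first_read_p true, while the other first-end nomapper calls in SAM_print_paired pass queryseq1 -- confirm which is intended */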
+ SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/true,/*npaths*/npaths1,/*npaths_mate*/npaths2,/*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ }
SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
Stage3end_mapq_score(stage3),chromosome_iit,
diff --git a/src/samprint.h b/src/samprint.h
index b237dd4..f43b204 100644
--- a/src/samprint.h
+++ b/src/samprint.h
@@ -1,4 +1,4 @@
-/* $Id: samprint.h 166641 2015-05-29 21:13:04Z twu $ */
+/* $Id: samprint.h 172734 2015-08-27 16:35:15Z twu $ */
#ifndef SAMPRINT_INCLUDED
#define SAMPRINT_INCLUDED
@@ -22,7 +22,8 @@
#ifdef GSNAP
extern void
-SAM_setup (bool quiet_if_excessive_p_in, int maxpaths_report_in,
+SAM_setup (bool add_paired_nomappers_p_in, bool paired_flag_means_concordant_p_in,
+ bool quiet_if_excessive_p_in, int maxpaths_report_in,
char *failedinput_root_in, bool fastq_format_p_in, bool hide_soft_clips_p_in,
bool clip_overlap_p_in, bool merge_overlap_p_in, bool sam_multiple_primaries_p_in,
bool force_xs_direction_p_in, bool md_lowercase_variant_p_in, IIT_T snps_iit_in,
diff --git a/src/sarray-read.c b/src/sarray-read.c
index 46c0cdc..e0f9b6e 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 170516 2015-07-23 23:15:12Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 172738 2015-08-27 16:37:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3928,20 +3928,20 @@ solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous
spliceends_antisense =
Splice_solve_single_antisense(&(*found_score),&nspliceends_antisense,spliceends_antisense,&lowprob,
- &segmenti_usedp,&segmentj_usedp,
- /*segmenti_left*/left1,/*segmentj_left*/left2,
- chrnum,chroffset,chrhigh,chrlength,
- chrnum,chroffset,chrhigh,chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,/*max_mismatches_allowed*/1000,
- plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
- /*sarrayp*/true);
+ &segmenti_usedp,&segmentj_usedp,
+ /*segmenti_left*/left1,/*segmentj_left*/left2,
+ chrnum,chroffset,chrhigh,chrlength,
+ chrnum,chroffset,chrhigh,chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,/*max_mismatches_allowed*/1000,
+ plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
+ /*sarrayp*/true);
} else if (left2 > left1) {
nindels = left2 - left1;
@@ -5887,7 +5887,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
char *queryptr, int querylength, Compress_T query_compress,
Univcoord_T chroffset, Univcoord_T chrhigh,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, bool plusp, int genestrand,
- bool first_read_p) {
+ bool first_read_p, int max_mismatches_allowed) {
List_T middle_path;
List_T p;
@@ -5908,7 +5908,6 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
int indexsize;
Chrpos_T splice_distance;
- int max_mismatches_allowed;
int splice_pos;
int best_knowni_i, best_knowni_j, best_nmismatches_i, best_nmismatches_j;
double best_prob_i, best_prob_j;
@@ -6069,6 +6068,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
splice_distance = left - prev_left;
+#if 0
max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
if (prev_diagonal->intscore > 0) {
@@ -6077,6 +6077,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
if (diagonal->intscore > 0) {
max_mismatches_allowed += 1;
}
+#endif
if ((splice_pos = Splice_resolve_sense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
&best_prob_i,&best_prob_j,
@@ -6363,6 +6364,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
splice_distance = left - prev_left;
+#if 0
max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
if (prev_diagonal->intscore > 0) {
@@ -6371,6 +6373,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
if (diagonal->intscore > 0) {
max_mismatches_allowed += 1;
}
+#endif
if ((splice_pos = Splice_resolve_sense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
&best_prob_i,&best_prob_j,
@@ -7017,7 +7020,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
Chrpos_T chrlength, int querylength, Compress_T query_compress,
- bool plusp, int genestrand, bool first_read_p) {
+ bool plusp, int genestrand, bool first_read_p, int max_mismatches_allowed) {
List_T super_path, ambig_path;
Stage3end_T hit;
int sensedir, sense_sensedir, antisense_sensedir;
@@ -7028,7 +7031,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
Chrpos_T splice_distance;
int querystart_for_merge, querystart, queryend, ignore;
int max_leftward, skip_left;
- int nmismatches, max_mismatches_allowed;
+ int nmismatches;
bool fillin_p;
Junction_T junction;
@@ -7176,6 +7179,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
if (left < prev_left) {
/* Insertion */
nindels = prev_left - left;
+#if 0
max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
if (prev_diagonal->intscore > 0) {
@@ -7184,6 +7188,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
if (diagonal->intscore > 0) {
max_mismatches_allowed += 1;
}
+#endif
if ((indel_pos = Indel_resolve_middle_insertion(&best_nmismatches_i,&best_nmismatches_j,
/*left*/prev_left,/*indels*/+nindels,query_compress,
prev_diagonal->querystart,diagonal->queryend,querylength,
@@ -7213,6 +7218,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
} else if (left <= prev_left + max_deletionlen) {
/* Deletion */
nindels = left - prev_left;
+#if 0
max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
if (prev_diagonal->intscore > 0) {
@@ -7221,6 +7227,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
if (diagonal->intscore > 0) {
max_mismatches_allowed += 1;
}
+#endif
if ((indel_pos = Indel_resolve_middle_deletion(&best_nmismatches_i,&best_nmismatches_j,
/*left*/prev_left,/*indels*/-nindels,query_compress,
prev_diagonal->querystart,diagonal->queryend,querylength,
@@ -7291,6 +7298,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
splice_distance = left - prev_left;
+#if 0
max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
if (prev_diagonal->intscore > 0) {
@@ -7299,6 +7307,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
if (diagonal->intscore > 0) {
max_mismatches_allowed += 1;
}
+#endif
if ((splice_pos = Splice_resolve_sense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
&best_prob_i,&best_prob_j,
@@ -8254,7 +8263,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
&left_amb_probsj_sense,&left_amb_probsj_antisense,
&(fillin_diagonals_plus[i]),diagonal,best_right_diagonals_plus[i],best_left_diagonals_plus[i],
/*queryptr*/queryuc_ptr,querylength,query_compress_fwd,chroffset,chrhigh,
- oligoindices_minor,diagpool,/*plusp*/true,genestrand,first_read_p);
+ oligoindices_minor,diagpool,/*plusp*/true,genestrand,first_read_p,
+ /*nmismatches_allowed*/nmisses_allowed);
hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_plus[i],
right_endpoints_sense,right_endpoints_antisense,
@@ -8276,7 +8286,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
left_amb_probsj_sense,left_amb_probsj_antisense,
chrnum,chroffset,chrhigh,chrlength,
- querylength,query_compress_fwd,/*plusp*/true,genestrand,first_read_p);
+ querylength,query_compress_fwd,/*plusp*/true,genestrand,first_read_p,
+ /*max_mismatches_allowed*/nmisses_allowed);
#if 0
if (0 && completep == false) {
@@ -8336,7 +8347,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
&left_amb_probsj_sense,&left_amb_probsj_antisense,
&(fillin_diagonals_minus[i]),diagonal,best_right_diagonals_minus[i],best_left_diagonals_minus[i],
/*queryptr*/queryrc,querylength,query_compress_rev,chroffset,chrhigh,
- oligoindices_minor,diagpool,/*plusp*/false,genestrand,first_read_p);
+ oligoindices_minor,diagpool,/*plusp*/false,genestrand,first_read_p,
+ /*nmismatches_allowed*/nmisses_allowed);
hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_minus[i],
right_endpoints_sense,right_endpoints_antisense,
@@ -8358,7 +8370,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
left_amb_probsj_sense,left_amb_probsj_antisense,
chrnum,chroffset,chrhigh,chrlength,
- querylength,query_compress_rev,/*plusp*/false,genestrand,first_read_p);
+ querylength,query_compress_rev,/*plusp*/false,genestrand,first_read_p,
+ /*max_mismatches_allowed*/nmisses_allowed);
#if 0
if (0 && completep == false) {
diff --git a/src/splice.c b/src/splice.c
index 5362f29..04f9695 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 167583 2015-06-15 18:12:14Z twu $";
+static char rcsid[] = "$Id: splice.c 173900 2015-09-12 00:46:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2557,14 +2557,14 @@ Splice_group_by_segmenti (int *found_score, List_T localsplicing, List_T *ambigu
qsort(array_forward,n_sense_forward,sizeof(Stage3end_T),Stage3end_chimera_segmenti_cmp);
winners = group_by_segmenti_aux(&(*found_score),winners,&(*ambiguous),array_forward,n_sense_forward,
querylength,first_read_p,sarrayp);
- FREEA(array);
+ FREEA(array_forward);
}
if (n_sense_anti > 0) {
qsort(array_anti,n_sense_anti,sizeof(Stage3end_T),Stage3end_chimera_segmenti_cmp);
winners = group_by_segmenti_aux(&(*found_score),winners,&(*ambiguous),array_anti,n_sense_anti,
querylength,first_read_p,sarrayp);
- FREEA(array);
+ FREEA(array_anti);
}
List_free(&localsplicing);
@@ -2892,14 +2892,14 @@ Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambigu
qsort(array_forward,n_sense_forward,sizeof(Stage3end_T),Stage3end_chimera_segmentj_cmp);
winners = group_by_segmentj_aux(&(*found_score),winners,&(*ambiguous),array_forward,n_sense_forward,
querylength,first_read_p,sarrayp);
- FREEA(array);
+ FREEA(array_forward);
}
if (n_sense_anti > 0) {
qsort(array_anti,n_sense_anti,sizeof(Stage3end_T),Stage3end_chimera_segmentj_cmp);
winners = group_by_segmentj_aux(&(*found_score),winners,&(*ambiguous),array_anti,n_sense_anti,
querylength,first_read_p,sarrayp);
- FREEA(array);
+ FREEA(array_anti);
}
List_free(&localsplicing);
diff --git a/src/stage1.c b/src/stage1.c
index 2ad2cdb..51d75b1 100644
--- a/src/stage1.c
+++ b/src/stage1.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1.c 158350 2015-02-10 18:43:34Z twu $";
+static char rcsid[] = "$Id: stage1.c 173039 2015-08-31 19:12:10Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2550,8 +2550,9 @@ collapse_diagonals (Univcoord_T **diagonals, int *ndiagonals,
/* Set up batches */
maxsegments = 0;
- batchpool = (struct Batch_T *) MALLOCA((querylength-oligobase+1) * sizeof(struct Batch_T));
- heap = (Batch_T *) MALLOCA((2*querylength+1+1) * sizeof(Batch_T));
+ /* Batch_T can be large, so don't use alloca */
+ batchpool = (struct Batch_T *) MALLOC((querylength-oligobase+1) * sizeof(struct Batch_T));
+ heap = (Batch_T *) MALLOC((2*querylength+1+1) * sizeof(Batch_T));
for (querypos = 0, i = 0; querypos <= querylength - oligobase; querypos++) {
if (ndiagonals[querypos] > 0) {
@@ -2570,8 +2571,8 @@ collapse_diagonals (Univcoord_T **diagonals, int *ndiagonals,
}
if (maxsegments == 0) {
- FREEA(heap);
- FREEA(batchpool);
+ FREE(heap);
+ FREE(batchpool);
return;
}
@@ -2672,8 +2673,8 @@ collapse_diagonals (Univcoord_T **diagonals, int *ndiagonals,
}
/* Terminate loop. */
- FREEA(heap);
- FREEA(batchpool);
+ FREE(heap);
+ FREE(batchpool);
return;
}
@@ -2700,8 +2701,9 @@ find_segments (int *nsegments, Univcoord_T **diagonals, int *ndiagonals,
/* Set up batches */
maxsegments = 0;
- batchpool = (struct Batch_T *) MALLOCA((querylength-oligobase+1) * sizeof(struct Batch_T));
- heap = (Batch_T *) MALLOCA((2*querylength+1+1) * sizeof(Batch_T));
+ /* Batch_T can be large, so don't use alloca */
+ batchpool = (struct Batch_T *) MALLOC((querylength-oligobase+1) * sizeof(struct Batch_T));
+ heap = (Batch_T *) MALLOC((2*querylength+1+1) * sizeof(Batch_T));
for (querypos = 0, i = 0; querypos <= querylength - oligobase; querypos++) {
if (ndiagonals[querypos] > 0) {
@@ -2720,8 +2722,8 @@ find_segments (int *nsegments, Univcoord_T **diagonals, int *ndiagonals,
}
if (maxsegments == 0) {
- FREEA(heap);
- FREEA(batchpool);
+ FREE(heap);
+ FREE(batchpool);
*nsegments = 0;
return (struct Segment_T *) NULL;
} else {
@@ -2895,8 +2897,8 @@ find_segments (int *nsegments, Univcoord_T **diagonals, int *ndiagonals,
ptr++; /* Needed to get correct value for nsegments below */
}
- FREEA(heap);
- FREEA(batchpool);
+ FREE(heap);
+ FREE(batchpool);
*nsegments = ptr - segments;
debug6(for (j = 0; j < *nsegments; j++) {
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 67fb4db..6929c09 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 170517 2015-07-23 23:15:28Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 175729 2015-09-30 15:10:51Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -119,7 +119,6 @@ static Chrpos_T shortsplicedist_novelend;
/* Penalties */
static int subopt_levels;
-static int terminal_threshold;
static int reject_trimlength;
static bool novelsplicingp;
@@ -13403,10 +13402,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
debug13(printf("Trim at start: %d, trim at end: %d\n",
Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
/* Don't throw away GMAP hits */
- if (0 && terminal_threshold > user_maxlevel &&
- (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
- debug13(printf("terminal_threshold %d > user_maxlevel %d, so freeing this GMAP hit\n",
- terminal_threshold,user_maxlevel));
+ if (0 && (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
stored_hits = List_push(stored_hits,(void *) NULL);
Stage3end_free(&hit);
} else {
@@ -13451,10 +13447,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
Stage3end_trim_right(hit),Stage3end_trim_left(hit),
Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
/* Don't throw away GMAP hits */
- if (0 && terminal_threshold > user_maxlevel &&
- (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
- debug13(printf("terminal_threshold %d > user_maxlevel %d, so freeing this GMAP hit\n",
- terminal_threshold,user_maxlevel));
+ if (0 && (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
stored_hits = List_push(stored_hits,(void *) NULL);
Stage3end_free(&hit);
} else {
@@ -14173,7 +14166,7 @@ convert_plus_segments_to_gmap (History_T gmap_history, List_T hits,
struct Pair_T *pairarray;
List_T pairs, stage2pairs, unsorted_pairs;
int querypos, boundpos, seglength;
- Chrpos_T genomepos;
+ Chrpos_T genomepos, min_genomepos, max_genomepos;
char comp, c, g, g_alt;
char *gsequence_orig, *gsequence_alt;
@@ -14565,7 +14558,7 @@ convert_plus_segments_to_gmap (History_T gmap_history, List_T hits,
seglength = (anchor_segment->querypos3 + index1part) - querypos;
left = anchor_segment->diagonal - querylength; /* FORMULA */
- genomepos = (left - chroffset) + querypos;
+ min_genomepos = genomepos = (left - chroffset) + querypos;
Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/chroffset+genomepos,
seglength,chrhigh,/*revcomp*/false);
@@ -14585,7 +14578,7 @@ convert_plus_segments_to_gmap (History_T gmap_history, List_T hits,
querypos++;
genomepos++;
}
-
+ max_genomepos = genomepos - 1;
/* F. Make stage2pairs (left) */
sorted = &(sorted_allocated[startk+1]);
@@ -14597,32 +14590,37 @@ convert_plus_segments_to_gmap (History_T gmap_history, List_T hits,
segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
querypos = segment->querypos5;
- seglength = (segment->querypos3 + index1part) - querypos;
+ if (querypos < boundpos) {
+ left = segment->diagonal - querylength; /* FORMULA */
+ genomepos = (left - chroffset) + querypos;
+ if (genomepos < min_genomepos) {
+ seglength = (segment->querypos3 + index1part) - querypos;
+ Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*right*/chroffset+genomepos+seglength,
+ seglength,chroffset,/*revcomp*/false);
+ debug13(printf("At %u, gsequence_orig %s\n",genomepos,gsequence_orig));
- left = segment->diagonal - querylength; /* FORMULA */
- genomepos = (left - chroffset) + querypos;
- Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*left*/chroffset+genomepos,
- seglength,chroffset,/*revcomp*/false);
-
- for (i = 0; i < seglength; i++) {
- if (querypos < boundpos) {
- c = queryuc_ptr[querypos];
- g = gsequence_orig[i];
- g_alt = gsequence_alt[i];
- if (g == c || g_alt == c) {
- comp = MATCH_COMP;
- } else {
- comp = MISMATCH_COMP;
+ i = 0;
+ while (i < seglength && querypos < boundpos && genomepos < min_genomepos) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ querypos++;
+ genomepos++;
+ i++;
}
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
+ boundpos = segment->querypos5;
+ min_genomepos = (left - chroffset) + segment->querypos5;
}
- querypos++;
- genomepos++;
}
- boundpos = segment->querypos5;
}
/* F. Make stage2pairs (right) */
@@ -14641,25 +14639,32 @@ convert_plus_segments_to_gmap (History_T gmap_history, List_T hits,
Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/chroffset+genomepos,
seglength,chrhigh,/*revcomp*/false);
- for (i = 0; i < seglength; i++) {
- if (querypos > boundpos) {
- c = queryuc_ptr[querypos];
- g = gsequence_orig[i];
- g_alt = gsequence_alt[i];
- if (g == c || g_alt == c) {
- comp = MATCH_COMP;
- } else {
- comp = MISMATCH_COMP;
- }
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
+ i = 0;
+ while (i < seglength && (querypos <= boundpos || genomepos <= max_genomepos)) {
+ querypos++;
+ genomepos++;
+ i++;
+ }
+
+ while (i < seglength) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
}
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
querypos++;
genomepos++;
+ i++;
}
boundpos = segment->querypos3 + index1part;
+ max_genomepos = genomepos - 1;
}
@@ -14802,7 +14807,7 @@ convert_minus_segments_to_gmap (History_T gmap_history, List_T hits,
struct Pair_T *pairarray;
List_T pairs, stage2pairs, unsorted_pairs;
int querypos, boundpos, seglength;
- Chrpos_T genomepos;
+ Chrpos_T genomepos, min_genomepos, max_genomepos;
char comp, c, g, g_alt;
char *gsequence_orig, *gsequence_alt;
@@ -15193,7 +15198,7 @@ convert_minus_segments_to_gmap (History_T gmap_history, List_T hits,
seglength = (anchor_segment->querypos3 + index1part) - querypos;
/* left = anchor_segment->diagonal - querylength; -- FORMULA */
- genomepos = chrhigh - (anchor_segment->diagonal - 1) + querypos;
+ min_genomepos = genomepos = chrhigh - (anchor_segment->diagonal - 1) + querypos;
Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/anchor_segment->diagonal - querypos - seglength,
seglength,chrhigh,/*revcomp*/true);
@@ -15213,6 +15218,7 @@ convert_minus_segments_to_gmap (History_T gmap_history, List_T hits,
querypos++;
genomepos++;
}
+ max_genomepos = genomepos - 1;
/* F. Make stage2pairs (left) */
sorted = &(sorted_allocated[startk+1]);
@@ -15227,28 +15233,35 @@ convert_minus_segments_to_gmap (History_T gmap_history, List_T hits,
/* left = segment->diagonal - querylength; -- FORMULA */
genomepos = chrhigh - (segment->diagonal - 1) + querypos;
- Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*left*/segment->diagonal - querypos - seglength,
- seglength,chroffset,/*revcomp*/true);
-
- for (i = 0; i < seglength; i++) {
- if (querypos > boundpos) {
- c = queryuc_ptr[querypos];
- g = gsequence_orig[i];
- g_alt = gsequence_alt[i];
- if (g == c || g_alt == c) {
- comp = MATCH_COMP;
- } else {
- comp = MISMATCH_COMP;
- }
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
+ Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*right*/segment->diagonal - querypos /*- seglength*/,
+ seglength,chroffset,/*revcomp*/true);
+
+ i = 0;
+ while (i < seglength && (querypos <= boundpos || genomepos <= max_genomepos)) {
+ querypos++;
+ genomepos++;
+ i++;
+ }
+
+ while (i < seglength) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
}
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
querypos++;
genomepos++;
+ i++;
}
boundpos = segment->querypos3 + index1part;
+ max_genomepos = genomepos - 1;
}
/* F. Make stage2pairs (right) */
@@ -15260,32 +15273,36 @@ convert_minus_segments_to_gmap (History_T gmap_history, List_T hits,
(Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
querypos = segment->querypos5;
- seglength = (segment->querypos3 + index1part) - querypos;
+ if (querypos < boundpos) {
+ /* left = segment->diagonal - querylength; -- FORMULA */
+ genomepos = chrhigh - (segment->diagonal - 1) + querypos;
+ if (genomepos < min_genomepos) {
+ seglength = (segment->querypos3 + index1part) - querypos;
+ Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/segment->diagonal - querypos - seglength,
+ seglength,chrhigh,/*revcomp*/true);
- /* left = segment->diagonal - querylength; -- FORMULA */
- genomepos = chrhigh - (segment->diagonal - 1) + querypos;
- Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/segment->diagonal - querypos - seglength,
- seglength,chrhigh,/*revcomp*/true);
-
- for (i = 0; i < seglength; i++) {
- if (querypos < boundpos) {
- c = queryuc_ptr[querypos];
- g = gsequence_orig[i];
- g_alt = gsequence_alt[i];
- if (g == c || g_alt == c) {
- comp = MATCH_COMP;
- } else {
- comp = MISMATCH_COMP;
+ i = 0;
+ while (i < seglength && querypos < boundpos && genomepos < min_genomepos) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ querypos++;
+ genomepos++;
+ i++;
}
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
+ boundpos = segment->querypos5;
+ min_genomepos = chrhigh - (segment->diagonal - 1) + segment->querypos5;
}
- querypos++;
- genomepos++;
}
- boundpos = segment->querypos5;
}
/* Sort pairs and get unique ones */
@@ -15404,6 +15421,9 @@ align_singleend_with_gmap (History_T gmap_history, List_T result, T this,
List_T p;
int genestrand;
int i;
+#ifdef DEBUG13
+ int missing_hit, missing_gmap;
+#endif
debug13(printf("Sorting hits by nmatches\n"));
@@ -15513,7 +15533,7 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int user_maxlevel, int indel_penalty_middle, int indel_penalty_end,
+ int user_maxlevel, int min_coverage, int indel_penalty_middle, int indel_penalty_end,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
bool keep_floors_p, int genestrand, bool first_read_p) {
@@ -15536,6 +15556,10 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
bool segments_computed_p = false;
Indexdb_T plus_indexdb, minus_indexdb;
bool allvalidp;
+#ifdef DEBUG13
+ int missing_hit, missing_gmap;
+#endif
+
if (genestrand == +2) {
plus_indexdb = indexdb_rev;
@@ -16191,11 +16215,10 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
/* Search 8: Terminals */
- if (greedy || subs || indels || singlesplicing || doublesplicing || shortendsplicing || longsinglesplicing || distantsplicing) {
- /* Don't find terminals */
- debug(printf("Skipping terminals because have greedy %p, subs %p, indels %p, singlesplicing %p, doublesplicing %p, or shortendsplicing %p\n",
- greedy,subs,indels,singlesplicing,doublesplicing,shortendsplicing));
- } else {
+ /* Previously criterion for skipping find_terminals was (greedy ||
+ subs || indels || singlesplicing || doublesplicing ||
+ shortendsplicing || longsinglesplicing || distantsplicing) */
+ if (found_score > opt_level) {
terminals = find_terminals(plus_anchor_segments,minus_anchor_segments,
querylength,query_lastpos,
query_compress_fwd,query_compress_rev,
@@ -16310,7 +16333,7 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
debug(printf("No GMAP improvement: Before remove_overlaps at cutoff level %d: %d\n",*cutoff_level,List_length(hits)));
hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
querylength,/*keep_gmap_p*/true,/*finalp*/true);
- hits = Stage3end_reject_trimlengths(hits);
+ /* hits = Stage3end_reject_trimlengths(hits); */
hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
querylength,/*keep_gmap_p*/false,/*finalp*/true);
@@ -16335,7 +16358,7 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
first_read_p);
hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
querylength,/*keep_gmap_p*/true,/*finalp*/true);
- hits = Stage3end_reject_trimlengths(hits);
+ /* hits = Stage3end_reject_trimlengths(hits); */
hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
querylength,/*keep_gmap_p*/false,/*finalp*/true);
@@ -16356,7 +16379,8 @@ static Stage3end_T *
single_read (int *npaths, int *first_absmq, int *second_absmq,
Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
int indexdb_size_threshold, Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+ double user_maxlevel_float, double user_mincoverage_float,
+ int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
@@ -16367,7 +16391,7 @@ single_read (int *npaths, int *first_absmq, int *second_absmq,
History_T gmap_history;
List_T hits = NULL;
T this = NULL;
- int user_maxlevel;
+ int user_maxlevel, min_coverage;
int querylength, query_lastpos, cutoff_level;
char *queryuc_ptr, *quality_string;
Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
@@ -16397,6 +16421,14 @@ single_read (int *npaths, int *first_absmq, int *second_absmq,
user_maxlevel = (int) user_maxlevel_float;
}
+ if (user_mincoverage_float < 0.0) {
+ min_coverage = 0;
+ } else if (user_mincoverage_float > 0.0 && user_mincoverage_float < 1.0) {
+ min_coverage = (int) rint(user_mincoverage_float * (double) querylength);
+ } else {
+ min_coverage = (int) user_mincoverage_float;
+ }
+
/* Limit search on repetitive sequences */
queryuc_ptr = Shortread_fullpointer_uc(queryseq);
quality_string = Shortread_quality_string(queryseq);
@@ -16421,11 +16453,12 @@ single_read (int *npaths, int *first_absmq, int *second_absmq,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel,min_coverage,indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
keep_floors_p,/*genestrand*/0,/*first_read_p*/true);
+ hits = Stage3end_filter_coverage(hits,min_coverage);
if ((*npaths = List_length(hits)) == 0) {
stage3array = (Stage3end_T *) NULL;
} else {
@@ -16448,7 +16481,8 @@ static Stage3end_T *
single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_absmq,
Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
int indexdb_size_threshold, Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+ double user_maxlevel_float, double user_mincoverage_float,
+ int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
@@ -16459,7 +16493,7 @@ single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
History_T gmap_history;
List_T hits, hits_geneplus = NULL, hits_geneminus = NULL;
T this_geneplus = NULL, this_geneminus = NULL;
- int user_maxlevel;
+ int user_maxlevel, min_coverage;
int querylength, query_lastpos, cutoff_level;
char *queryuc_ptr, *quality_string;
Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
@@ -16490,6 +16524,14 @@ single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
user_maxlevel = (int) user_maxlevel_float;
}
+ if (user_mincoverage_float < 0.0) {
+ min_coverage = 0;
+ } else if (user_mincoverage_float > 0.0 && user_mincoverage_float < 1.0) {
+ min_coverage = (int) rint(user_mincoverage_float * (double) querylength);
+ } else {
+ min_coverage = (int) user_mincoverage_float;
+ }
+
this_geneplus = Stage1_new(querylength);
this_geneminus = Stage1_new(querylength);
@@ -16518,7 +16560,7 @@ single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel,min_coverage,indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
keep_floors_p,/*genestrand*/+1,/*first_read_p*/true);
@@ -16532,7 +16574,7 @@ single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel,min_coverage,indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
keep_floors_p,/*genestrand*/+2,/*first_read_p*/true);
@@ -16541,12 +16583,13 @@ single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
hits = List_append(hits_geneplus,hits_geneminus);
hits = Stage3end_optimal_score(hits,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
querylength,/*keep_gmap_p*/true,/*finalp*/true);
- hits = Stage3end_reject_trimlengths(hits);
+ /* hits = Stage3end_reject_trimlengths(hits); */
hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
hits = Stage3end_optimal_score(hits,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
querylength,/*keep_gmap_p*/false,/*finalp*/true);
hits = Stage3end_resolve_multimapping(hits);
+ hits = Stage3end_filter_coverage(hits,min_coverage);
if ((*npaths = List_length(hits)) == 0) {
stage3array = (Stage3end_T *) NULL;
} else {
@@ -16570,7 +16613,8 @@ Stage3end_T *
Stage1_single_read (int *npaths, int *first_absmq, int *second_absmq,
Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
int indexdb_size_threshold, Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+ double user_maxlevel_float, double user_mincoverage_float,
+ int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
@@ -16581,7 +16625,7 @@ Stage1_single_read (int *npaths, int *first_absmq, int *second_absmq,
if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED || mode == TTOC_STRANDED) {
return single_read(&(*npaths),&(*first_absmq),&(*second_absmq),
queryseq,indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,user_mincoverage_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -16590,7 +16634,7 @@ Stage1_single_read (int *npaths, int *first_absmq, int *second_absmq,
} else if (mode == CMET_NONSTRANDED || mode == ATOI_NONSTRANDED || mode == TTOC_NONSTRANDED) {
return single_read_tolerant_nonstranded(&(*npaths),&(*first_absmq),&(*second_absmq),queryseq,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,user_mincoverage_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -17521,6 +17565,9 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
int genestrand;
int i;
bool replacedp;
+#ifdef DEBUG13
+ int missing_hit, missing_gmap;
+#endif
debug13(printf("Sorting %d hitpairs by nmatches\n",List_length(result)));
@@ -17856,7 +17903,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int user_maxlevel_5, int user_maxlevel_3, int indel_penalty_middle, int indel_penalty_end,
+ int user_maxlevel_5, int user_maxlevel_3, int min_coverage_5, int min_coverage_3,
+ int indel_penalty_middle, int indel_penalty_end,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
Chrpos_T pairmax, int maxpairedpaths, bool keep_floors_p, Shortread_T queryseq5, Shortread_T queryseq3,
@@ -18018,6 +18066,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/* Need to run Stage3end_remove_duplicates before we append the results together */
hitarray5[HITARRAY_GREEDY] = *hits5;
hitarray3[HITARRAY_GREEDY] = *hits3;
+ debug(printf("sarray only: 5' end has %d greedy\n",List_length(*hits5)));
+ debug(printf("sarray only: 3' end has %d greedy\n",List_length(*hits3)));
if (*hits5 == NULL || *hits3 == NULL) {
return (List_T) NULL;
@@ -18032,6 +18082,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
query3_compress_fwd,query3_compress_rev,
querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
genestrand);
+ debug(printf("After pairing sarray, found %d concordant, %d samechr, found_score %d\n",
+ nconcordant,nsamechr,*found_score));
debug(printf("SA> found_score = %d, done_level %d,%d\n",*found_score,done_level_5,done_level_3));
return Stage3pair_remove_circular_alias(hitpairs);
@@ -18703,7 +18755,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
*hits3 = List_append(greedy3,List_append(subs3,List_append(indels3,List_append(singlesplicing3,doublesplicing3))));
- /* Search 6: Paired egments -> GMAP via segments */
+ /* Search 6: Paired segments -> GMAP via segments */
gmap5p = gmap3p = true;
if (gmap_segments_p == false) {
@@ -18714,7 +18766,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
gmap5p = gmap3p = false;
} else if (nconcordant > 0) {
/* Rely upon GMAP improvement instead */
- debug(printf("nconcordant == 0, so setting gmap5p and gmap3p false\n"));
+ debug(printf("nconcordant > 0, so setting gmap5p and gmap3p false\n"));
gmap5p = gmap3p = false;
} else if (*found_score < trigger_score_for_gmap) {
debug(printf("found_score %d < trigger_score_for_gmap %d, so setting gmap5p and gmap3p false\n",
@@ -19128,7 +19180,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
/* Search 8: Terminals */
- if (nconcordant == 0) {
+ if (nconcordant == 0 || *found_score > opt_level) {
terminals5 = find_terminals(plus_anchor_segments_5,minus_anchor_segments_5,
querylength5,query5_lastpos,
query5_compress_fwd,query5_compress_rev,
@@ -19243,6 +19295,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
Stage3end_nmatches_posttrim(gmap3),querylength3,*cutoff_level_3,nconcordant));
}
hitpairs = List_push(hitpairs,(void *) newpair);
+
} else if (Stage3end_trimlength(hit5) < reject_trimlength) {
if (Stage3end_nmatches_posttrim(gmap3) >= querylength3 - (*cutoff_level_3) &&
Stage3end_gmap_max_match_length(gmap3) >= querylength3/2) {
@@ -19607,9 +19660,11 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/*pairtype*/CONCORDANT,localsplicing_penalty,
/*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
/* Stage3end_free(&gmap3); -- done by Stage3pair_new */
+#if 0
} else if (Stage3end_trimlength(hit5) < reject_trimlength) {
/* Save hit5-gmap3 */
hitpairs = List_push(hitpairs,(void *) newpair);
+#endif
} else {
/* Stage3end_free(&gmap3); */
Stage3pair_free(&newpair);
@@ -19651,9 +19706,11 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/*pairtype*/CONCORDANT,localsplicing_penalty,
/*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
/* Stage3end_free(&gmap5); -- done by Stage3pair_new */
+#if 0
} else if (Stage3end_trimlength(hit3) < reject_trimlength) {
/* Save gmap5-hit3 */
hitpairs = List_push(hitpairs,(void *) newpair);
+#endif
} else {
/* Stage3end_free(&gmap5); */
Stage3pair_free(&newpair);
@@ -19806,7 +19863,8 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
Shortread_T queryseq3, char *queryuc_ptr_3, char *queryrc3, char *quality_string_3, int querylength3, int query3_lastpos,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
Floors_T *floors_array,
- int user_maxlevel_5, int user_maxlevel_3, int indel_penalty_middle, int indel_penalty_end,
+ int user_maxlevel_5, int user_maxlevel_3, int min_coverage_5, int min_coverage_3,
+ int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
@@ -19829,12 +19887,13 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel_5,min_coverage_5,indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
keep_floors_p,genestrand,/*first_read_p*/true);
}
+ singlehits5 = Stage3end_filter_coverage(singlehits5,min_coverage_5);
if ((*nhits5 = List_length(singlehits5)) == 0) {
*stage3array5 = (Stage3end_T *) NULL;
} else {
@@ -19857,12 +19916,13 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel_3,min_coverage_3,indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
keep_floors_p,genestrand,/*first_read_p*/false);
}
+ singlehits3 = Stage3end_filter_coverage(singlehits3,min_coverage_3);
if ((*nhits3 = List_length(singlehits3)) == 0) {
*stage3array3 = (Stage3end_T *) NULL;
} else {
@@ -19896,7 +19956,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
Shortread_T queryseq3, char *queryuc_ptr_3, char *queryrc3,
char *quality_string_3, int querylength3, int query3_lastpos,
- int cutoff_level_5, int cutoff_level_3, int localsplicing_penalty,
+ int cutoff_level_5, int cutoff_level_3, int min_coverage_5, int min_coverage_3, int localsplicing_penalty,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
@@ -20257,7 +20317,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
if (result == NULL) {
singlehits5 = Stage3end_optimal_score(hits5,cutoff_level_5,subopt_levels,query5_compress_fwd,query5_compress_rev,
querylength5,/*keep_gmap_p*/true,/*finalp*/true);
- singlehits5 = Stage3end_reject_trimlengths(singlehits5);
+ /* singlehits5 = Stage3end_reject_trimlengths(singlehits5); */
singlehits5 = Stage3end_linearize_5(singlehits5);
singlehits5 = Stage3end_remove_overlaps(singlehits5,/*finalp*/true);
singlehits5 = Stage3end_optimal_score(singlehits5,cutoff_level_5,subopt_levels,query5_compress_fwd,query5_compress_rev,
@@ -20266,7 +20326,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
singlehits3 = Stage3end_optimal_score(hits3,cutoff_level_3,subopt_levels,query3_compress_fwd,query3_compress_rev,
querylength3,/*keep_gmap_p*/true,/*finalp*/true);
- singlehits3 = Stage3end_reject_trimlengths(singlehits3);
+ /* singlehits3 = Stage3end_reject_trimlengths(singlehits3); */
singlehits3 = Stage3end_linearize_3(singlehits3);
singlehits3 = Stage3end_remove_overlaps(singlehits3,/*finalp*/true);
singlehits3 = Stage3end_optimal_score(singlehits3,cutoff_level_3,subopt_levels,query3_compress_fwd,query3_compress_rev,
@@ -20318,6 +20378,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
*npaths = 0;
*final_pairtype = UNPAIRED;
+ singlehits5 = Stage3end_filter_coverage(singlehits5,min_coverage_5);
if ((*nhits5 = List_length(singlehits5)) == 0) {
*stage3array5 = (Stage3end_T *) NULL;
} else {
@@ -20331,6 +20392,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
*stage3array5 = (Stage3end_T *) List_to_array_out(singlehits5,NULL); List_free(&singlehits5); /* Return value */
}
+ singlehits3 = Stage3end_filter_coverage(singlehits3,min_coverage_3);
if ((*nhits3 = List_length(singlehits3)) == 0) {
*stage3array3 = (Stage3end_T *) NULL;
} else {
@@ -20383,27 +20445,35 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
return (Stage3pair_T *) NULL;
} else {
- /* result != NULL */
- /* Concordant, paired, or transloc pairs found. Remove single hits. */
debug16(printf("Result is not NULL (%d paths), and we fall through to concordant, paired, or transloc pairs\n",
List_length(result)));
- *npaths = List_length(result);
- stage3pairarray = (Stage3pair_T *) List_to_array_out(result,NULL); List_free(&result); /* Return value */
- Stage3pair_privatize(stage3pairarray,*npaths);
- Stage3pair_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
- stage3pairarray,maxpaths_search,queryseq5,queryseq3,
- queryuc_ptr_5,queryrc5,queryuc_ptr_3,queryrc3,
- query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,
- quality_string_5,quality_string_3);
-
- stage3list_gc(&hits3);
- stage3list_gc(&hits5);
+ result = Stage3pair_filter_coverage(result,min_coverage_5,min_coverage_3);
+ if ((*npaths = List_length(result)) == 0) {
+ stage3list_gc(&hits3);
+ stage3list_gc(&hits5);
+ *nhits5 = *nhits3 = 0;
+ *stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
+ return (Stage3pair_T *) NULL;
- *nhits5 = *nhits3 = 0;
- *stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
- return stage3pairarray;
+ } else {
+ /* result != NULL */
+ /* Concordant, paired, or transloc pairs found. Remove single hits. */
+ stage3pairarray = (Stage3pair_T *) List_to_array_out(result,NULL); List_free(&result); /* Return value */
+ Stage3pair_privatize(stage3pairarray,*npaths);
+ Stage3pair_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
+ stage3pairarray,maxpaths_search,queryseq5,queryseq3,
+ queryuc_ptr_5,queryrc5,queryuc_ptr_3,queryrc3,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ quality_string_5,quality_string_3);
+ stage3list_gc(&hits3);
+ stage3list_gc(&hits5);
+
+ *nhits5 = *nhits3 = 0;
+ *stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
+ return stage3pairarray;
+ }
}
}
@@ -20415,7 +20485,7 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
Shortread_T queryseq5, Shortread_T queryseq3,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+ double user_maxlevel_float, double user_mincoverage_float, int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
@@ -20428,7 +20498,7 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
char *queryuc_ptr_5, *queryuc_ptr_3, *quality_string_5, *quality_string_3;
Compress_T query5_compress_fwd = NULL, query5_compress_rev = NULL, query3_compress_fwd = NULL, query3_compress_rev = NULL;
History_T gmap_history_5, gmap_history_3;
- int user_maxlevel_5, user_maxlevel_3;
+ int user_maxlevel_5, user_maxlevel_3, min_coverage_5, min_coverage_3;
int found_score, cutoff_level_5, cutoff_level_3;
int querylength5, querylength3, query5_lastpos, query3_lastpos;
#if 0
@@ -20470,6 +20540,16 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
user_maxlevel_5 = user_maxlevel_3 = (int) user_maxlevel_float;
}
+ if (user_mincoverage_float < 0.0) {
+ min_coverage_5 = min_coverage_3 = 0;
+ } else if (user_mincoverage_float > 0.0 && user_mincoverage_float < 1.0) {
+ min_coverage_5 = (int) rint(user_mincoverage_float * (double) querylength5);
+ min_coverage_3 = (int) rint(user_mincoverage_float * (double) querylength3);
+ } else {
+ min_coverage_5 = min_coverage_3 = (int) user_mincoverage_float;
+ }
+
+
this5 = Stage1_new(querylength5);
this3 = Stage1_new(querylength3);
queryuc_ptr_5 = Shortread_fullpointer_uc(queryseq5);
@@ -20507,7 +20587,8 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
+ indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
pairmax,maxpairedpaths,keep_floors_p,queryseq5,queryseq3,/*genestrand*/0);
@@ -20526,7 +20607,8 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
+ indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
oligoindices_major,oligoindices_minor,
@@ -20556,7 +20638,7 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
&this3->plus_segments,&this3->plus_nsegments,&this3->minus_segments,&this3->minus_nsegments,
queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- cutoff_level_5,cutoff_level_3,
+ cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3,
localsplicing_penalty,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
@@ -20581,7 +20663,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
Shortread_T queryseq5, Shortread_T queryseq3,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+ double user_maxlevel_float, double user_mincoverage_float, int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
@@ -20597,7 +20679,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
char *queryuc_ptr_5, *queryuc_ptr_3, *quality_string_5, *quality_string_3;
Compress_T query5_compress_fwd = NULL, query5_compress_rev = NULL, query3_compress_fwd = NULL, query3_compress_rev = NULL;
History_T gmap_history_5, gmap_history_3;
- int user_maxlevel_5, user_maxlevel_3;
+ int user_maxlevel_5, user_maxlevel_3, min_coverage_5, min_coverage_3;
int found_score_geneplus, found_score_geneminus;
int cutoff_level_5, cutoff_level_3;
int querylength5, querylength3, query5_lastpos, query3_lastpos;
@@ -20644,6 +20726,15 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
user_maxlevel_5 = user_maxlevel_3 = (int) user_maxlevel_float;
}
+ if (user_mincoverage_float < 0.0) {
+ min_coverage_5 = min_coverage_3 = 0;
+ } else if (user_mincoverage_float > 0.0 && user_mincoverage_float < 1.0) {
+ min_coverage_5 = (int) rint(user_mincoverage_float * (double) querylength5);
+ min_coverage_3 = (int) rint(user_mincoverage_float * (double) querylength3);
+ } else {
+ min_coverage_5 = min_coverage_3 = (int) user_mincoverage_float;
+ }
+
this_geneplus_5 = Stage1_new(querylength5);
this_geneplus_3 = Stage1_new(querylength3);
this_geneminus_5 = Stage1_new(querylength5);
@@ -20687,7 +20778,8 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
+ indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
pairmax,maxpairedpaths,keep_floors_p,
@@ -20707,7 +20799,8 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
+ indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
pairmax,maxpairedpaths,keep_floors_p,queryseq5,queryseq3,/*genestrand*/+2);
@@ -20730,7 +20823,8 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
+ indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
oligoindices_major,oligoindices_minor,
@@ -20771,7 +20865,8 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- cutoff_level_5,cutoff_level_3,localsplicing_penalty,oligoindices_major,oligoindices_minor,
+ cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3,
+ localsplicing_penalty,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
History_free(&gmap_history_3);
History_free(&gmap_history_5);
@@ -20802,7 +20897,8 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3,
+ indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
oligoindices_major,oligoindices_minor,
@@ -20843,7 +20939,8 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- cutoff_level_5,cutoff_level_3,localsplicing_penalty,oligoindices_major,oligoindices_minor,
+ cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3,
+ localsplicing_penalty,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
History_free(&gmap_history_3);
History_free(&gmap_history_5);
@@ -20893,7 +20990,8 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- cutoff_level_5,cutoff_level_3,localsplicing_penalty,oligoindices_major,oligoindices_minor,
+ cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3,
+ localsplicing_penalty,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
History_free(&gmap_history_3);
History_free(&gmap_history_5);
@@ -20917,7 +21015,7 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
Shortread_T queryseq5, Shortread_T queryseq3,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+ double user_maxlevel_float, double user_mincoverage_float, int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
@@ -20930,7 +21028,7 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
&(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
queryseq5,queryseq3,indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,user_maxlevel_float,indel_penalty_middle,indel_penalty_end,
+ floors_array,user_maxlevel_float,user_mincoverage_float,indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
oligoindices_major,oligoindices_minor,
@@ -20941,7 +21039,7 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
&(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
queryseq5,queryseq3,indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,user_maxlevel_float,indel_penalty_middle,indel_penalty_end,
+ floors_array,user_maxlevel_float,user_mincoverage_float,indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
oligoindices_major,oligoindices_minor,
@@ -20966,7 +21064,6 @@ void
Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_in, int index1interval_in,
int spansize_in, Univ_IIT_T chromosome_iit_in, int nchromosomes_in,
Genome_T genome_in, Genome_T genomealt, Mode_T mode_in, int maxpaths_search_in,
- int terminal_threshold_in, int reject_trimlength_in,
Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
Chrpos_T *splicedists_in, int nsplicesites_in,
@@ -21020,9 +21117,6 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
mode = mode_in;
maxpaths_search = maxpaths_search_in;
- terminal_threshold = terminal_threshold_in;
- reject_trimlength = reject_trimlength_in;
-
splicesites = splicesites_in;
splicetypes = splicetypes_in;
splicedists = splicedists_in;
diff --git a/src/stage1hr.h b/src/stage1hr.h
index f79e5a5..84ba163 100644
--- a/src/stage1hr.h
+++ b/src/stage1hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage1hr.h 166641 2015-05-29 21:13:04Z twu $ */
+/* $Id: stage1hr.h 173896 2015-09-12 00:11:40Z twu $ */
#ifndef STAGE1HR_INCLUDED
#define STAGE1HR_INCLUDED
@@ -59,7 +59,8 @@ extern Stage3end_T *
Stage1_single_read (int *npaths, int *first_absmq, int *second_absmq,
Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
int indexdb_size_threshold, Floors_T *floors_array,
- double usermax_level_float, int indel_penalty_middle, int indel_penalty_end,
+ double user_maxlevel_float, double user_mincoverage_float,
+ int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
@@ -74,7 +75,8 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
Shortread_T queryseq5, Shortread_T queryseq3,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
Floors_T *floors_array,
- double usermax_level_float, int indel_penalty_middle, int indel_penalty_end,
+ double usermax_level_float, double user_mincoverage_float,
+ int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
@@ -89,7 +91,6 @@ extern void
Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_in, int index1interval_in,
int spansize_in, Univ_IIT_T chromosome_iit_in, int nchromosomes_in,
Genome_T genome_in, Genome_T genomealt, Mode_T mode_in, int maxpaths_search_in,
- int terminal_threshold_in, int reject_trimlength,
Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
Chrpos_T *splicedists_in, int nsplicesites_in,
diff --git a/src/stage3.c b/src/stage3.c
index 71b2126..ed3b9ac 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 166984 2015-06-07 02:59:20Z twu $";
+static char rcsid[] = "$Id: stage3.c 174482 2015-09-22 00:58:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3460,23 +3460,23 @@ canonicalp (bool knowngapp, char comp, double donor_prob, double acceptor_prob,
/* Copied from stage1hr.c */
static int
-sufficient_splice_prob_local (int support, int nmismatches, double distal_spliceprob,
+sufficient_splice_prob_local (int support, int nmatches, int nmismatches, double distal_spliceprob,
double medial_spliceprob) {
- debug3(printf("Checking for sufficient splice prob, based on %d mismatches and support %d\n",
- nmismatches,support));
- support -= 2*nmismatches;
- if (support < 0) {
- return 0;
- } else if (support < 7) {
+ debug3(printf("Checking for sufficient splice prob, based on %d matches, %d mismatches, and support %d\n",
+ nmatches,nmismatches,support));
+ nmatches -= 2*nmismatches;
+ if (nmatches < 0) {
+ return (int) false;
+ } else if (nmatches < 7) {
return (distal_spliceprob > 0.95 && medial_spliceprob > 0.90);
- } else if (support < 11) {
+ } else if (nmatches < 11) {
return (distal_spliceprob > 0.90 && medial_spliceprob > 0.85);
- } else if (support < 15) {
+ } else if (nmatches < 15) {
return (distal_spliceprob > 0.85 && medial_spliceprob > 0.80);
- } else if (support < 19) {
- return (distal_spliceprob > 0.50);
+ } else if (nmatches < 19) {
+ return (distal_spliceprob > 0.50 /*&& medial_spliceprob > 0.50*/);
} else {
- return true;
+ return (int) true;
}
}
@@ -3687,11 +3687,13 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs,
*trim5p = true;
#endif
+#if 0
} else if (List_length(exon) - 1 > List_length(pairs)) {
/* Subtract 1 because gap is included in exon */
debug3(printf("Exon is more than halfway across %d - 1 > %d, so keeping it\n",List_length(exon),List_length(pairs)));
path = exon; /* exon already has the gap */
*trim5p = false;
+#endif
} else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) {
debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches));
@@ -3759,7 +3761,7 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs,
*trim5p = true;
#endif
- } else if (sufficient_splice_prob_local(List_length(exon),max_nmismatches,
+ } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches,
/*distal_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob,
/*medial_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob)) {
/* Want to keep for comparison of fwd and rev, even if probabilities are poor */
@@ -3960,11 +3962,13 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
*trim3p = true;
#endif
+#if 0
} else if (List_length(exon) - 1 > List_length(path)) {
/* Subtract 1 because gap is included in exon */
debug3(printf("Exon is more than halfway across %d - 1 > %d, so keeping it\n",List_length(exon),List_length(path)));
pairs = exon; /* exon already has the gap */
*trim3p = false;
+#endif
} else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) {
debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches));
@@ -4032,7 +4036,7 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
*trim3p = true;
#endif
- } else if (sufficient_splice_prob_local(List_length(exon),max_nmismatches,
+ } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches,
/*distal_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob,
/*medial_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob)) {
/* Want to keep for comparison of fwd and rev, even if probabilities are poor */
@@ -4973,7 +4977,7 @@ Stage3_new (struct Pair_T *pairarray, List_T pairs, int npairs, int goodness, in
return (T) NULL;
} else {
- new = (T) MALLOC(sizeof(*new));
+ new = (T) MALLOC_OUT(sizeof(*new)); /* Matches FREE_OUT in Stage3_free */
new->cigar_tokens = cigar_tokens;
new->intronp = intronp;
}
@@ -7458,7 +7462,7 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
debug(Pair_dump_list(micropairs,/*zerobasedp*/true));
debug(printf("\n"));
-#if 0
+#if 1
if (1 || (nindels == 0 && nmismatches < 4)) {
/* Have a higher standard */
if (prob2 >= 0.95 && prob3 >= 0.95) {
@@ -7725,7 +7729,7 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
}
if (dual_gappairs_2 == NULL || dual_gappairs_1 == NULL) {
- debug(printf("Single score wins becausel dual_guappairs_2 is NULL or dual_gappairs_1 is NULL\n"));
+ debug(printf("Single score wins because dual_guappairs_2 is NULL or dual_gappairs_1 is NULL\n"));
debug(printf("Loser: dual_gappairs\n"));
debug(Pair_dump_list(dual_gappairs_2,true));
debug(Pair_dump_list(dual_gappairs_1,true));
@@ -9355,9 +9359,8 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
int minintronlen;
double donor_score, acceptor_score;
int nintrons = 0;
- int i;
int total_matches, total_denominator;
- int max_neighborhood_score, neighborhood_score;
+ int max_neighborhood_score, neighborhood_score, neighborhood_length;
#if 0
char gbuffer1[MAXENT_MAXLENGTH];
#endif
@@ -9421,7 +9424,8 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
/* Look at right neighborhood */
max_neighborhood_score = neighborhood_score = 0;
- for (p = pairs, i = 0; p != NULL && i < 25 && ((Pair_T) (p->first))->gapp == false; p = p->rest, i++) {
+ neighborhood_length = 0;
+ for (p = pairs; p != NULL && neighborhood_length < 25 && ((Pair_T) (p->first))->gapp == false; p = p->rest) {
rightpair = p->first;
if (rightpair->comp == MATCH_COMP || rightpair->comp == DYNPROG_MATCH_COMP || rightpair->comp == AMBIGUOUS_COMP) {
neighborhood_score += 1;
@@ -9431,17 +9435,17 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
if (neighborhood_score > max_neighborhood_score) {
max_neighborhood_score = neighborhood_score;
}
+ neighborhood_length += 1;
}
- debug11(printf("right neighborhood: max_neighborhood_score %d\n",max_neighborhood_score));
- if (max_neighborhood_score < 6) {
- /* Not a good intron */
- /* *nbadintrons += 1; */
-
- } else {
- /* Look at left neighborhood */
+ debug11(printf("right neighborhood: max_neighborhood_score %d, neighborhood_length %d\n",
+ max_neighborhood_score,neighborhood_length));
+ if (max_neighborhood_score >= 6 ||
+ (neighborhood_length < 10 && max_neighborhood_score > neighborhood_length - 1)) {
+ /* Alignment in right neighborhood okay. Look at left neighborhood */
max_neighborhood_score = neighborhood_score = 0;
- for (p = path, i = 0; p != NULL && i < 25 && ((Pair_T) (p->first))->gapp == false; p = p->rest, i++) {
+ neighborhood_length = 0;
+ for (p = path; p != NULL && neighborhood_length < 25 && ((Pair_T) (p->first))->gapp == false; p = p->rest) {
leftpair = p->first;
if (leftpair->comp == MATCH_COMP || leftpair->comp == DYNPROG_MATCH_COMP || leftpair->comp == AMBIGUOUS_COMP) {
neighborhood_score += 1;
@@ -9451,14 +9455,14 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
if (neighborhood_score > max_neighborhood_score) {
max_neighborhood_score = neighborhood_score;
}
+ neighborhood_length += 1;
}
- debug11(printf("left neighborhood: max_neighborhood_score %d\n",max_neighborhood_score));
- if (max_neighborhood_score < 6) {
- /* Not a good intron */
- /* *nbadintrons += 1; */
-
- } else {
+ debug11(printf("left neighborhood: max_neighborhood_score %d, neighborhood_length %d\n",
+ max_neighborhood_score,neighborhood_length));
+ if (max_neighborhood_score >= 6 ||
+ (neighborhood_length < 10 && max_neighborhood_score > neighborhood_length - 1)) {
+ /* Alignment in left neighborhood okay */
leftpair = path->first;
rightpair = pairs->first;
@@ -11690,8 +11694,8 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
int iter = 0;
#ifdef GSNAP
- debug(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,sensedir));
- debug3(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,sensedir));
+ debug(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,*sensedir));
+ debug3(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,*sensedir));
#else
debug(printf("Entering path_trim with cdna_direction %d\n",*cdna_direction));
debug3(printf("Entering path_trim with cdna_direction %d\n",*cdna_direction));
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 9dd4189..c62fb51 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 167393 2015-06-11 22:16:20Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 174482 2015-09-22 00:58:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -122,6 +122,7 @@ static char rcsid[] = "$Id: stage3hr.c 167393 2015-06-11 22:16:20Z twu $";
#define debug5a(x)
#endif
+
/* Stage3pair_optimal_score */
#ifdef DEBUG6
#define debug6(x) x
@@ -214,7 +215,6 @@ static int *tally_divint_crosstable;
static IIT_T runlength_iit;
static int *runlength_divint_crosstable;
-static int reject_trimlength;
static int pairmax;
#if 0
@@ -256,7 +256,7 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
IIT_T genes_iit_in, int *genes_divint_crosstable_in,
IIT_T tally_iit_in, int *tally_divint_crosstable_in,
IIT_T runlength_iit_in, int *runlength_divint_crosstable_in,
- int reject_trimlength_in, bool distances_observed_p, int pairmax_in,
+ bool distances_observed_p, int pairmax_in,
Chrpos_T expected_pairlength, Chrpos_T pairlength_deviation,
int localsplicing_penalty_in, int indel_penalty_middle_in,
int antistranded_penalty_in, bool favor_multiexon_p_in,
@@ -283,7 +283,6 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
favor_multiexon_p = favor_multiexon_p_in;
gmap_min_nconsecutive = gmap_min_nconsecutive_in;
- reject_trimlength = reject_trimlength_in;
pairmax = pairmax_in;
if (pairlength_deviation > expected_pairlength) {
expected_pairlength_low = 0;
@@ -366,6 +365,7 @@ struct T {
int mapq_score;
int absmq_score; /* Absolute MAPQ, for XQ and X2 flags */
+ int nsegments;
int score; /* Includes colordiffs and penalties */
int ntscore; /* Includes penalties */
int nmatches;
@@ -448,7 +448,6 @@ struct T {
/* For GMAP alignment */
struct Pair_T *pairarray;
int npairs;
- int nsegments; /* Used only for GSNAP output */
List_T cigar_tokens;
bool gmap_intronp;
@@ -4443,6 +4442,7 @@ Stage3end_copy (T old) {
new->mapq_score = old->mapq_score;
new->absmq_score = old->absmq_score;
+ new->nsegments = old->nsegments;
new->score = old->score;
new->ntscore = old->ntscore;
new->nmatches = old->nmatches;
@@ -4496,14 +4496,12 @@ Stage3end_copy (T old) {
new->npairs = old->npairs;
new->cigar_tokens = Pair_tokens_copy(old->cigar_tokens);
new->gmap_intronp = old->gmap_intronp;
- new->nsegments = old->nsegments;
} else {
new->pairarray = (struct Pair_T *) NULL;
new->npairs = 0;
new->cigar_tokens = (List_T) NULL;
new->gmap_intronp = false;
- new->nsegments = 0;
for (p = old->substrings_1toN; p != NULL; p = List_next(p)) {
old_substring = (Substring_T) List_head(p);
@@ -4705,6 +4703,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
debug0(printf("Endpoints: %s\n",Intlist_to_string(endpoints)));
debug0(printf("Lefts: %s\n",Uintlist_to_string(lefts)));
debug0(printf("Mismatches: %s\n",Intlist_to_string(nmismatches_list)));
+
assert(Uintlist_length(lefts) == Intlist_length(endpoints) - 1);
assert(Intlist_length(nmismatches_list) == Intlist_length(endpoints) - 1);
assert(List_length(junctions) == Intlist_length(endpoints) - 2);
@@ -5099,6 +5098,8 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
/* new->nmismatches_refdiff = 0; */
new->ntscore = nmismatches_whole + indel_score;
new->score = nmismatches_whole + indel_score; /* Want untrimmed */
+ new->nsegments = List_length(new->substrings_1toN);
+
new->nmatches = querylength - nmismatches_whole;
new->nmatches_posttrim = querylength_trimmed - nmismatches_whole;
@@ -5215,8 +5216,8 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
stage2pairs = (List_T) NULL;
querypos = querystart; /* Should be 0 */
genomepos = (ambcoords[k] - seglength) - this->chroffset;
- Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*left*/ambcoords[k] - seglength,
- seglength,this->chrhigh,/*revcomp*/false);
+ Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*right*/ambcoords[k] /*- seglength*/,
+ seglength,this->chroffset,/*revcomp*/false);
for (i = 0; i < seglength; i++) {
c = queryuc_ptr[querypos];
g = gsequence_orig[i];
@@ -5495,8 +5496,8 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
stage2pairs = (List_T) NULL;
querypos = querystart;
genomepos = (this->chrhigh + 1) - ambcoords[k];
- Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*left*/ambcoords[k] - seglength,
- seglength,this->chrhigh,/*revcomp*/true);
+ Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*right*/ambcoords[k] /*- seglength*/,
+ seglength,this->chroffset,/*revcomp*/true);
for (i = 0; i < seglength; i++) {
c = queryuc_ptr[querypos];
@@ -5788,6 +5789,7 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
/* new->nmismatches_refdiff = 0; */
new->ntscore = 0;
new->score = 0;
+ new->nsegments = 1;
new->nmatches = genomiclength;
new->nmatches_posttrim = genomiclength;
@@ -5974,6 +5976,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
new->nmismatches_whole = nmismatches_whole;
new->ntscore = nmismatches_whole;
new->score = nmismatches_whole;
+ new->nsegments = 1;
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(substring);
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(new->substring1); */
@@ -6252,6 +6255,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
new->nmismatches_whole = nmismatches1_whole + nmismatches2_whole;
new->ntscore = indel_penalty + nmismatches1_whole + nmismatches2_whole;
new->score = new->ntscore;
+ new->nsegments = 2;
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(substring1) + Substring_nmismatches_bothdiff(substring2);
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(new->substring1) + Substring_nmismatches_refdiff(new->substring2); */
@@ -6559,6 +6563,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
new->nmismatches_whole = nmismatches1_whole + nmismatches2_whole;
new->ntscore = indel_penalty + nmismatches1_whole + nmismatches2_whole;
new->score = new->ntscore;
+ new->nsegments = 2;
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(substring1) + Substring_nmismatches_bothdiff(substring2);
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(substring1) + Substring_nmismatches_refdiff(substring2); */
@@ -7000,6 +7005,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->nmismatches_whole = nmismatches_donor + nmismatches_acceptor;
new->score = new->ntscore = splicing_penalty + new->nmismatches_whole;
+ new->nsegments = 2;
#if 0
if (sensedir == SENSE_FORWARD) {
new->score += antistranded_penalty;
@@ -7355,6 +7361,7 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->ntscore = splicing_penalty + splicing_penalty + new->nmismatches_whole;
new->score = new->ntscore;
+ new->nsegments = 3;
#if 0
if (sensedir == SENSE_FORWARD) {
new->score += antistranded_penalty;
@@ -7653,6 +7660,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
#else
new->score = /* terminal_penalty + */ Substring_nmismatches_whole(substring);
#endif
+ new->nsegments = 1;
#if 0
new->nmatches = Substring_match_length(substring) - nmismatches;
@@ -7714,6 +7722,8 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
*/
/* However, this leads to fatal bugs later, so restored these statements */
+ debug0(printf("Entered Stage3end_new_gmap with first_read_p %d and sensedir %d\n",first_read_p,sensedir));
+
start = &(pairarray[0]);
end = &(pairarray[npairs-1]);
hardclip_start = start->querypos;
@@ -7862,6 +7872,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
}
debug0(printf(" nmatches %d = posttrim %d + ambig_end_length_5 %d + ambig_end_length_3 %d\n",
new->nmatches,nmatches_posttrim,ambig_end_length_5,ambig_end_length_3));
+
new->nmatches_posttrim -= localsplicing_penalty * nintrons; /* for use in goodness_cmp procedures */
new->nmatches_posttrim -= indel_penalty_middle * nindelbreaks; /* for use in goodness_cmp procedures */
@@ -7892,11 +7903,11 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
if (plusp == true) {
prob1 = Maxent_hr_acceptor_prob(genomepos,chroffset);
prob2 = Maxent_hr_antidonor_prob(genomepos,chroffset);
- /* fprintf(stderr,"At %llu, acceptor prob %f, antidonor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
+ /* printf("At %llu, acceptor prob %f, antidonor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
} else {
prob1 = Maxent_hr_donor_prob(genomepos,chroffset);
prob2 = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
- /* fprintf(stderr,"At %llu, donor prob %f, antiacceptor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
+ /* printf("At %llu, donor prob %f, antiacceptor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
}
if (prob1 > 0.90 || prob2 > 0.90) {
new->trim_left_splicep = true;
@@ -7915,11 +7926,11 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
if (plusp == true) {
prob1 = Maxent_hr_donor_prob(genomepos,chroffset);
prob2 = Maxent_hr_antiacceptor_prob(genomepos,chroffset);
- /* fprintf(stderr,"At %llu, donor prob %f, antiacceptor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
+ /* printf("At %llu, donor prob %f, antiacceptor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
} else {
prob1 = Maxent_hr_acceptor_prob(genomepos,chroffset);
prob2 = Maxent_hr_antidonor_prob(genomepos,chroffset);
- /* fprintf(stderr,"At %llu, acceptor prob %f, antidonor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
+ /* printf("At %llu, acceptor prob %f, antidonor prob %f\n",(unsigned long long) genomepos,prob1,prob2); */
}
if (prob1 > 0.90 || prob2 > 0.90) {
new->trim_right_splicep = true;
@@ -8375,6 +8386,27 @@ Stage3end_sort_by_paired_seenp (List_T hits) {
+List_T
+Stage3end_filter_coverage (List_T hits, int min_coverage) {
+ List_T newhits = NULL, p;
+ Stage3end_T hit;
+
+ for (p = hits; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (hit->querylength - hit->trim_left - hit->trim_right >= min_coverage) {
+ newhits = List_push(newhits,(void *) hit);
+ } else {
+ Stage3end_free(&hit);
+ }
+ }
+
+ List_free(&hits);
+ return newhits;
+}
+
+
+
+
Stage3end_T *
Stage3end_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
Stage3end_T *stage3array, int maxpaths, Shortread_T queryseq,
@@ -8776,6 +8808,7 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
int max_nmatches = 0, max_nmatches_posttrim = 0;
int trim_left = querylength, trim_right = querylength;
int nindelbreaks;
+ int best_nsegments;
#ifdef TRANSLOC_SPECIAL
bool non_translocation_p = false;
@@ -8994,6 +9027,38 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
List_free(&hitlist);
+
+ /* Filter on nsegments */
+ if (finalp == true && optimal != NULL) {
+ hitlist = optimal;
+ optimal = (List_T) NULL;
+
+ hit = (T) hitlist->first;
+ best_nsegments = hit->nsegments;
+
+ for (p = hitlist; p != NULL; p = p->rest) {
+ hit = (T) p->first;
+ if (hit->nsegments < best_nsegments) {
+ best_nsegments = hit->nsegments;
+ }
+ }
+
+ for (p = hitlist; p != NULL; p = p->rest) {
+ hit = (T) p->first;
+ if (hit->nsegments > best_nsegments + 2) {
+ debug4(printf("Eliminating a hit with nsegments %d\n",hit->nsegments));
+ *eliminatedp = true;
+ Stage3end_free(&hit);
+ } else {
+ debug4(printf("Keeping a hit with nsegments %d\n",hit->nsegments));
+ optimal = List_push(optimal,hit);
+ }
+ }
+
+ List_free(&hitlist);
+ }
+
+
debug4(printf("hitlist now has %d entries\n",List_length(optimal)));
return optimal;
}
@@ -9625,6 +9690,7 @@ Stage3end_remove_duplicates (List_T hitlist) {
}
+#if 0
List_T
Stage3end_reject_trimlengths (List_T hits) {
List_T filtered = NULL, p;
@@ -9642,6 +9708,7 @@ Stage3end_reject_trimlengths (List_T hits) {
List_free(&hits);
return filtered;
}
+#endif
/* Used for eliminating exact duplicates. Also sorts secondarily by hittype. */
@@ -9732,6 +9799,12 @@ hit_sort_cmp (const void *a, const void *b) {
} else if (y->indel_low < x->indel_low) {
return +1;
#endif
+
+ } else if (x->sensedir != 0 && y->sensedir == 0) {
+ return -1;
+ } else if (y->sensedir != 0 && x->sensedir == 0) {
+ return +1;
+
} else if (x->sarrayp == true && y->sarrayp == false) {
return -1;
} else if (x->sarrayp == false && y->sarrayp == true) {
@@ -9811,6 +9884,14 @@ hit_equiv_cmp (Stage3end_T x, Stage3end_T y) {
return +1;
#endif
+#if 0
+ /* Used for sorting but not equiv */
+ } else if (x->sensedir != 0 && y->sensedir == 0) {
+ return -1;
+ } else if (y->sensedir != 0 && x->sensedir == 0) {
+ return +1;
+#endif
+
} else {
return 0;
}
@@ -12374,6 +12455,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
debug10(printf("\nStage3pair_new called with pairtype %s and chrnum %d, %d (effective %d, %d)\n",
Pairtype_string(pairtype),hit5->chrnum,hit3->chrnum,hit5->effective_chrnum,hit3->effective_chrnum));
+#if 0
if (hit5->hittype == TERMINAL && hit5->trim_left + hit5->trim_right >= reject_trimlength) {
debug10(printf("5' rejected by trimlength %d + %d\n",hit5->trim_left,hit5->trim_right));
if (private5p == true) {
@@ -12396,8 +12478,11 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
debug5(printf("Rejecting terminal as NULL because hit3 trim %d+%d > reject_trimlength %d\n",hit3->trim_left,hit3->trim_right,reject_trimlength));
return (Stage3pair_T) NULL;
} else {
+#endif
new = (Stage3pair_T) MALLOC_OUT(sizeof(*new));
+#if 0
}
+#endif
if (pairtype == PAIRED_UNSPECIFIED || pairtype == UNSPECIFIED) {
/* Can get here from running GMAP improvement on a paired result */
@@ -13121,21 +13206,23 @@ hitpair_sort_cmp (const void *a, const void *b) {
Univcoord_T x_hit3_high, x_hit3_low, y_hit3_high, y_hit3_low;
Univcoord_T x_low, x_high, y_low, y_high;
- debug8(printf(" Comparing (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d\n",
+ debug8(printf(" Comparing (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d\n",
Pairtype_string(x->pairtype),hittype_string(x->hit5->hittype),
hittype_string(x->hit3->hittype),x,
x->hit5->low - x->hit5->chroffset,x->hit5->high - x->hit5->chroffset,
x->hit3->low - x->hit3->chroffset,x->hit3->high - x->hit3->chroffset,
x->dir,x->hit5->alias,x->hit3->alias,x->nmatches,x->nmatches_posttrim,
- start_amb_length(x->hit5) + end_amb_length(x->hit5),start_amb_length(x->hit3) + end_amb_length(x->hit3)));
+ start_amb_length(x->hit5) + end_amb_length(x->hit5),start_amb_length(x->hit3) + end_amb_length(x->hit3),
+ x->hit5->sensedir,x->hit3->sensedir));
- debug8(printf(" with (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d\n",
+ debug8(printf(" with (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d\n",
Pairtype_string(y->pairtype),hittype_string(y->hit5->hittype),
hittype_string(y->hit3->hittype),y,
y->hit5->low - y->hit5->chroffset,y->hit5->high - y->hit5->chroffset,
y->hit3->low - y->hit3->chroffset,y->hit3->high - y->hit3->chroffset,
y->dir,y->hit5->alias,y->hit3->alias,y->nmatches,y->nmatches_posttrim,
- start_amb_length(y->hit5) + end_amb_length(y->hit5),start_amb_length(y->hit3) + end_amb_length(y->hit3)));
+ start_amb_length(y->hit5) + end_amb_length(y->hit5),start_amb_length(y->hit3) + end_amb_length(y->hit3),
+ y->hit5->sensedir,y->hit3->sensedir));
x_hit5_low = normalize_coord(x->hit5->low,x->hit5->alias,x->hit5->chrlength);
@@ -13303,6 +13390,18 @@ hitpair_sort_cmp (const void *a, const void *b) {
} else if (y->indel_low < x->indel_low) {
return +1;
#endif
+
+ } else if (x->sense_consistent_p == true) {
+ if ((x->hit5->sensedir != 0 || x->hit3->sensedir != 0) &&
+ (y->hit5->sensedir == 0 && y->hit3->sensedir == 0)) {
+ return -1;
+ } else if ((y->hit5->sensedir != 0 || y->hit3->sensedir != 0) &&
+ (x->hit5->sensedir == 0 && x->hit3->sensedir == 0)) {
+ return +1;
+ } else {
+ return 0;
+ }
+
} else {
return 0;
}
@@ -13433,6 +13532,20 @@ hitpair_equiv_cmp (Stage3pair_T x, Stage3pair_T y) {
return +1;
#endif
+#if 0
+ } else if (x->sense_consistent_p == true) {
+ /* Used for sorting, but not equiv */
+ if ((x->hit5->sensedir != 0 || x->hit3->sensedir != 0) &&
+ (y->hit5->sensedir == 0 && y->hit3->sensedir == 0)) {
+ return -1;
+ } else if ((y->hit5->sensedir != 0 || y->hit3->sensedir != 0) &&
+ (x->hit5->sensedir == 0 && x->hit3->sensedir == 0)) {
+ return +1;
+ } else {
+ return 0;
+ }
+#endif
+
} else {
return 0;
}
@@ -14106,7 +14219,7 @@ pair_remove_bad_superstretches (bool *keep_p, Stage3pair_T superstretch, List_T
stage3pair->hit3->low - stage3pair->hit3->chroffset,stage3pair->hit3->high - stage3pair->hit3->chroffset,
stage3pair->dir,stage3pair->nmatches,stage3pair->nmatches_posttrim,
stage3pair->insertlength,stage3pair->amb_status_inside,
- start_amb_length(stage3pair->hit5)+ end_amb_length(stage3pair->hit5),start_amb_length(stage3pair->hit3) + end_amb_length(stage3pair->hit3)));
+ start_amb_length(stage3pair->hit5)+ end_amb_length(stage3pair->hit5),start_amb_length(stage3pair->hit3) + end_amb_length(stage3pair->hit3));
hitpair = (Stage3pair_T) List_head(q);
printf("subsumes that (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), nmatches: %d (%d posttrim), insertlength %d, amb_status_inside %d, amb_lengths %d and %d\n",
Pairtype_string(hitpair->pairtype),hittype_string(hitpair->hit5->hittype),
@@ -14115,7 +14228,7 @@ pair_remove_bad_superstretches (bool *keep_p, Stage3pair_T superstretch, List_T
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hitpair->dir,hitpair->nmatches,hitpair->nmatches_posttrim,
hitpair->insertlength,hitpair->amb_status_inside,
- start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5),start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3)));
+ start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5),start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3));
#endif
q = List_next(q);
}
@@ -14238,13 +14351,14 @@ pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
debug8(
for (i = 0; i < n; i++) {
hitpair = hitpairs[i];
- printf(" Initial %d (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d\n",
+ printf(" Initial %d (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d and %d\n",
i,Pairtype_string(hitpair->pairtype),hittype_string(hitpair->hit5->hittype),
hittype_string(hitpair->hit3->hittype),hitpair,
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hitpair->dir,hitpair->hit5->alias,hitpair->hit3->alias,hitpair->nmatches,hitpair->nmatches_posttrim,
- start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5),start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3));
+ start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5),start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3),
+ hitpair->hit5->sensedir,hitpair->hit3->sensedir);
}
);
@@ -14519,6 +14633,28 @@ Stage3pair_resolve_multimapping (List_T hitpairs) {
}
+List_T
+Stage3pair_filter_coverage (List_T hits, int min_coverage_5, int min_coverage_3) {
+ List_T newhits = NULL, p;
+ Stage3end_T hit5, hit3;
+ Stage3pair_T hitpair;
+
+ for (p = hits; p != NULL; p = List_next(p)) {
+ hitpair = (Stage3pair_T) List_head(p);
+ hit5 = hitpair->hit5;
+ hit3 = hitpair->hit3;
+ if (hit5->querylength - hit5->trim_left - hit5->trim_right >= min_coverage_5 ||
+ hit3->querylength - hit3->trim_left - hit3->trim_right >= min_coverage_3) {
+ newhits = List_push(newhits,(void *) hitpair);
+ } else {
+ Stage3pair_free(&hitpair);
+ }
+ }
+
+ List_free(&hits);
+ return newhits;
+}
+
Stage3pair_T *
Stage3pair_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
@@ -14757,6 +14893,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
int cutoff_level_5, cutoff_level_3, score;
int n;
int minscore5 = querylength5, minscore3 = querylength3, minscore = querylength5 + querylength3;
+ int best_nsegments;
/* int max_nmatches = 0, max_nmatches_posttrim; */
#ifdef USE_OPTIMAL_SCORE_BINGO
int minscore_bingo = querylength5 + querylength3;
@@ -14787,15 +14924,15 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hit5 = hitpair->hit5;
hit3 = hitpair->hit3;
- debug6(printf("hit5 %u..%u type %s, trim_left: %d%s, trim_right %d%s, start_ambig %d, end_ambig %d. hit3 %u..%u type %s, trim_left %d%s, trim_right %d%s, start_ambig %d, end_ambig %d.\n",
+ debug6(printf("hit5 %u..%u type %s, nsegments %d, trim_left: %d%s, trim_right %d%s, start_ambig %d, end_ambig %d. hit3 %u..%u type %s, nsegments %d, trim_left %d%s, trim_right %d%s, start_ambig %d, end_ambig %d, sensedirs %d and %d.\n",
hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,hittype_string(hit5->hittype),
- hit5->trim_left,hit5->trim_left_splicep ? " (splice)" : "",
+ hit5->nsegments,hit5->trim_left,hit5->trim_left_splicep ? " (splice)" : "",
hit5->trim_right,hit5->trim_right_splicep ? " (splice)" : "",
start_amb_length(hit5),end_amb_length(hit5),
hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,hittype_string(hit3->hittype),
- hit3->trim_left,hit3->trim_left_splicep ? " (splice)" : "",
+ hit3->nsegments,hit3->trim_left,hit3->trim_left_splicep ? " (splice)" : "",
hit3->trim_right,hit3->trim_right_splicep ? " (splice)" : "",
- start_amb_length(hit3),end_amb_length(hit3)));
+ start_amb_length(hit3),end_amb_length(hit3),hit5->sensedir,hit3->sensedir));
if (hit5->hittype == TERMINAL) {
/* Don't allow terminals to set trims */
@@ -15129,6 +15266,36 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
List_free(&hitpairlist);
+ /* Filter on nsegments */
+ if (finalp == true && optimal != NULL) {
+ hitpairlist = optimal;
+ optimal = (List_T) NULL;
+
+ hitpair = (Stage3pair_T) hitpairlist->first;
+ best_nsegments = hitpair->hit5->nsegments + hitpair->hit3->nsegments;
+
+ for (p = hitpairlist; p != NULL; p = p->rest) {
+ hitpair = (Stage3pair_T) p->first;
+ if (hitpair->hit5->nsegments + hitpair->hit3->nsegments < best_nsegments) {
+ best_nsegments = hitpair->hit5->nsegments + hitpair->hit3->nsegments;
+ }
+ }
+
+ for (p = hitpairlist; p != NULL; p = p->rest) {
+ hitpair = (Stage3pair_T) p->first;
+ if (hitpair->hit5->nsegments + hitpair->hit3->nsegments > best_nsegments + 2) {
+ debug6(printf("Eliminating a hit pair with nsegments %d+%d\n",hitpair->hit5->nsegments,hitpair->hit3->nsegments));
+ *eliminatedp = true;
+ Stage3pair_free(&hitpair);
+ } else {
+ debug6(printf("Keeping a hit pair with nsegments %d+%d\n",hitpair->hit5->nsegments,hitpair->hit3->nsegments));
+ optimal = List_push(optimal,hitpair);
+ }
+ }
+
+ List_free(&hitpairlist);
+ }
+
#if 0
/* Filter on pairlength */
diff --git a/src/stage3hr.h b/src/stage3hr.h
index bfe3461..47aa59d 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 166742 2015-06-02 02:00:54Z twu $ */
+/* $Id: stage3hr.h 173896 2015-09-12 00:11:40Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -38,7 +38,7 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
IIT_T genes_iit_in, int *genes_divint_crosstable_in,
IIT_T tally_iit_in, int *tally_divint_crosstable_in,
IIT_T runlength_iit_in, int *runlength_divint_crosstable_in,
- int reject_trimlength_in, bool distances_observed_p, int pairmax_in,
+ bool distances_observed_p, int pairmax_in,
Chrpos_T expected_pairlength, Chrpos_T pairlength_deviation,
int localsplicing_penalty_in, int indel_penalty_middle_in,
int antistranded_penalty_in, bool favor_multiexon_p_in,
@@ -394,6 +394,9 @@ Stage3end_sort_bymatches (List_T hits);
extern List_T
Stage3end_sort_by_paired_seenp (List_T hits);
+extern List_T
+Stage3end_filter_coverage (List_T hits, int min_coverage);
+
extern Stage3end_T *
Stage3end_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
Stage3end_T *stage3array, int maxpaths, Shortread_T queryseq,
@@ -478,6 +481,9 @@ Stage3pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp);
extern List_T
Stage3pair_resolve_multimapping (List_T hitpairs);
+extern List_T
+Stage3pair_filter_coverage (List_T hits, int min_coverage_5, int min_coverage_3);
+
extern Stage3pair_T *
Stage3pair_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
Stage3pair_T *stage3pairarray, int maxpaths,
diff --git a/src/substring.c b/src/substring.c
index a6043df..093fa78 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 167592 2015-06-15 18:56:59Z twu $";
+static char rcsid[] = "$Id: substring.c 173896 2015-09-12 00:11:40Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -159,7 +159,6 @@ static bool output_sam_p;
static Mode_T mode;
static double genomelength; /* For BLAST E-value */
-static int reject_trimlength;
char *
@@ -1563,8 +1562,7 @@ Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in,
IIT_T splicesites_iit_in, int *splicesites_divint_crosstable_in,
int donor_typeint_in, int acceptor_typeint_in, int trim_mismatch_score_in,
bool novelsplicingp_in, bool knownsplicingp_in,
- bool output_sam_p_in, Mode_T mode_in, Univcoord_T genomelength_in,
- int reject_trimlength_in) {
+ bool output_sam_p_in, Mode_T mode_in, Univcoord_T genomelength_in) {
print_nsnpdiffs_p = print_nsnpdiffs_p_in;
print_snplabels_p = print_snplabels_p_in;
show_refdiff_p = show_refdiff_p_in;
@@ -1590,7 +1588,6 @@ Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in,
mode = mode_in;
genomelength = (double) genomelength_in;
- reject_trimlength = reject_trimlength_in;
return;
}
@@ -1880,6 +1877,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
}
debug8(printf("Nonterminal trim %d vs reject_trimlength %d\n",nonterminal_trim,reject_trimlength));
+#if 0
if (nonterminal_trim >= reject_trimlength) {
/* Reject non-terminal alignments (including those by sarray search) with excessive trim */
/* Keep true terminals for now in case they help lead to GMAP alignments */
@@ -1888,6 +1886,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
Substring_free(&new);
return (T) NULL;
} else {
+#endif
new->querystart += new->trim_left;
new->queryend -= new->trim_right;
/* Check for minlength. Needed to avoid nonsensical terminal alignments */
@@ -1897,7 +1896,9 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
Substring_free(&new);
return (T) NULL;
}
+#if 0
}
+#endif
/* ? Should we spend the time to determine trim_left_splicep and
diff --git a/src/substring.h b/src/substring.h
index e4a12a7..32ee8b3 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 166827 2015-06-03 06:55:46Z twu $ */
+/* $Id: substring.h 173896 2015-09-12 00:11:40Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -38,8 +38,7 @@ Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in,
IIT_T splicesites_iit_in, int *splicesites_divint_crosstable_in,
int donor_typeint_in, int acceptor_typeint_in, int trim_mismatch_score_in,
bool novelsplicingp_in, bool knownsplicingp_in,
- bool output_sam_p_in, Mode_T mode_in, Univcoord_T genomelength_in,
- int reject_trimlength_in);
+ bool output_sam_p_in, Mode_T mode_in, Univcoord_T genomelength_in);
#define T Substring_T
typedef struct T *T;
diff --git a/src/uniqscan.c b/src/uniqscan.c
index c2359ad..58c9cc6 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 167592 2015-06-15 18:56:59Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 173896 2015-09-12 00:11:40Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -170,6 +170,7 @@ static int subopt_levels = 0;
1, then treated as a fraction of the querylength. Else, treated as
an integer */
static double user_maxlevel_float = 0.0;
+static double user_mincoverage_float = 0.0;
/* Really have only one indel penalty */
static int indel_penalty_middle = 2;
@@ -458,7 +459,7 @@ uniqueness_scan (bool from_right_p) {
/*barcode_length*/0,/*invertp*/0,/*copy_acc_p*/false,/*skipp*/false);
stage3array = Stage1_single_read(&npaths,&first_absmq,&second_absmq,
queryseq1,indexdb,indexdb2,indexdb_size_threshold,
- floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,user_mincoverage_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,/*distantsplicing_penalty*/100,min_shortend,
@@ -502,7 +503,7 @@ uniqueness_scan (bool from_right_p) {
/*barcode_length*/0,/*invertp*/0,/*copy_acc_p*/false,/*skipp*/false);
stage3array = Stage1_single_read(&npaths,&first_absmq,&second_absmq,
queryseq1,indexdb,indexdb2,indexdb_size_threshold,
- floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,user_mincoverage_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,/*distantsplicing_penalty*/100,min_shortend,
@@ -1260,7 +1261,7 @@ main (int argc, char *argv[]) {
Indel_setup(min_indel_end_matches,indel_penalty_middle);
Stage1hr_setup(/*use_sarray_p*/false,/*use_only_sarray_p*/false,index1part,index1interval,
spansize,chromosome_iit,nchromosomes,
- genome,genomealt,mode,/*maxpaths_search*/10,/*terminal_threshold*/5,/*reject_trimlength*/1000,
+ genome,genomealt,mode,/*maxpaths_search*/10,
splicesites,splicetypes,splicedists,nsplicesites,
novelsplicingp,knownsplicingp,/*find_dna_chimeras_p*/false,distances_observed_p,
subopt_levels,max_middle_insertions,max_middle_deletions,
@@ -1276,8 +1277,7 @@ main (int argc, char *argv[]) {
splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,trim_mismatch_score,
novelsplicingp,knownsplicingp,/*output_sam_p*/false,mode,
- Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
- /*reject_trimlength*/1000);
+ Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false));
Dynprog_single_setup(/*homopolymerp*/false);
Dynprog_genome_setup(novelsplicingp,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint);
@@ -1302,7 +1302,7 @@ main (int argc, char *argv[]) {
Stage3hr_setup(/*invert_first_p*/false,/*invert_second_p*/false,genome,
chromosome_iit,nchromosomes,circular_typeint,genes_iit,genes_divint_crosstable,
/*tally_iit*/NULL,/*tally_divint_crosstable*/NULL,
- /*runlength_iit*/NULL,/*runlength_divint_crosstable*/NULL,/*reject_trimlength*/1000,
+ /*runlength_iit*/NULL,/*runlength_divint_crosstable*/NULL,
distances_observed_p,pairmax,expected_pairlength,pairlength_deviation,
localsplicing_penalty,indel_penalty_middle,antistranded_penalty,
favor_multiexon_p,gmap_min_nconsecutive,index1part,index1interval,
diff --git a/util/gmap_build.pl.in b/util/gmap_build.pl.in
index e348413..830b6a0 100644
--- a/util/gmap_build.pl.in
+++ b/util/gmap_build.pl.in
@@ -1,5 +1,5 @@
#! @PERL@
-# $Id: gmap_build.pl.in 167264 2015-06-10 23:59:39Z twu $
+# $Id: gmap_build.pl.in 173897 2015-09-12 00:11:59Z twu $
use warnings;
@@ -143,6 +143,7 @@ if (defined($contigs_mapped_p)) {
my $coordsfile = "$destdir/$dbname.coords";
my $fasta_sources = "$destdir/$dbname.sources";
+
$FP = new IO::File(">$fasta_sources") or die "Could not create $fasta_sources";
foreach $fasta (@ARGV) {
print $FP $fasta . "\n";
@@ -173,7 +174,7 @@ $genomecompfile = compress_genome($nmessages_flag,$bindir,$dbdir,$dbname,$gmap_p
unshuffle_genome($bindir,$dbdir,$dbname,$genomecompfile);
-$index_cmd = "$bindir/gmapindex -k $kmersize -q $sampling $nmessages_flag -d $dbname -F $dbdir -D $dbdir";
+$index_cmd = "$bindir/gmapindex -k $kmersize -q $sampling $nmessages_flag -d $dbname -F \"$dbdir\" -D \"$dbdir\"";
if (count_index_offsets($index_cmd,$genomecompfile) == 1) {
$index_cmd .= " -H";
@@ -232,18 +233,18 @@ sub create_coords {
if (defined($mdfile)) {
# MD file cannot specify that a chromosome is circular
- $cmd = "$bindir/md_coords -o $coordsfile $mdfile";
+ $cmd = "$bindir/md_coords -o \"$coordsfile\" $mdfile";
} else {
if (defined($fasta_pipe)) {
- $cmd = "$fasta_pipe | $bindir/fa_coords $circular_flag $contigs_mapped_flag -o $coordsfile";
+ $cmd = "$fasta_pipe | $bindir/fa_coords $circular_flag $contigs_mapped_flag -o \"$coordsfile\"";
} else {
- $cmd = "$bindir/fa_coords $gunzip_flag $circular_flag $contigs_mapped_flag -o $coordsfile";
+ $cmd = "$bindir/fa_coords $gunzip_flag $circular_flag $contigs_mapped_flag -o \"$coordsfile\"";
}
if (defined($chrnamefile)) {
$cmd .= " -n $chrnamefile";
}
if (!defined($fasta_pipe)) {
- $cmd .= " -f $fasta_sources";
+ $cmd .= " -f \"$fasta_sources\"";
}
}
print STDERR "Running $cmd\n";
@@ -258,9 +259,9 @@ sub make_gmap_process_pipe {
my ($fasta_pipe, $gunzip_flag, $bindir, $coordsfile, $fasta_sources) = @_;
if (defined($fasta_pipe)) {
- return "$fasta_pipe | $bindir/gmap_process -c $coordsfile";
+ return "$fasta_pipe | $bindir/gmap_process -c \"$coordsfile\"";
} else {
- return "$bindir/gmap_process $gunzip_flag -c $coordsfile -f $fasta_sources";
+ return "$bindir/gmap_process $gunzip_flag -c \"$coordsfile\" -f \"$fasta_sources\"";
}
}
@@ -269,7 +270,7 @@ sub make_contig {
$bindir, $dbdir, $dbname, $gmap_process_pipe) = @_;
my ($cmd, $rc);
- $cmd = "$gmap_process_pipe | $bindir/gmapindex $nmessages_flag -d $dbname -D $dbdir -A $chr_order_flag";
+ $cmd = "$gmap_process_pipe | $bindir/gmapindex $nmessages_flag -d $dbname -D \"$dbdir\" -A $chr_order_flag";
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -283,7 +284,7 @@ sub compress_genome {
my $genomecompfile = "$dbdir/$dbname.genomecomp";
my ($cmd, $rc);
- $cmd = "$gmap_process_pipe | $bindir/gmapindex $nmessages_flag -d $dbname -F $dbdir -D $dbdir -G";
+ $cmd = "$gmap_process_pipe | $bindir/gmapindex $nmessages_flag -d $dbname -F \"$dbdir\" -D \"$dbdir\" -G";
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -296,7 +297,7 @@ sub unshuffle_genome {
my ($bindir, $dbdir, $dbname, $genomecompfile) = @_;
my ($cmd, $rc);
- $cmd = "cat $genomecompfile | $bindir/gmapindex -d $dbname -U > $dbdir/$dbname.genomebits128";
+ $cmd = "cat \"$genomecompfile\" | $bindir/gmapindex -d $dbname -U > \"$dbdir/$dbname.genomebits128\"";
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -323,7 +324,7 @@ sub count_index_offsets {
my $huge_genome_p;
my ($cmd, $noffsets);
- $cmd = "cat $genomecompfile | $index_cmd -N";
+ $cmd = "cat \"$genomecompfile\" | $index_cmd -N";
print STDERR "Running $cmd\n";
$noffsets = `$cmd`;
chop $noffsets;
@@ -342,7 +343,7 @@ sub create_index_offsets {
my ($index_cmd, $compression_flag, $genomecompfile) = @_;
my ($cmd, $rc);
- $cmd = "$index_cmd -O $compression_flag $genomecompfile";
+ $cmd = "$index_cmd -O $compression_flag \"$genomecompfile\"";
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -355,7 +356,7 @@ sub create_index_positions {
my ($index_cmd, $genomecompfile) = @_;
my ($cmd, $rc);
- $cmd = "$index_cmd -P $genomecompfile";
+ $cmd = "$index_cmd -P \"$genomecompfile\"";
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -369,7 +370,7 @@ sub make_enhanced_suffix_array {
my ($cmd, $rc);
# Suffix array
- $cmd = "$bindir/gmapindex -d $dbname -F $dbdir -D $dbdir -S";
+ $cmd = "$bindir/gmapindex -d $dbname -F \"$dbdir\" -D \"$dbdir\" -S";
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -377,7 +378,7 @@ sub make_enhanced_suffix_array {
sleep($sleeptime);
# LCP and child arrays
- $cmd = "$bindir/gmapindex -d $dbname -F $dbdir -D $dbdir -L";
+ $cmd = "$bindir/gmapindex -d $dbname -F \"$dbdir\" -D \"$dbdir\" -L";
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -385,7 +386,7 @@ sub make_enhanced_suffix_array {
sleep($sleeptime);
# Compressed suffix array
- # $cmd = "$bindir/gmapindex -d $dbname -F $dbdir -D $dbdir -C";
+ # $cmd = "$bindir/gmapindex -d $dbname -F \"$dbdir\" -D \"$dbdir\" -C";
# print STDERR "Running $cmd\n";
# if (($rc = system($cmd)) != 0) {
# die "$cmd failed with return code $rc";
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list