[med-svn] [gmap] 04/06: Imported Upstream version 2016-08-08
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Tue Aug 9 09:08:05 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 84f4c61401075141553d1c63d88e9fbc0af453f3
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Tue Aug 9 10:48:35 2016 +0200
Imported Upstream version 2016-08-08
---
ChangeLog | 169 +++--
VERSION | 2 +-
configure | 24 +-
src/atoi.c | 6 +-
src/atoiindex.c | 2 +-
src/cmet.c | 6 +-
src/comp.h | 2 +-
src/doublelist.c | 2 +-
src/doublelist.h | 2 +-
src/filestring.c | 94 +--
src/genome_sites.c | 26 +-
src/gmap.c | 2 +-
src/gsnap.c | 2 +-
src/inbuffer.c | 2 +-
src/intlist.c | 2 +-
src/intlist.h | 2 +-
src/pair.c | 2 +-
src/pairpool.c | 2 +-
src/samprint.c | 54 +-
src/sarray-read.c | 2 +-
src/sedgesort.c | 2 +-
src/sedgesort.h | 2 +-
src/shortread.c | 2 +-
src/splice.c | 61 +-
src/stage1hr.c | 1752 ++++++++++++++++++++++++++++++++++++++++------------
src/stage3.c | 13 +-
src/stage3.h | 2 +-
src/stage3hr.c | 2 +-
src/stage3hr.h | 2 +-
src/substring.c | 137 ++--
src/substring.h | 10 +-
src/uintlist.c | 2 +-
src/uintlist.h | 2 +-
src/uniqscan.c | 2 +-
src/univdiag.h | 2 +-
35 files changed, 1731 insertions(+), 667 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index d5366eb..f9cb1d7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,110 +1,145 @@
+2016-08-08 twu
+
+ * atoi.c, cmet.c: Fixed reduce procedures for 64-bit oligos
+
+ * stage1hr.c: Fixed values of splice_pos_start and splice_pos_end given to
+ Genome_donor_positions and related functions
+
+ * filestring.c: Handling the case where stringlen is negative
+
+ * stage3.c: Merged revision 195962 from trunk to fix an issue where we tried
+ to use pairs_pretrim after path_trim altered the pairs
+
+ * samprint.c, substring.c, substring.h: Merged revision 195960 from trunk to
+ fix XT field to have correct fusion coordinates
+
+2016-08-04 twu
+
+ * 2016-08-02-long-read-fusions, comp.h, config.site.rescomp.prd, pair.c,
+ pairpool.c, sarray-read.c, src, stage3.c, util: Merged revisions 195492
+ through 195762 from branches/2016-07-01-better-triage to get latest fixes
+
+ * 2016-08-02-long-read-fusions, Makefile.gsnaptoo.am, comp.h,
+ config.site.rescomp.prd, configure.ac, filestring.c, gsnap.c, pair.c,
+ pairpool.c, samprint.c, sarray-read.c, sedgesort.c, sedgesort.h,
+ shortread.c, src, stage1hr.c, stage3.c, stage3hr.c, stage3hr.h,
+ substring.c, substring.h, univdiag.c, univdiag.h: Merged revisions 193240
+ to 195491 from branches/2016-07-01-better-triage for better performance
+
+2016-08-03 twu
+
+ * stage1hr.c: Hard-coded some values for plusp
+
+ * splice.c: Using new interface to Substring_new_donor and
+ Substring_new_acceptor
+
+ * stage1hr.c: In computing spliceable segments, using a variable for holding
+ previous spliceable information, to resolve writing to an uninitialized
+ ptr at end. Using a streamlined version of splicing for distant RNA.
+
+ * substring.c, substring.h: Added parameters substring_querystart and
+ substring_queryend to Substring_new_donor and Substring_new_acceptor, so
+ we can handle splicing segments in the middle of the read
+
+ * genome_sites.c: Added debugging statements
+
+ * stage1hr.c: Allowing fusions to occur between middle segments that are
+ spliceable on their distal ends
+
2016-08-02 twu
- * gtf_genes.pl.in, gtf_introns.pl.in, gtf_splicesites.pl.in, util: Merged
- revision 195495 from trunk to use preference order of desired keys, rather
- than the text
+ * 2016-08-02-long-read-fusions: Created branch to find fusions in long reads
- * comp.h, pair.c, pairpool.c, src, stage3.c, stage3hr.c: Merging revisions
- 195548 through 195550 from trunk to revert recent changes to extraexon
- comp, addition of dual breaks, and restoring final procedure in
+ * stage3hr.c: Restoring final procedure based on nmatches in
Stage3pair_optimal_score
- * sarray-read.c: Limiting suffix array procedure by nmisses_allowed again
+ * stage3.c: Reverting from revision 195487 to allow extraexon comps again
+ and from revision 193238 to always insert dual break alignments
- * inbuffer.c: Fixed problem where --force-single-end terminated when file
- had reads that were a multiple of --input-buffer-size
+ * comp.h, pair.c, pairpool.c: Reverting from revision 195484 to allow
+ extraexon comps again
- * shortread.c: Changed debugging statements to write to stderr
+ * gtf_genes.pl.in, gtf_introns.pl.in, gtf_splicesites.pl.in: Imposing
+ preference order based on desired keys, rather than the text
- * atoiindex.c: Merged revision 194848 from trunk to fix calculation of oligo
- using new block algorithm
+ * inbuffer.c, shortread.c, src: Merged revisions 195492 and 195493 to fix
+ problem where --force-single-end terminated when a file had reads that
+ were a multiple of --input-buffer-size
- * stage3.c: Merged revision 195487 from trunk to use shortgap comp instead
- of extraexon comp for representing dual breaks
+ * stage3.c: Using shortgap comp instead of extraexon comp for representing
+ dual breaks
- * comp.h, pair.c, pairpool.c: Merged revision 195484 from trunk to use
- shortgap comp instead of extraexon comp for representing dual breaks
+ * comp.h, pair.c, pairpool.c: Using shortgap comp instead of extraexon comp
+ for representing dual breaks
- * shortread.c: Merged revision 195483 from trunk to fix issues in reading
- multiple pairs of files from command line
+ * shortread.c: Fixed issues in reading multiple pairs of files from command
+ line
-2016-07-18 twu
+2016-07-23 twu
- * filestring.c: Implemented right-justified strings
+ * atoiindex.c: Fixed calculation of oligo using new block algorithm
2016-07-12 twu
- * 2016-07-01-better-triage, archive.html, doublelist.c, doublelist.h,
- gmap.c, gsnap.c, intlist.c, intlist.h, sarray-read.c, src, stage1hr.c,
- stage3.c, stage3.h, stage3hr.c, substring.c, uintlist.c, uintlist.h,
- uniqscan.c: Merged revisions from trunk
+ * stage1hr.c: For paired terminals, assigning final pairtype to be
+ concordant
- * config.site.rescomp.prd, configure.ac: Added -fomit-frame-pointer
+ * archive.html: Exposed version 2015-07-23. Improved formatting.
- * univdiag.c, univdiag.h: Implemented Univdiag_diagonal_cmp
+ * stage3hr.c, substring.c: Handling the special case when alignstart or
+ alignend is requested on an ambiguous substring
- * substring.c: Using new format types for filestrings
+ * stage3hr.c: Computing insertlength and concordance properly for
+ overlapping dual GMAP alignments
- * stage1hr.c: Separated code for heaps and loser trees. Reverted to using
- heaps.
+ * stage1hr.c: For dynamic programming of anchor segments, proceeding from
+ closest to farthest segments, to favor shorter splice distances. Resolved
+ uninitialized variable when completeset algorithm is called but
+ spanningset was not called, so read_oligos was not called. Skipping
+ re-alignment of hits that are already have a type of GMAP.
- * shortread.c: Using new format types for filestrings
+ * sarray-read.c: Resolving ambiguous ends if one dominates by both
+ probability and splice distance
- * sarray-read.c: Using Sedgewick sort. For left and right diagonals, trying
- to consolidate segments on the same diagonal
-
- * samprint.c: Consolidated some calls to Filestring_put
+ * gmap.c, gsnap.c, uniqscan.c: Using new interface to Stage3_setup
- * gsnap.c: Making --end-detail=low the default
+ * stage3.c, stage3.h: Adding dual break for both SAM and non-SAM output,
+ needed to give the correct CIGAR starting coordinate
- * stage3.c: Restored backwards motion of ptr
+ * doublelist.c, doublelist.h, intlist.c, intlist.h, uintlist.c, uintlist.h:
+ Implemented procedures for keeping a single item in the list
- * stage3hr.h: Added Stage3pair_determine_pairtype
+2016-07-11 twu
- * stage3hr.c: Changed trim threshold for paired terminals from 15 to 8.
- Solving ends if the trims are 30 or more, regardless of --end-detail
- value.
+ * sarray-read.c: Among ambiguous splice segments, ranking by probability and
+ selecting closest one if it is less than half the distance of the second
+ one
- * pair.c: Added debugging macros for hiding method information
+ * substring.c: Improved debugging statements
- * filestring.c: Made transfer_string routine more efficient. Added ability
- to transfer strings in reverse and reverse complement format.
+ * stage3hr.c: Improved debugging statements
- * Makefile.gsnaptoo.am, sedgesort.c, sedgesort.h: Added Sedgewick sort
- routines
+ * sarray-read.c: Fixed value of substring1p passed to Substring_new_ambig
+ for alignments on the minus strand, which resulted in problems with the
+ --merge-overlap feature
2016-07-06 twu
- * stage1hr.c: Replaced heap in spanningset algorithm with a loser tree.
- Allowing different anchor length criteria for different mods. Changing
- paired result priorities so concordant results take priority over
- concordant terminals, which then take priority over samechr and
- translocations.
-
-2016-07-01 twu
+ * stage3.c: Restored backward movement of ptr
- * substring.c, substring.h: Defining Substring_nmismatches_region
+ * stage1hr.c: Fixed infinite loop due to circular list
- * stage3hr.c: Using max_trim instead of total_trim to determine whether a
- pair is a concordant terminal. Revised criteria for performing GMAP
- algorithm on middle part of substrings, now based on mismatches in any
- substring region being greater than 2.
+ * gsnap.c: Changed default end detail to be medium
- * stage1hr.c: Revised criteria for performing spanningset and completeset
- algorithms. Spanningset run if a terminal end has total trim > 15.
- Completeset run if a terminal score is greater than done_level.
+ * substring.c: Fixed standard GSNAP output for deletions, by reducing the
+ number of final dashes
- * sarray-read.c: Ignoring value of nmisses_allowed, thereby allowing more
- solutions to come from the suffix array algorithm
+ * stage3hr.c: Added debugging statements
- * pair.c, stage3.c: Restored backwards movement in processing pairs, but
- making an exception when we reach the last pair
+ * pair.c: Restored backward movement of ptr
2016-06-30 twu
- * 2016-07-01-better-triage: Created branch to triage better for GSNAP or
- GMAP
-
* pair.c, stage3.c: No longer going backward after an indel, which could
cause an infinite loop
diff --git a/VERSION b/VERSION
index d04a41d..6f5cbc7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-07-11
\ No newline at end of file
+2016-08-08
\ No newline at end of file
diff --git a/configure b/configure
index 54ac889..1e873be 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-07-11.
+# Generated by GNU Autoconf 2.69 for gmap 2016-08-08.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-07-11'
-PACKAGE_STRING='gmap 2016-07-11'
+PACKAGE_VERSION='2016-08-08'
+PACKAGE_STRING='gmap 2016-08-08'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
PACKAGE_URL=''
@@ -1368,7 +1368,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2016-07-11 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-08-08 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1439,7 +1439,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2016-07-11:";;
+ short | recursive ) echo "Configuration of gmap 2016-08-08:";;
esac
cat <<\_ACEOF
@@ -1574,7 +1574,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2016-07-11
+gmap configure 2016-08-08
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2180,7 +2180,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2016-07-11, which was
+It was created by gmap $as_me 2016-08-08, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2530,8 +2530,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-07-11" >&5
-$as_echo "2016-07-11" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-08-08" >&5
+$as_echo "2016-08-08" >&6; }
### Read defaults
@@ -4396,7 +4396,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2016-07-11'
+ VERSION='2016-08-08'
cat >>confdefs.h <<_ACEOF
@@ -20072,7 +20072,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2016-07-11, which was
+This file was extended by gmap $as_me 2016-08-08, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20138,7 +20138,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-gmap config.status 2016-07-11
+gmap config.status 2016-08-08
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/src/atoi.c b/src/atoi.c
index c5fce58..386c4a1 100644
--- a/src/atoi.c
+++ b/src/atoi.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: atoi.c 181327 2015-12-19 13:07:25Z twu $";
+static char rcsid[] = "$Id: atoi.c 195988 2016-08-08 19:29:00Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -16403,7 +16403,7 @@ Atoi_reduce_tc (Oligospace_T oligo) {
#ifdef HAVE_64_BIT
/* Handles 48 bits, or k-mer up to 24 */
- reduced = (Oligospace_T) convert_tc[oligo >> 32];
+ reduced = (Oligospace_T) convert_tc[oligo >> 32 & 0xFFFF];
reduced <<= 32; /* Put in separate line to make it clear we want 64 bits */
reduced |= (Oligospace_T) (convert_tc[oligo >> 16 & 0xFFFF] << 16);
reduced |= (Oligospace_T) convert_tc[oligo & 0xFFFF];
@@ -16423,7 +16423,7 @@ Atoi_reduce_ag (Oligospace_T oligo) {
#ifdef HAVE_64_BIT
/* Handles 48 bits, or k-mer up to 24 */
- reduced = (Oligospace_T) convert_ag[oligo >> 32];
+ reduced = (Oligospace_T) convert_ag[oligo >> 32 & 0xFFFF];
reduced <<= 32; /* Put in separate line to make it clear we want 64 bits */
reduced |= (Oligospace_T) (convert_ag[oligo >> 16 & 0xFFFF] << 16);
reduced |= (Oligospace_T) convert_ag[oligo & 0xFFFF];
diff --git a/src/atoiindex.c b/src/atoiindex.c
index 793d1f7..20b16a8 100644
--- a/src/atoiindex.c
+++ b/src/atoiindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: atoiindex.c 195489 2016-08-02 00:16:53Z twu $";
+static char rcsid[] = "$Id: atoiindex.c 194848 2016-07-23 21:11:43Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/cmet.c b/src/cmet.c
index dfd41b3..ea9f6f5 100644
--- a/src/cmet.c
+++ b/src/cmet.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: cmet.c 181327 2015-12-19 13:07:25Z twu $";
+static char rcsid[] = "$Id: cmet.c 195988 2016-08-08 19:29:00Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -16406,7 +16406,7 @@ Cmet_reduce_ct (Oligospace_T oligo) {
#ifdef HAVE_64_BIT
/* Handles 48 bits, or k-mer up to 24 */
- reduced = (Oligospace_T) convert_ct[oligo >> 32];
+ reduced = (Oligospace_T) convert_ct[oligo >> 32 & 0xFFFF];
reduced <<= 32; /* Put in separate line to make it clear we want 64 bits */
reduced |= (Oligospace_T) (convert_ct[oligo >> 16 & 0xFFFF] << 16);
reduced |= (Oligospace_T) convert_ct[oligo & 0xFFFF];
@@ -16425,7 +16425,7 @@ Cmet_reduce_ga (Oligospace_T oligo) {
#ifdef HAVE_64_BIT
/* Handles 48 bits, or k-mer up to 24 */
- reduced = (Oligospace_T) convert_ga[oligo >> 32];
+ reduced = (Oligospace_T) convert_ga[oligo >> 32 & 0xFFFF];
reduced <<= 32; /* Put in separate line to make it clear we want 64 bits */
reduced |= (Oligospace_T) (convert_ga[oligo >> 16 & 0xFFFF] << 16);
reduced |= (Oligospace_T) convert_ga[oligo & 0xFFFF];
diff --git a/src/comp.h b/src/comp.h
index e3aa9ef..5115764 100644
--- a/src/comp.h
+++ b/src/comp.h
@@ -1,4 +1,4 @@
-/* $Id: comp.h 195553 2016-08-02 17:32:39Z twu $ */
+/* $Id: comp.h 195763 2016-08-04 01:37:20Z twu $ */
#ifndef COMP_INCLUDED
#define COMP_INCLUDED
diff --git a/src/doublelist.c b/src/doublelist.c
index 0dd3c3a..e9f95a0 100644
--- a/src/doublelist.c
+++ b/src/doublelist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: doublelist.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: doublelist.c 193875 2016-07-12 02:43:38Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/doublelist.h b/src/doublelist.h
index e6e271f..c03c51a 100644
--- a/src/doublelist.h
+++ b/src/doublelist.h
@@ -1,4 +1,4 @@
-/* $Id: doublelist.h 193899 2016-07-12 04:41:34Z twu $ */
+/* $Id: doublelist.h 193875 2016-07-12 02:43:38Z twu $ */
#ifndef DOUBLELIST_INCLUDED
#define DOUBLELIST_INCLUDED
diff --git a/src/filestring.c b/src/filestring.c
index ebd9651..aea43ac 100644
--- a/src/filestring.c
+++ b/src/filestring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: filestring.c 194346 2016-07-18 17:06:16Z twu $";
+static char rcsid[] = "$Id: filestring.c 195969 2016-08-08 17:01:27Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -249,21 +249,23 @@ static void
transfer_string (T this, char *string, int stringlen) {
char *block, *q;
- q = string;
- while (this->nleft <= stringlen) {
- strncpy(this->ptr,q,this->nleft);
- q += this->nleft;
- stringlen -= this->nleft;
+ if (stringlen > 0) {
+ q = string;
+ while (this->nleft <= stringlen) {
+ strncpy(this->ptr,q,this->nleft);
+ q += this->nleft;
+ stringlen -= this->nleft;
- block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
- this->blocks = List_push_out(this->blocks,(void *) block);
- this->nleft = BLOCKSIZE;
- this->ptr = &(block[0]);
- }
+ block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+ this->blocks = List_push_out(this->blocks,(void *) block);
+ this->nleft = BLOCKSIZE;
+ this->ptr = &(block[0]);
+ }
- strncpy(this->ptr,q,stringlen);
- this->ptr += stringlen;
- this->nleft -= stringlen;
+ strncpy(this->ptr,q,stringlen);
+ this->ptr += stringlen;
+ this->nleft -= stringlen;
+ }
return;
}
@@ -290,23 +292,25 @@ static void
transfer_string_reverse (T this, char *string, int stringlen) {
char *block, *q;
- q = &(string[stringlen]);
- while (this->nleft <= stringlen) {
- q -= this->nleft;
- strncpy(this->ptr,q,this->nleft);
- reverse_inplace(this->ptr,this->nleft);
- stringlen -= this->nleft;
+ if (stringlen > 0) {
+ q = &(string[stringlen]);
+ while (this->nleft <= stringlen) {
+ q -= this->nleft;
+ strncpy(this->ptr,q,this->nleft);
+ reverse_inplace(this->ptr,this->nleft);
+ stringlen -= this->nleft;
- block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
- this->blocks = List_push_out(this->blocks,(void *) block);
- this->nleft = BLOCKSIZE;
- this->ptr = &(block[0]);
- }
+ block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+ this->blocks = List_push_out(this->blocks,(void *) block);
+ this->nleft = BLOCKSIZE;
+ this->ptr = &(block[0]);
+ }
- strncpy(this->ptr,string,stringlen);
- reverse_inplace(this->ptr,stringlen);
- this->ptr += stringlen;
- this->nleft -= stringlen;
+ strncpy(this->ptr,string,stringlen);
+ reverse_inplace(this->ptr,stringlen);
+ this->ptr += stringlen;
+ this->nleft -= stringlen;
+ }
return;
}
@@ -338,23 +342,25 @@ static void
transfer_string_revcomp (T this, char *string, int stringlen) {
char *block, *q;
- q = &(string[stringlen]);
- while (this->nleft <= stringlen) {
- q -= this->nleft;
- strncpy(this->ptr,q,this->nleft);
- revcomp_inplace(this->ptr,this->nleft);
- stringlen -= this->nleft;
+ if (stringlen > 0) {
+ q = &(string[stringlen]);
+ while (this->nleft <= stringlen) {
+ q -= this->nleft;
+ strncpy(this->ptr,q,this->nleft);
+ revcomp_inplace(this->ptr,this->nleft);
+ stringlen -= this->nleft;
- block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
- this->blocks = List_push_out(this->blocks,(void *) block);
- this->nleft = BLOCKSIZE;
- this->ptr = &(block[0]);
- }
+ block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+ this->blocks = List_push_out(this->blocks,(void *) block);
+ this->nleft = BLOCKSIZE;
+ this->ptr = &(block[0]);
+ }
- strncpy(this->ptr,string,stringlen);
- revcomp_inplace(this->ptr,stringlen);
- this->ptr += stringlen;
- this->nleft -= stringlen;
+ strncpy(this->ptr,string,stringlen);
+ revcomp_inplace(this->ptr,stringlen);
+ this->ptr += stringlen;
+ this->nleft -= stringlen;
+ }
return;
}
diff --git a/src/genome_sites.c b/src/genome_sites.c
index 32f041f..e3724dd 100644
--- a/src/genome_sites.c
+++ b/src/genome_sites.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome_sites.c 184186 2016-02-12 20:38:12Z twu $";
+static char rcsid[] = "$Id: genome_sites.c 195749 2016-08-03 23:35:09Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -24670,7 +24670,7 @@ static const Genomecomp_T antiacceptor_bits[] =
/* Code starts here */
-#if 0
+#ifdef DEBUG2
static void
write_chars_comp (UINT4 high, UINT4 low, UINT4 flags) {
char Buffer[33];
@@ -24711,7 +24711,7 @@ write_chars_comp (UINT4 high, UINT4 low, UINT4 flags) {
#endif
-#if 0
+#ifdef DEBUG2
static void
Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T endpos) {
/* Chrpos_T length = endpos - startpos; */
@@ -25045,10 +25045,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
#ifdef HAVE_BUILTIN_CTZ
pos = offset + (relpos = __builtin_ctz(found));
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
@@ -25060,10 +25062,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
debug2(printf("found is %08X, -found & found is %08X\n",found,-found & found));
pos = offset + mod_37_bit_position[(lowbit = -found & found) % 37];
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
@@ -25106,10 +25110,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
#ifdef HAVE_BUILTIN_CTZ
pos = offset + (relpos = __builtin_ctz(found));
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
@@ -25121,10 +25127,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
debug2(printf("found is %08X, -found & found is %08X\n",found,-found & found));
pos = offset + mod_37_bit_position[(lowbit = -found & found) % 37];
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
@@ -25152,10 +25160,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
debug2(printf("low_halfsite & prev_high_halfsite => offset %llu - 1\n",(unsigned long long) offset));
pos = offset - 1; /* verified that this should be offset - 1 */
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
@@ -25168,10 +25178,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
#ifdef HAVE_BUILTIN_CTZ
pos = offset + (relpos = __builtin_ctz(found));
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
@@ -25183,10 +25195,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
debug2(printf("found is %08X, -found & found is %08X\n",found,-found & found));
pos = offset + mod_37_bit_position[(lowbit = -found & found) % 37];
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
@@ -25217,10 +25231,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
debug2(printf("low_halfsite & prev_high_halfsite => offset %llu - 1\n",(unsigned long long) offset));
pos = offset - 1; /* verified that this should be offset - 1 */
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
@@ -25233,10 +25249,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
#ifdef HAVE_BUILTIN_CTZ
pos = offset + (relpos = __builtin_ctz(found));
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
@@ -25248,10 +25266,12 @@ splicesite_positions (int *site_positions, int *site_knowni, int *knownpos, int
debug2(printf("found is %08X, -found & found is %08X\n",found,-found & found));
pos = offset + mod_37_bit_position[(lowbit = -found & found) % 37];
while (*knownpos < pos) {
+ debug2(printf("Adding knownpos #%d at %d < pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
}
if (*knownpos == pos) {
+ debug2(printf("Adding knownpos #%d at %d == pos %d\n",*knowni,*knownpos,pos));
site_knowni[nfound] = *knowni++;
site_positions[nfound++] = *knownpos++;
} else {
diff --git a/src/gmap.c b/src/gmap.c
index 5141a2c..39c14f4 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: gmap.c 193877 2016-07-12 02:46:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/gsnap.c b/src/gsnap.c
index e0b4048..264c34c 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: gsnap.c 195760 2016-08-04 00:12:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/inbuffer.c b/src/inbuffer.c
index 064d817..a9be1b7 100644
--- a/src/inbuffer.c
+++ b/src/inbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: inbuffer.c 195493 2016-08-02 02:01:39Z twu $";
+static char rcsid[] = "$Id: inbuffer.c 195494 2016-08-02 02:03:20Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/intlist.c b/src/intlist.c
index 0dfd82d..e3c040c 100644
--- a/src/intlist.c
+++ b/src/intlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: intlist.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: intlist.c 193875 2016-07-12 02:43:38Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/intlist.h b/src/intlist.h
index ba85895..537c32e 100644
--- a/src/intlist.h
+++ b/src/intlist.h
@@ -1,4 +1,4 @@
-/* $Id: intlist.h 193899 2016-07-12 04:41:34Z twu $ */
+/* $Id: intlist.h 193875 2016-07-12 02:43:38Z twu $ */
#ifndef INTLIST_INCLUDED
#define INTLIST_INCLUDED
diff --git a/src/pair.c b/src/pair.c
index e0a0d5c..c24366a 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 195553 2016-08-02 17:32:39Z twu $";
+static char rcsid[] = "$Id: pair.c 195763 2016-08-04 01:37:20Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/pairpool.c b/src/pairpool.c
index aa61a0a..7572f6e 100644
--- a/src/pairpool.c
+++ b/src/pairpool.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pairpool.c 195553 2016-08-02 17:32:39Z twu $";
+static char rcsid[] = "$Id: pairpool.c 195763 2016-08-04 01:37:20Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/samprint.c b/src/samprint.c
index 3ca6985..d2fce93 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 193892 2016-07-12 04:07:27Z twu $";
+static char rcsid[] = "$Id: samprint.c 195961 2016-08-08 16:36:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -858,7 +858,8 @@ print_cigar (Filestring_T fp, char type, int stringlength, int querypos, int que
debug1(printf(" Pushing %dS\n",matchlength));
FPRINTF(fp,"%dS",matchlength);
} else {
- debug1(printf(" Pushing %dH\n",matchlength));
+ debug1(printf(" Pushing %dH because matchlength %d != trimlength %d\n",
+ matchlength,matchlength,trimlength));
FPRINTF(fp,"%dH",matchlength);
}
}
@@ -909,7 +910,8 @@ print_cigar (Filestring_T fp, char type, int stringlength, int querypos, int que
debug1(printf(" Pushing %dS\n",matchlength));
FPRINTF(fp,"%dS",matchlength);
} else {
- debug1(printf(" Pushing %dH\n",matchlength));
+ debug1(printf(" Pushing %dH because matchlength %d != trimlength %d\n",
+ matchlength,matchlength,trimlength));
FPRINTF(fp,"%dH",matchlength);
}
}
@@ -2173,7 +2175,7 @@ halfacceptor_dinucleotide (char *acceptor2, char *acceptor1, Substring_T accepto
static void
-print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Stage3end_T this, Stage3end_T mate,
+print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
@@ -2667,7 +2669,8 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Stage3end_T t
/* 12. TAGS: XT */
if (print_xt_p == true) {
FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
- FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_chrpos,acceptor_strand,acceptor_chr,acceptor_chrpos);
+ FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord(donor),
+ acceptor_strand,acceptor_chr,Substring_chr_splicecoord(acceptor));
}
/* 12. TAGS: XC */
@@ -2687,7 +2690,7 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Stage3end_T t
static void
-print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
+print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
@@ -3171,7 +3174,8 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T acceptor, Stage3e
/* 12. TAGS: XT */
if (print_xt_p == true) {
FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
- FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_chrpos,acceptor_strand,acceptor_chr,acceptor_chrpos);
+ FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord(donor),
+ acceptor_strand,acceptor_chr,Substring_chr_splicecoord(acceptor));
}
@@ -3270,7 +3274,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
/* NEEDS WORK: Need to decide whether to split halfdonor or halfacceptor */
/* Not sure if circular chromosomes should participate in distant splicing anyway */
if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3280,7 +3284,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
@@ -3291,7 +3295,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3304,7 +3308,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
}
if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3314,7 +3318,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
@@ -3325,7 +3329,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3339,7 +3343,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
} else if (Stage3end_sensedir(this) == SENSE_ANTI) {
if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3349,7 +3353,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
@@ -3360,7 +3364,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3373,7 +3377,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
}
if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3383,7 +3387,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
@@ -3394,7 +3398,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3409,7 +3413,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
} else {
/* SENSE_NULL (DNA distant chimera) */
if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3419,7 +3423,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
@@ -3430,7 +3434,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3443,7 +3447,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
}
if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
@@ -3453,7 +3457,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
@@ -3464,7 +3468,7 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
+ print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
donor_chrpos,acceptor_chrpos,mate_chrpos,
diff --git a/src/sarray-read.c b/src/sarray-read.c
index bc01892..2062400 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 195552 2016-08-02 17:31:06Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 195763 2016-08-04 01:37:20Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/sedgesort.c b/src/sedgesort.c
index 5fde4cc..3d7da76 100644
--- a/src/sedgesort.c
+++ b/src/sedgesort.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sedgesort.c 193883 2016-07-12 03:14:35Z twu $";
+static char rcsid[] = "$Id: sedgesort.c 195760 2016-08-04 00:12:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/sedgesort.h b/src/sedgesort.h
index 28512a3..39167de 100644
--- a/src/sedgesort.h
+++ b/src/sedgesort.h
@@ -1,4 +1,4 @@
-/* $Id: sedgesort.h 193883 2016-07-12 03:14:35Z twu $ */
+/* $Id: sedgesort.h 195760 2016-08-04 00:12:04Z twu $ */
#ifndef SEDGESORT_INCLUDED
#define SEDGESORT_INCLUDED
diff --git a/src/shortread.c b/src/shortread.c
index 66d6389..f434d24 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 195492 2016-08-02 02:00:54Z twu $";
+static char rcsid[] = "$Id: shortread.c 195760 2016-08-04 00:12:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/splice.c b/src/splice.c
index 5605e64..2755863 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 193235 2016-06-30 22:34:59Z twu $";
+static char rcsid[] = "$Id: splice.c 195753 2016-08-03 23:44:46Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1051,21 +1051,25 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
best_prob,best_splice_pos,best_donor_splicecoord,best_acceptor_splicecoord));
if (orig_plusp == true) {
/* Originally from plus strand. No complement. */
+
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+ assert(plusp == true);
assert(sensedir == SENSE_FORWARD);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
- best_splice_pos,best_segmenti_nmismatches,
+ best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmenti_nmismatches,
best_donor_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,sensedir,
+ querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
- best_splice_pos,best_segmentj_nmismatches,
+ best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmentj_nmismatches,
best_acceptor_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,sensedir,
+ querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
-
+
if (donor == NULL || acceptor == NULL) {
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
@@ -1120,18 +1124,21 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
} else {
/* Originally from minus strand. Complement. */
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+ assert(plusp == false);
assert(sensedir == SENSE_FORWARD);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
- best_splice_pos,best_segmentj_nmismatches,
+ best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmentj_nmismatches,
best_donor_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,sensedir,
+ querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
- best_splice_pos,best_segmenti_nmismatches,
+ best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmenti_nmismatches,
best_acceptor_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,sensedir,
+ querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
if (donor == NULL || acceptor == NULL) {
@@ -1508,18 +1515,21 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
if (orig_plusp == true) {
/* Originally from plus strand. No complement. */
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+ assert(plusp == false);
assert(sensedir == SENSE_ANTI);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
- best_splice_pos,best_segmenti_nmismatches,
+ best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmenti_nmismatches,
best_donor_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,sensedir,
+ querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
- best_splice_pos,best_segmentj_nmismatches,
+ best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmentj_nmismatches,
best_acceptor_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,sensedir,
+ querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
if (donor == NULL || acceptor == NULL) {
@@ -1576,18 +1586,21 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
} else {
/* Originally from minus strand. Complement. */
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+ assert(plusp == true);
assert(sensedir == SENSE_ANTI);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
- best_splice_pos,best_segmentj_nmismatches,
+ best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmentj_nmismatches,
best_donor_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,sensedir,
+ querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
- best_splice_pos,best_segmenti_nmismatches,
+ best_splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmenti_nmismatches,
best_acceptor_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,sensedir,
+ querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
if (donor == NULL || acceptor == NULL) {
@@ -2120,7 +2133,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
donor = Substring_new_donor(best_donor1_splicecoord,best_donor1_knowni,
- best_splice_pos_1,best_segmenti_nmismatches,
+ best_splice_pos_1,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmenti_nmismatches,
best_donor1_prob,/*left*/segmenti_left,query_compress,
querylength,plusp,genestrand,sensedir,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
@@ -2135,7 +2149,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength);
acceptor = Substring_new_acceptor(best_acceptor2_splicecoord,best_acceptor2_knowni,
- best_splice_pos_2,best_segmentj_nmismatches,
+ best_splice_pos_2,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmentj_nmismatches,
best_acceptor2_prob,/*left*/segmentj_left,query_compress,
querylength,plusp,genestrand,sensedir,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
@@ -2199,7 +2214,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
donor = Substring_new_donor(best_donor2_splicecoord,best_donor2_knowni,
- best_splice_pos_2,best_segmentj_nmismatches,
+ best_splice_pos_2,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmentj_nmismatches,
best_donor2_prob,/*left*/segmentj_left,query_compress,
querylength,plusp,genestrand,sensedir,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
@@ -2213,7 +2229,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength);
acceptor = Substring_new_acceptor(best_acceptor1_splicecoord,best_acceptor1_knowni,
- best_splice_pos_1,best_segmenti_nmismatches,
+ best_splice_pos_1,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ best_segmenti_nmismatches,
best_acceptor1_prob,/*left*/segmenti_left,query_compress,
querylength,plusp,genestrand,sensedir,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
diff --git a/src/stage1hr.c b/src/stage1hr.c
index ae138e9..665fc5a 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 195972 2016-08-08 17:11:50Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -446,6 +446,8 @@ struct Segment_T {
int rightmost; /* For segmentj of local splice */
bool left_splice_p; /* Set by find_singlesplices, used by find_doublesplices for speed */
bool right_splice_p; /* Set by find_singlesplices, used by find_doublesplices for speed */
+ bool spliceable_low_p;
+ bool spliceable_high_p;
bool usedp;
bool pairablep;
@@ -4127,6 +4129,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
Segment_T ptr, ptr_chrstart;
Segment_T *ptr_spliceable;
+ bool last_spliceable_p = false;
/* bool next_spliceable_p; */
#ifdef DEBUG19
Segment_T ptr0;
@@ -4557,6 +4560,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
ptr->floor_right = floor_right;
ptr->leftmost = ptr->rightmost = -1;
ptr->left_splice_p = ptr->right_splice_p = false;
+ ptr->spliceable_low_p = last_spliceable_p;
#if 0
ptr->leftspan = ptr->rightspan = -1;
#endif
@@ -4572,6 +4576,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
/* Not spliceable */
} else if (diagonal <= last_diagonal + max_distance) {
*ptr_spliceable++ = ptr;
+ ptr->spliceable_high_p = last_spliceable_p = true;
}
} else {
/* For minus-strand splicing, require segmenti->querypos5 > segmentj->querypos3,
@@ -4580,23 +4585,25 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
/* Not spliceable */
} else if (diagonal <= last_diagonal + max_distance) {
*ptr_spliceable++ = ptr;
+ ptr->spliceable_high_p = last_spliceable_p = true;
}
}
#endif
if (diagonal <= last_diagonal + max_distance) {
*ptr_spliceable++ = ptr;
+ ptr->spliceable_high_p = last_spliceable_p = true;
debug4s(printf("%s diagonal %u is spliceable because next one is at %u\n",
plusp ? "plus" : "minus",last_diagonal,diagonal));
} else {
+ ptr->spliceable_high_p = last_spliceable_p = false;
debug4s(printf("%s diagonal %u is not spliceable because next one is at %u\n",
plusp ? "plus" : "minus",last_diagonal,diagonal));
}
debug14(printf("Saving segment at %u (%u), query %d..%d",last_diagonal,last_diagonal-chroffset,ptr->querypos5,ptr->querypos3));
*ptr_all++ = ptr;
if (last_querypos >= first_querypos + /*min_segment_length*/1) {
-
- *ptr_anchor++ = ptr;
debug14(printf(" ANCHOR"));
+ *ptr_anchor++ = ptr;
}
debug14(printf("\n"));
ptr++;
@@ -4837,6 +4844,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
ptr->floor_right = floor_right;
ptr->leftmost = ptr->rightmost = -1;
ptr->left_splice_p = ptr->right_splice_p = false;
+ ptr->spliceable_low_p = last_spliceable_p;
#if 0
ptr->leftspan = ptr->rightspan = -1;
#endif
@@ -4961,7 +4969,8 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
printf("%d selected anchor segments\n",*nanchors);
for (p = &(*anchor_segments)[0]; p< &((*anchor_segments)[*nanchors]); p++) {
segment = (Segment_T) *p;
- printf("%u %d..%d\n",segment->diagonal,segment->querypos5,segment->querypos3);
+ printf("%u %d..%d spliceable_low:%d spliceable_high:%d\n",
+ segment->diagonal,segment->querypos5,segment->querypos3,segment->spliceable_low_p,segment->spliceable_high_p);
}
#endif
@@ -10067,6 +10076,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (shortexon_orig_plusp == true) {
debug4k(printf("Short exon candidate, orig_plusp. Saw short exon acceptor...donor on segment i\n"));
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+ assert(plusp == true);
+ assert(sensedir == SENSE_FORWARD);
for (j1 = joffset; j1 < j; j1++) {
if (splicetypes[j1] == ACCEPTOR) {
@@ -10075,7 +10086,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if ((splicesites_i_left =
Splicetrie_find_left(&nmismatches_shortexon_left,&nmismatches_list_left,j1,
/*origleft*/segmentm_left,/*pos5*/0,/*pos3*/leftpos,segmentm->chroffset,
- query_compress,queryptr,querylength,max_mismatches_allowed,plusp,genestrand,first_read_p,
+ query_compress,queryptr,querylength,max_mismatches_allowed,/*plusp*/true,
+ genestrand,first_read_p,
/*collect_all_p*/pairedp == true && first_read_p != plusp)) != NULL) {
ambp_left = (leftpos < min_shortend || Intlist_length(splicesites_i_left) > 1) ? true : false;
@@ -10091,7 +10103,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*origleft*/segmentm_left,/*pos5*/rightpos,/*pos3*/querylength,segmentm->chrhigh,
query_compress,queryptr,
max_mismatches_allowed - nmismatches_shortexon_left - nmismatches_shortexon_middle,
- plusp,genestrand,first_read_p,
+ /*plusp*/true,genestrand,first_read_p,
/*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) {
ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false;
@@ -10107,7 +10119,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
nmismatches_shortexon_middle,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
- querylength,plusp,genestrand,
+ querylength,/*plusp*/true,genestrand,
sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon != NULL) {
@@ -10150,15 +10162,17 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
nmismatches_shortexon_middle,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
- querylength,plusp,genestrand,
+ querylength,/*plusp*/true,genestrand,
sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
debug4k(printf("acceptor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j,
- /*splice_pos*/rightpos,nmismatches_shortexon_right,
+ /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+ query_compress,querylength,/*plusp*/true,genestrand,
+ /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon == NULL || acceptor == NULL) {
@@ -10194,9 +10208,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("donor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j]));
donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j,
- /*splice_pos*/leftpos,nmismatches_shortexon_left,
+ /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+ query_compress,querylength,/*plusp*/true,genestrand,
+ /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
debug4k(printf("shortexon with acceptor at %d (%llu) ... amb_donor %d (%llu)\n",
@@ -10207,8 +10223,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
nmismatches_shortexon_middle,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
- querylength,plusp,genestrand,
- sensedir,/*acceptor_ambp*/false,/*donor_ambp*/true,
+ querylength,/*plusp*/true,genestrand,
+ /*sensedir*/SENSE_FORWARD,/*acceptor_ambp*/false,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (donor == NULL || shortexon == NULL) {
@@ -10242,9 +10258,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
best_left_j = Intlist_head(splicesites_i_left);
best_right_j = Intlist_head(splicesites_i_right);
donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j,
- /*splice_pos*/leftpos,nmismatches_shortexon_left,
+ /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+ query_compress,querylength,/*plusp*/true,genestrand,
+ /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1,
@@ -10252,14 +10270,16 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_pos*/leftpos,/*donor_pos*/rightpos,
nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
- querylength,plusp,genestrand,
+ querylength,/*plusp*/true,genestrand,
sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j,
- /*splice_pos*/rightpos,nmismatches_shortexon_right,
+ /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+ query_compress,querylength,/*plusp*/true,genestrand,
+ /*sensedir*/SENSE_FORWARD,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (donor == NULL || shortexon == NULL || acceptor == NULL) {
@@ -10298,6 +10318,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (shortexon_orig_minusp == true) {
debug4k(printf("Short exon candidate, orig_minusp. Saw short exon antidonor...antiacceptor on segment i\n"));
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+ assert(plusp == false);
+ assert(sensedir == SENSE_ANTI);
for (j1 = joffset; j1 < j; j1++) {
if (splicetypes[j1] == ANTIDONOR) {
@@ -10307,7 +10329,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
Splicetrie_find_left(&nmismatches_shortexon_left,&nmismatches_list_left,j1,
/*origleft*/segmentm_left,/*pos5*/0,/*pos3*/leftpos,segmentm->chroffset,
query_compress,queryptr,querylength,max_mismatches_allowed,
- plusp,genestrand,first_read_p,
+ /*plusp*/false,genestrand,first_read_p,
/*collect_all_p*/pairedp == true && first_read_p != plusp)) != NULL) {
ambp_left = (leftpos < min_shortend || Intlist_length(splicesites_i_left) > 1) ? true : false;
@@ -10317,13 +10339,13 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf(" Doing Splicetrie_find_right from rightpos %d (minus)\n",rightpos));
if ((nmismatches_shortexon_middle =
Genome_count_mismatches_substring(query_compress,segmentm_left,/*pos5*/leftpos,/*pos3*/rightpos,
- plusp,genestrand)) <= max_mismatches_allowed - nmismatches_shortexon_left &&
+ /*plusp*/false,genestrand)) <= max_mismatches_allowed - nmismatches_shortexon_left &&
(splicesites_i_right =
Splicetrie_find_right(&nmismatches_shortexon_right,&nmismatches_list_right,j2,
/*origleft*/segmentm_left,/*pos5*/rightpos,/*pos3*/querylength,segmentm->chrhigh,
query_compress,queryptr,
max_mismatches_allowed - nmismatches_shortexon_left - nmismatches_shortexon_middle,
- plusp,genestrand,first_read_p,
+ /*plusp*/false,genestrand,first_read_p,
/*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) {
ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false;
@@ -10338,7 +10360,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
- querylength,plusp,genestrand,
+ querylength,/*plusp*/false,genestrand,
sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon != NULL) {
@@ -10380,15 +10402,17 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
- querylength,plusp,genestrand,
+ querylength,/*plusp*/false,genestrand,
sensedir,/*acceptor_ambp*/false,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
debug4k(printf("donor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j,
- /*splice_pos*/rightpos,nmismatches_shortexon_right,
+ /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+ query_compress,querylength,/*plusp*/false,genestrand,
+ /*sensedir*/SENSE_ANTI,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (donor == NULL || shortexon == NULL) {
@@ -10422,9 +10446,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("acceptor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j]));
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j,
- /*splice_pos*/leftpos,nmismatches_shortexon_left,
+ /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+ query_compress,querylength,/*plusp*/false,genestrand,
+ /*sensedir*/SENSE_ANTI,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
debug4k(printf("shortexon with donor at %d (%llu) ... amb_acceptor at %d (%llu)\n",
@@ -10434,7 +10460,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
- querylength,plusp,genestrand,
+ querylength,/*plusp*/false,genestrand,
sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
@@ -10469,9 +10495,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
best_left_j = Intlist_head(splicesites_i_left);
best_right_j = Intlist_head(splicesites_i_right);
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j,
- /*splice_pos*/leftpos,nmismatches_shortexon_left,
+ /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+ query_compress,querylength,/*plusp*/false,genestrand,
+ /*sensedir*/SENSE_ANTI,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2,
@@ -10479,14 +10507,16 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_pos*/rightpos,/*donor_pos*/leftpos,
nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
- querylength,plusp,genestrand,
+ querylength,/*plusp*/false,genestrand,
sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j,
- /*splice_pos*/rightpos,nmismatches_shortexon_right,
+ /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,plusp,genestrand,sensedir,segmentm->chrnum,
+ query_compress,querylength,/*plusp*/false,genestrand,
+ /*sensedir*/SENSE_ANTI,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (acceptor == NULL || shortexon == NULL || donor == NULL) {
@@ -10565,7 +10595,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
int i;
#endif
- debug4e(printf("Entering find_spliceends_shortend with %d segments\n",nsegments));
+ debug4e(printf("Entering find_spliceends_shortend with %d anchor segments\n",nanchors));
if (floors != NULL) {
floors_from_neg3 = floors->scorefrom[-index1interval];
@@ -10613,7 +10643,6 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
} else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
splice_pos_end = querylength - 1;
}
- splice_pos_end = querylength - 1;
debug4e(printf("Search for splice sites from %d up (%llu) to %d (%llu)\n",
splice_pos_start,(unsigned long long) segment_left+splice_pos_start,
@@ -10648,8 +10677,9 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
segment_left,(unsigned long long) splice_pos,nmismatches,splice_pos_end));
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
- if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,splice_pos,nmismatches,
- /*prob*/2.0,/*left*/segment_left,query_compress,
+ if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,
+ splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
@@ -10665,7 +10695,8 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
@@ -10700,6 +10731,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
} else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
splice_pos_start = 1;
}
+ /* splice_pos_start = 1; */
debug4e(printf("Search for splice sites from %d (%llu) down to %d (%llu)\n",
splice_pos_end,(unsigned long long) segment_left+splice_pos_end,
@@ -10735,7 +10767,8 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
@@ -10750,8 +10783,9 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
- if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,splice_pos,nmismatches,
- /*prob*/2.0,/*left*/segment_left,query_compress,
+ if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,
+ splice_pos,/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
@@ -10780,6 +10814,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
int max_mismatches_allowed, int genestrand) {
#ifdef DEBUG4E
char *gbuffer;
+ int i;
#endif
Segment_T segment, *p;
@@ -10800,7 +10835,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
#endif
- debug4e(printf("Entering find_spliceends_distant_dna with %d segments\n",nsegments));
+ debug4e(printf("Entering find_spliceends_distant_dna with %d anchor segments\n",nanchors));
if (floors != NULL) {
floors_from_neg3 = floors->scorefrom[-index1interval];
@@ -10811,7 +10846,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
assert(segment->diagonal != (Univcoord_T) -1);
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+ debug4e(printf("find_spliceends_distant_dna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
max_mismatches_allowed,(unsigned long long) segment->diagonal,
segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
@@ -10914,7 +10949,7 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
while (splice_pos >= splice_pos_start && nmismatches <= max_mismatches_allowed) {
debug4e(printf(" splice pos %d, nmismatches (quick) %d, nmismatches (goldstd) %d\n",splice_pos,nmismatches,
Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
- /*plusp*/true,genestrand,first_read_p)));
+ /*plusp*/true,genestrand)));
assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
/*plusp*/true,genestrand));
if ((hit = Substring_new_endfrag(/*endfrag_coord*/segment_left + splice_pos,
@@ -10952,6 +10987,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
int max_mismatches_allowed, int genestrand) {
#ifdef DEBUG4E
char *gbuffer;
+ int i;
#endif
Segment_T segment, *p;
@@ -10972,7 +11008,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
#endif
- debug4e(printf("Entering find_spliceends_distant_dna with %d segments\n",nsegments));
+ debug4e(printf("Entering find_spliceends_distant_dna with %d anchor segments\n",nanchors));
if (floors != NULL) {
floors_from_neg3 = floors->scorefrom[-index1interval];
@@ -10983,7 +11019,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
assert(segment->diagonal != (Univcoord_T) -1);
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+ debug4e(printf("find_spliceends_distant_dna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
max_mismatches_allowed,(unsigned long long) segment->diagonal,
segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
@@ -11086,7 +11122,7 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
while (splice_pos >= splice_pos_start && nmismatches <= max_mismatches_allowed) {
debug4e(printf(" splice pos %d, nmismatches (quick) %d, nmismatches (goldstd) %d\n",splice_pos,nmismatches,
Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
- /*plusp*/false,genestrand,first_read_p)));
+ /*plusp*/false,genestrand)));
assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
/*plusp*/false,genestrand));
if ((hit = Substring_new_startfrag(/*startfrag_coord*/segment_left + splice_pos,
@@ -11113,6 +11149,8 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
}
+#if 0
+/* Using a streamlined version instead */
/* Produces lists of distant_donors and distant_acceptors that are substrings */
/* TODO: Change to lists of Stage3end_T objects, including GMAP.
Change definition of a chimera to be two Stage3end_T objects, instead
@@ -11134,7 +11172,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
Substring_T hit;
Univcoord_T segment_left;
int nmismatches, j, i;
- int splice_pos;
+ int splice_pos, last_querypos;
double prob;
int nmismatches_left, nmismatches_right;
@@ -11171,7 +11209,7 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
- debug4e(printf("Entering find_spliceends_distant_rna with %d segments\n",nsegments));
+ debug4e(printf("Entering find_spliceends_distant_rna with %d anchor segments\n",nanchors));
if (floors != NULL) {
floors_from_neg3 = floors->scorefrom[-index1interval];
@@ -11182,10 +11220,13 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
assert(segment->diagonal != (Univcoord_T) -1);
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+ last_querypos = segment->querypos3 + index1part;
+
+ debug4e(printf("find_spliceends_distant_rna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d, plusp %d\n",
max_mismatches_allowed,(unsigned long long) segment->diagonal,
segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
- floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
+ floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3],
+ plusp));
debug4e(
gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
@@ -11195,406 +11236,1267 @@ find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonor
FREE(gbuffer);
);
- /* Splice ends from left to splice site */
- if ((plusp == true && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed) ||
- (plusp == false && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed)) {
+ /* Find splices on genomic right */
+ if (plusp) {
+ /* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
+ if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+ /* genomic left anchor */
+ debug4e(printf("Searching genomic right: plus genomic left anchor\n"));
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+#if 0
+ /*pos5*/0,/*pos3*/querylength,
+#else
+ /*pos5 (was 0)*/segment->querypos5,/*pos3*/querylength,
+#endif
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on left (%d allowed) at:",
+ nmismatches_left,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
- /* pos3 was trimpos */
- nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
- query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
- plusp,genestrand);
+#if 0
+ splice_pos_start = index1part;
+#else
+ splice_pos_start = segment->querypos5;
+#endif
+ if (nmismatches_left <= max_mismatches_allowed) {
+ splice_pos_end = querylength - 1;
+ } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+ splice_pos_end = querylength - 1;
+ }
+
+ } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+ /* genomic right anchor. No need to find splices on genomic right */
+ debug4e(printf("Searching genomic right: plus genomic right anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+
+ } else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+ segment->spliceable_low_p == true) {
+ /* middle anchor */
+ debug4e(printf("Searching genomic right: plus middle anchor\n"));
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+ /*pos5*/segment->querypos5,/*pos3*/querylength,
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on left (%d allowed) at:",
+ nmismatches_left,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
- debug4e(
- printf("%d mismatches on left (%d allowed) at:",
- nmismatches_left,max_mismatches_allowed);
- for (i = 0; i <= nmismatches_left; i++) {
- printf(" %d",mismatch_positions[i]);
- }
- printf("\n");
- );
+ splice_pos_start = segment->querypos5;
+ if (nmismatches_left <= max_mismatches_allowed) {
+ splice_pos_end = querylength - 1;
+ } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+ splice_pos_end = querylength - 1;
+ }
- splice_pos_start = index1part;
- if (nmismatches_left <= max_mismatches_allowed) {
- splice_pos_end = querylength - 1;
- } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
- splice_pos_end = querylength - 1;
+ } else {
+ /* full anchor */
+ debug4e(printf("Searching genomic right: plus full anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
}
- if (splice_pos_start <= splice_pos_end) {
- debug4e(printf("Search for splice sites from %d up to %d\n",splice_pos_start,splice_pos_end));
-
- segment_donor_nknown = 0;
- segment_antiacceptor_nknown = 0;
- if ((j = segment->splicesites_i) >= 0) {
- /* Known splicing */
- /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segment */
- while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
- if (splicetypes[j] == DONOR) {
- segment_donor_knownpos[segment_donor_nknown] = splicesites[j] - segment_left;
- segment_donor_knowni[segment_donor_nknown++] = j;
- } else if (splicetypes[j] == ANTIACCEPTOR) {
- segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = splicesites[j] - segment_left;
- segment_antiacceptor_knowni[segment_antiacceptor_nknown++] = j;
- }
- j++;
- }
+ } else {
+ /* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
+ if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+ /* genomic right anchor. No need to find splices on genomic right */
+ debug4e(printf("Searching genomic right: minus genomic right anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+
+ } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+ /* genomic left anchor */
+ debug4e(printf("Searching genomic right: minus genomic left anchor\n"));
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+#if 0
+ /*pos5*/0,/*pos3*/querylength,
+#else
+ /*pos5 (was 0)*/querylength - last_querypos,
+ /*pos3*/querylength,
+#endif
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on left (%d allowed) at:",
+ nmismatches_left,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+#if 0
+ splice_pos_start = index1part;
+#else
+ splice_pos_start = querylength - last_querypos;
+#endif
+ if (nmismatches_left <= max_mismatches_allowed) {
+ splice_pos_end = querylength - 1;
+ } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+ splice_pos_end = querylength - 1;
}
- segment_donor_knownpos[segment_donor_nknown] = querylength;
- segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = querylength;
-
- /* Originally on plus strand. No complement */
- sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
-
- if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
- donori_nsites = Genome_donor_positions(positions_alloc,knowni_alloc,
- segment_donor_knownpos,segment_donor_knowni,
- segment_left,splice_pos_start,splice_pos_end+1);
- donori_positions = positions_alloc;
- donori_knowni = knowni_alloc;
- debug4e(
- printf("Donor dinucleotides:");
- for (i = 0; i < donori_nsites; i++) {
- printf(" %d",donori_positions[i]);
- }
- printf("\n");
- );
- } else {
- donori_nsites = segment_donor_nknown;
- donori_positions = segment_donor_knownpos;
- donori_knowni = segment_donor_knowni;
+
+ } else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+ segment->spliceable_low_p == true) {
+ /* middle anchor */
+ debug4e(printf("Searching genomic right: minus middle anchor\n"));
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+ /*pos5*/querylength - segment->querypos3 - index1part,
+ /*pos3*/querylength,plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on left (%d allowed) at:",
+ nmismatches_left,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+ splice_pos_start = querylength - last_querypos;
+ if (nmismatches_left <= max_mismatches_allowed) {
+ splice_pos_end = querylength - 1;
+ } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+ splice_pos_end = querylength - 1;
}
- i = 0;
- nmismatches = 0;
- while (i < donori_nsites && nmismatches <= max_mismatches_allowed) {
- splice_pos = donori_positions[i];
- while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
- debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
- nmismatches++;
+ } else {
+ /* full anchor */
+ debug4e(printf("Searching genomic right: minus full anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+ }
+ }
+
+ if (splice_pos_start <= splice_pos_end) {
+ debug4e(printf("Search for splice sites from %d up to %d\n",splice_pos_start,splice_pos_end));
+
+ segment_donor_nknown = 0;
+ segment_antiacceptor_nknown = 0;
+ if ((j = segment->splicesites_i) >= 0) {
+ /* Known splicing */
+ /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segment */
+ while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+ if (splicetypes[j] == DONOR) {
+ segment_donor_knownpos[segment_donor_nknown] = splicesites[j] - segment_left;
+ segment_donor_knowni[segment_donor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIACCEPTOR) {
+ segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = splicesites[j] - segment_left;
+ segment_antiacceptor_knowni[segment_antiacceptor_nknown++] = j;
}
- debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ j++;
+ }
+ }
+ segment_donor_knownpos[segment_donor_nknown] = querylength;
+ segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = querylength;
+
+ /* Originally on plus strand. No complement */
+ sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+ if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
+ donori_nsites = Genome_donor_positions(positions_alloc,knowni_alloc,
+ segment_donor_knownpos,segment_donor_knowni,
+ segment_left,splice_pos_start,splice_pos_end);
+ donori_positions = positions_alloc;
+ donori_knowni = knowni_alloc;
+ debug4e(
+ printf("Donor dinucleotides:");
+ for (i = 0; i < donori_nsites; i++) {
+ printf(" %d",donori_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ donori_nsites = segment_donor_nknown;
+ donori_positions = segment_donor_knownpos;
+ donori_knowni = segment_donor_knowni;
+ }
+
+ i = 0;
+ nmismatches = 0;
+ while (i < donori_nsites && nmismatches <= max_mismatches_allowed) {
+ splice_pos = donori_positions[i];
+ while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
#if 0
- assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
- plusp,genestrand,first_read_p));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p));
#endif
- if (nmismatches <= max_mismatches_allowed) {
- if (donori_knowni[i] >= 0) {
- debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+ if (nmismatches <= max_mismatches_allowed) {
+ if (donori_knowni[i] >= 0) {
+ debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
- if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s donor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
}
-
- } else {
- prob = Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset);
- debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
- splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
- if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
- if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
- splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,
- sensedir,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s donor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
- debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
- (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
- }
- }
}
}
-
- i++;
}
+ i++;
+ }
+
- /* Splicing originally on minus strand. Complement */
- sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+ /* Splicing originally on minus strand. Complement */
+ sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
- if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
- antiacceptori_nsites = Genome_antiacceptor_positions(positions_alloc,knowni_alloc,
- segment_antiacceptor_knownpos,segment_antiacceptor_knowni,
- segment_left,splice_pos_start,splice_pos_end+1);
- antiacceptori_positions = positions_alloc;
- antiacceptori_knowni = knowni_alloc;
- debug4e(
- printf("Antiacceptor dinucleotides:");
- for (i = 0; i < antiacceptori_nsites; i++) {
- printf(" %d",antiacceptori_positions[i]);
- }
- printf("\n");
- );
- } else {
- antiacceptori_nsites = segment_antiacceptor_nknown;
- antiacceptori_positions = segment_antiacceptor_knownpos;
- antiacceptori_knowni = segment_antiacceptor_knowni;
- }
+ if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
+ antiacceptori_nsites = Genome_antiacceptor_positions(positions_alloc,knowni_alloc,
+ segment_antiacceptor_knownpos,segment_antiacceptor_knowni,
+ segment_left,splice_pos_start,splice_pos_end);
+ antiacceptori_positions = positions_alloc;
+ antiacceptori_knowni = knowni_alloc;
+ debug4e(
+ printf("Antiacceptor dinucleotides:");
+ for (i = 0; i < antiacceptori_nsites; i++) {
+ printf(" %d",antiacceptori_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ antiacceptori_nsites = segment_antiacceptor_nknown;
+ antiacceptori_positions = segment_antiacceptor_knownpos;
+ antiacceptori_knowni = segment_antiacceptor_knowni;
+ }
- i = 0;
- nmismatches = 0;
- while (i < antiacceptori_nsites && nmismatches <= max_mismatches_allowed) {
- splice_pos = antiacceptori_positions[i];
- while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
- debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
- nmismatches++;
- }
- debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ i = 0;
+ nmismatches = 0;
+ while (i < antiacceptori_nsites && nmismatches <= max_mismatches_allowed) {
+ splice_pos = antiacceptori_positions[i];
+ while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
#if 0
- assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
- plusp,genestrand,first_read_p));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p));
#endif
- if (nmismatches <= max_mismatches_allowed) {
- if (antiacceptori_knowni[i] >= 0) {
- debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ if (nmismatches <= max_mismatches_allowed) {
+ if (antiacceptori_knowni[i] >= 0) {
+ debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
}
-
- } else {
- prob = Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset);
- debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
- splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
- if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
- splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,
- sensedir,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
- debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
- (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
- }
- }
}
}
-
- i++;
}
- }
+ i++;
+ }
}
- /* Splice ends from splice site to right end */
- if ((plusp == true && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed) ||
- (plusp == false && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed)) {
-
- /* pos5 was trimpos+1 */
- nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
- query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
- plusp,genestrand);
- debug4e(
- printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
- for (i = 0; i <= nmismatches_right; i++) {
- printf(" %d",mismatch_positions[i]);
- }
- printf("\n");
- );
+ /* Find splices on genomic left */
+ if (plusp) {
+ /* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
+ if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+ /* genomic left anchor. No need to find splices on genomic left. */
+ debug4e(printf("Searching genomic left: plus genomic left anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
- splice_pos_end = query_lastpos;
- if (nmismatches_right <= max_mismatches_allowed) {
- splice_pos_start = 1;
- } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
- splice_pos_start = 1;
- }
+ } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+ /* genomic right anchor */
+ debug4e(printf("Searching genomic left: plus genomic right anchor\n"));
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+#if 0
+ /*pos5*/0,/*pos3*/querylength,
+#else
+ /*pos5*/0,/*pos3 (was querylength)*/last_querypos,
+#endif
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
- if (splice_pos_start <= splice_pos_end) {
- debug4e(printf("Search for splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
-
- segment_acceptor_nknown = 0;
- segment_antidonor_nknown = 0;
- if ((j = segment->splicesites_i) >= 0) {
- /* Known splicing */
- while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
- if (splicetypes[j] == ACCEPTOR) {
- debug4k(printf("Setting known acceptor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
- segment_acceptor_knownpos[segment_acceptor_nknown] = splicesites[j] - segment_left;
- segment_acceptor_knowni[segment_acceptor_nknown++] = j;
- } else if (splicetypes[j] == ANTIDONOR) {
- debug4k(printf("Setting known antidonor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
- segment_antidonor_knownpos[segment_antidonor_nknown] = splicesites[j] - segment_left;
- segment_antidonor_knowni[segment_antidonor_nknown++] = j;
- }
- j++;
- }
+#if 0
+ splice_pos_end = query_lastpos;
+#else
+ splice_pos_end = last_querypos;
+#endif
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
}
- segment_acceptor_knownpos[segment_acceptor_nknown] = querylength;
- segment_antidonor_knownpos[segment_antidonor_nknown] = querylength;
+ } else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+ segment->spliceable_high_p == true) {
+ /* middle anchor */
+ debug4e(printf("Searching genomic left: plus middle anchor\n"));
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+ /*pos5*/0,/*pos3*/last_querypos,
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
- /* Splicing originally on plus strand. No complement. */
- sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+ splice_pos_end = last_querypos;
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
+ }
- if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
- acceptorj_nsites = Genome_acceptor_positions(positions_alloc,knowni_alloc,
- segment_acceptor_knownpos,segment_acceptor_knowni,
- segment_left,splice_pos_start,splice_pos_end+1);
- acceptorj_positions = positions_alloc;
- acceptorj_knowni = knowni_alloc;
- debug4e(
- printf("Acceptor dinucleotides:");
- for (i = 0; i < acceptorj_nsites; i++) {
- printf(" %d",acceptorj_positions[i]);
- }
- printf("\n");
- );
- } else {
- acceptorj_nsites = segment_acceptor_nknown;
- acceptorj_positions = segment_acceptor_knownpos;
- acceptorj_knowni = segment_acceptor_knowni;
+ } else {
+ /* full anchor */
+ debug4e(printf("Searching genomic left: plus full anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+ }
+
+ } else {
+ /* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
+ if (segment->querypos5 < index1part && last_querypos < query_lastpos) {
+ /* genomic right anchor */
+ debug4e(printf("Searching genomic left: minus genomic right anchor\n"));
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+#if 0
+ /*pos5*/0,/*pos3*/querylength,
+#else
+ /*pos5*/0,/*pos3 (was querylength)*/querylength - segment->querypos5,
+#endif
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+#if 0
+ splice_pos_end = query_lastpos;
+#else
+ splice_pos_end = querylength - segment->querypos5;
+#endif
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
}
- i = acceptorj_nsites - 1;
- nmismatches = 0;
- while (i >= 0 && nmismatches <= max_mismatches_allowed) {
- splice_pos = acceptorj_positions[i];
- while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
- debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
- nmismatches++;
+ } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) {
+ /* genomic left anchor. No need to find splices on genomic left. */
+ debug4e(printf("Searching genomic left: minus genomic left anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+
+ } else if (segment->querypos5 > index1part && last_querypos < query_lastpos &&
+ segment->spliceable_high_p == true) {
+ /* middle anchor */
+ debug4e(printf("Searching genomic left: minus middle anchor\n"));
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+ /*pos5*/0,/*pos3*/querylength - segment->querypos5,
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+ splice_pos_end = querylength - segment->querypos5;
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
+ }
+
+ } else {
+ /* full anchor */
+ debug4e(printf("Searching genomic left: minus full anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+ }
+ }
+
+ if (splice_pos_start <= splice_pos_end) {
+ debug4e(printf("Search for splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
+
+ segment_acceptor_nknown = 0;
+ segment_antidonor_nknown = 0;
+ if ((j = segment->splicesites_i) >= 0) {
+ /* Known splicing */
+ while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+ if (splicetypes[j] == ACCEPTOR) {
+ debug4k(printf("Setting known acceptor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+ segment_acceptor_knownpos[segment_acceptor_nknown] = splicesites[j] - segment_left;
+ segment_acceptor_knowni[segment_acceptor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIDONOR) {
+ debug4k(printf("Setting known antidonor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+ segment_antidonor_knownpos[segment_antidonor_nknown] = splicesites[j] - segment_left;
+ segment_antidonor_knowni[segment_antidonor_nknown++] = j;
}
- debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ j++;
+ }
+ }
+ segment_acceptor_knownpos[segment_acceptor_nknown] = querylength;
+ segment_antidonor_knownpos[segment_antidonor_nknown] = querylength;
+
+ /* Splicing originally on plus strand. No complement. */
+ sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+ if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
+ acceptorj_nsites = Genome_acceptor_positions(positions_alloc,knowni_alloc,
+ segment_acceptor_knownpos,segment_acceptor_knowni,
+ segment_left,splice_pos_start,splice_pos_end);
+ acceptorj_positions = positions_alloc;
+ acceptorj_knowni = knowni_alloc;
+ debug4e(
+ printf("Acceptor dinucleotides:");
+ for (i = 0; i < acceptorj_nsites; i++) {
+ printf(" %d",acceptorj_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ acceptorj_nsites = segment_acceptor_nknown;
+ acceptorj_positions = segment_acceptor_knownpos;
+ acceptorj_knowni = segment_acceptor_knowni;
+ }
+
+ i = acceptorj_nsites - 1;
+ nmismatches = 0;
+ while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+ splice_pos = acceptorj_positions[i];
+ while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
#if 0
- assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
- plusp,genestrand,first_read_p));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p));
#endif
- if (nmismatches <= max_mismatches_allowed) {
- if (acceptorj_knowni[i] >= 0) {
- debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ if (nmismatches <= max_mismatches_allowed) {
+ if (acceptorj_knowni[i] >= 0) {
+ debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s acceptor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
}
+ }
+ }
+ }
+
+ i--;
+ }
- } else {
- prob = Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset);
- debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
- splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
- if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
- splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,
- sensedir,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s acceptor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
- debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
- (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
+
+ /* Splicing originally on minus strand. Complement. */
+ sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+
+ if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
+ antidonorj_nsites = Genome_antidonor_positions(positions_alloc,knowni_alloc,
+ segment_antidonor_knownpos,segment_antidonor_knowni,
+ segment_left,splice_pos_start,splice_pos_end);
+ antidonorj_positions = positions_alloc;
+ antidonorj_knowni = knowni_alloc;
+ debug4e(
+ printf("Antidonor dinucleotides:");
+ for (i = 0; i < antidonorj_nsites; i++) {
+ printf(" %d",antidonorj_positions[i]);
}
+ printf("\n");
+ );
+ } else {
+ antidonorj_nsites = segment_antidonor_nknown;
+ antidonorj_positions = segment_antidonor_knownpos;
+ antidonorj_knowni = segment_antidonor_knowni;
+ }
+
+ i = antidonorj_nsites - 1;
+ nmismatches = 0;
+ while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+ splice_pos = antidonorj_positions[i];
+ while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+#if 0
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p));
+#endif
+ if (nmismatches <= max_mismatches_allowed) {
+ if (antidonorj_knowni[i] >= 0) {
+ debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,prob,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
}
}
}
-
- i--;
}
+ i--;
+ }
+ }
+ }
+ }
- /* Splicing originally on minus strand. Complement. */
- sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+ return;
+}
+#endif
- if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
- antidonorj_nsites = Genome_antidonor_positions(positions_alloc,knowni_alloc,
- segment_antidonor_knownpos,segment_antidonor_knowni,
- segment_left,splice_pos_start,splice_pos_end+1);
- antidonorj_positions = positions_alloc;
- antidonorj_knowni = knowni_alloc;
- debug4e(
- printf("Antidonor dinucleotides:");
- for (i = 0; i < antidonorj_nsites; i++) {
- printf(" %d",antidonorj_positions[i]);
- }
- printf("\n");
- );
- } else {
- antidonorj_nsites = segment_antidonor_nknown;
- antidonorj_positions = segment_antidonor_knownpos;
- antidonorj_knowni = segment_antidonor_knowni;
+
+
+/* Streamlined version */
+/* Produces lists of distant_donors and distant_acceptors that are substrings */
+/* Note: Call to Genome_donor_positions and similar functions can
+ yield positions at (splice_pos_start - 1) through (splice_pos_end +
+ 1). Therefore, need to have splice_pos_start > 0 and
+ splice_pos_end < querylength. */
+/* TODO: Change to lists of Stage3end_T objects, including GMAP.
+ Change definition of a chimera to be two Stage3end_T objects, instead
+ of two substrings. */
+static void
+find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonors,
+ List_T **distant_acceptors, List_T **distant_antiacceptors,
+ Segment_T *anchor_segments, int nanchors,
+#ifdef DEBUG4E
+ char *queryptr,
+#endif
+ Floors_T floors, int querylength, int query_lastpos, Compress_T query_compress,
+ int max_mismatches_allowed, bool plusp, int genestrand) {
+#ifdef DEBUG4E
+ char *gbuffer;
+#endif
+
+ Segment_T segment, *p;
+ Substring_T hit;
+ Univcoord_T segment_left;
+ int nmismatches, j, i;
+ int splice_pos, last_querypos;
+ double prob;
+
+ int nmismatches_left, nmismatches_right;
+ int *floors_from_neg3, *floors_to_pos3;
+ int sensedir;
+
+ int splice_pos_start, splice_pos_end;
+
+#ifdef HAVE_ALLOCA
+ int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int));
+#else
+ int mismatch_positions[MAX_READLENGTH+1];
+ int segment_donor_knownpos[MAX_READLENGTH+1], segment_acceptor_knownpos[MAX_READLENGTH+1];
+ int segment_antidonor_knownpos[MAX_READLENGTH+1], segment_antiacceptor_knownpos[MAX_READLENGTH+1];
+ int segment_donor_knowni[MAX_READLENGTH+1], segment_acceptor_knowni[MAX_READLENGTH+1];
+ int segment_antidonor_knowni[MAX_READLENGTH+1], segment_antiacceptor_knowni[MAX_READLENGTH+1];
+ int positions_alloc[MAX_READLENGTH+1];
+ int knowni_alloc[MAX_READLENGTH+1];
+#endif
+
+ int segment_donor_nknown, segment_acceptor_nknown, segment_antidonor_nknown, segment_antiacceptor_nknown;
+ int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
+ int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
+ int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+
+
+ debug4e(printf("Entering find_spliceends_distant_rna with %d anchor segments\n",nanchors));
+
+ if (floors != NULL) {
+ floors_from_neg3 = floors->scorefrom[-index1interval];
+ floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+
+ for (p = &(anchor_segments[0]); p < &(anchor_segments[nanchors]); p++) {
+ segment = *p;
+ assert(segment->diagonal != (Univcoord_T) -1);
+
+ segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
+ last_querypos = segment->querypos3 + index1part;
+ assert(last_querypos <= querylength);
+
+ debug4e(printf("find_spliceends_distant_rna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d, plusp %d\n",
+ max_mismatches_allowed,(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
+ floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3],
+ plusp));
+
+ debug4e(
+ gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
+ Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
+ printf("genome 0..: %s\n",gbuffer);
+ printf("query 0..: %s\n",queryptr);
+ FREE(gbuffer);
+ );
+
+ /* Find splices on genomic right */
+ if (plusp) {
+ /* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
+ if (last_querypos < query_lastpos &&
+ (segment->querypos5 < index1part || segment->spliceable_low_p == true)) {
+ /* genomic left anchor or middle anchor */
+ debug4e(printf("Searching genomic right: plus genomic left anchor or middle anchor\n"));
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+ /*pos5*/segment->querypos5,/*pos3*/querylength,
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on left (%d allowed) at:",
+ nmismatches_left,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+ splice_pos_start = segment->querypos5 + 1;
+ if (nmismatches_left <= max_mismatches_allowed) {
+ splice_pos_end = querylength - 1 - 1;
+ } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1 - 1) {
+ splice_pos_end = querylength - 1 - 1;
}
+
+ } else {
+ /* genomic right anchor or full anchor */
+ debug4e(printf("Searching genomic right: plus genomic right anchor or full anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+ }
- i = antidonorj_nsites - 1;
- nmismatches = 0;
- while (i >= 0 && nmismatches <= max_mismatches_allowed) {
- splice_pos = antidonorj_positions[i];
- while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
- debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
- nmismatches++;
+ } else {
+ /* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
+ if (segment->querypos5 > index1part &&
+ (last_querypos > query_lastpos || segment->spliceable_low_p == true)) {
+ /* genomic left anchor or middle anchor */
+ debug4e(printf("Searching genomic right: minus genomic left anchor or middle anchor\n"));
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+ /*pos5*/querylength - last_querypos,
+ /*pos3*/querylength,plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on left (%d allowed) at:",
+ nmismatches_left,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+ splice_pos_start = querylength - last_querypos + 1;
+ if (nmismatches_left <= max_mismatches_allowed) {
+ splice_pos_end = querylength - 1 - 1;
+ } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1 - 1) {
+ splice_pos_end = querylength - 1 - 1;
+ }
+
+ } else {
+ /* genomic right anchor or full anchor */
+ debug4e(printf("Searching genomic right: minus genomic right anchor or full anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+ }
+ }
+
+ if (splice_pos_start <= splice_pos_end) {
+ debug4e(printf("Search for splice sites from %d up to %d\n",splice_pos_start,splice_pos_end));
+
+ segment_donor_nknown = 0;
+ segment_antiacceptor_nknown = 0;
+ if ((j = segment->splicesites_i) >= 0) {
+ /* Known splicing */
+ /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segment */
+ while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+ if (splicetypes[j] == DONOR) {
+ segment_donor_knownpos[segment_donor_nknown] = splicesites[j] - segment_left;
+ segment_donor_knowni[segment_donor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIACCEPTOR) {
+ segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = splicesites[j] - segment_left;
+ segment_antiacceptor_knowni[segment_antiacceptor_nknown++] = j;
}
- debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ j++;
+ }
+ }
+ segment_donor_knownpos[segment_donor_nknown] = querylength;
+ segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = querylength;
+
+ /* Originally on plus strand. No complement */
+ sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+ if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
+ assert(splice_pos_start > 0);
+ assert(splice_pos_end < querylength - 1);
+ donori_nsites = Genome_donor_positions(positions_alloc,knowni_alloc,
+ segment_donor_knownpos,segment_donor_knowni,
+ segment_left,splice_pos_start,splice_pos_end);
+ donori_positions = positions_alloc;
+ donori_knowni = knowni_alloc;
+ debug4e(
+ printf("Donor dinucleotides:");
+ for (i = 0; i < donori_nsites; i++) {
+ printf(" %d",donori_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ donori_nsites = segment_donor_nknown;
+ donori_positions = segment_donor_knownpos;
+ donori_knowni = segment_donor_knowni;
+ }
+
+ i = 0;
+ nmismatches = 0;
+ while (i < donori_nsites && nmismatches <= max_mismatches_allowed) {
+ splice_pos = donori_positions[i];
+ while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
#if 0
- assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
- plusp,genestrand,first_read_p));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p));
#endif
- if (nmismatches <= max_mismatches_allowed) {
- if (antidonorj_knowni[i] >= 0) {
- debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ if (nmismatches <= max_mismatches_allowed) {
+ if (donori_knowni[i] >= 0) {
+ debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,
sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s antidonor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ Substring_querystart(hit),Substring_queryend(hit)));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
- (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+ (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
}
-
- } else {
- prob = Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset);
- debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
- splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
- if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
- splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,
- sensedir,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s antidonor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
- debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
- (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+ }
+ }
+ }
+
+ i++;
+ }
+
+
+ /* Splicing originally on minus strand. Complement */
+ sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+
+ if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
+ assert(splice_pos_start > 0);
+ assert(splice_pos_end < querylength - 1);
+ antiacceptori_nsites = Genome_antiacceptor_positions(positions_alloc,knowni_alloc,
+ segment_antiacceptor_knownpos,segment_antiacceptor_knowni,
+ segment_left,splice_pos_start,splice_pos_end);
+ antiacceptori_positions = positions_alloc;
+ antiacceptori_knowni = knowni_alloc;
+ debug4e(
+ printf("Antiacceptor dinucleotides:");
+ for (i = 0; i < antiacceptori_nsites; i++) {
+ printf(" %d",antiacceptori_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ antiacceptori_nsites = segment_antiacceptor_nknown;
+ antiacceptori_positions = segment_antiacceptor_knownpos;
+ antiacceptori_knowni = segment_antiacceptor_knowni;
+ }
+
+ i = 0;
+ nmismatches = 0;
+ while (i < antiacceptori_nsites && nmismatches <= max_mismatches_allowed) {
+ splice_pos = antiacceptori_positions[i];
+ while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+#if 0
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p));
+#endif
+ if (nmismatches <= max_mismatches_allowed) {
+ if (antiacceptori_knowni[i] >= 0) {
+ debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,prob,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
+ }
+ }
+ }
+ }
+
+ i++;
+ }
+ }
+
+
+ /* Find splices on genomic left */
+ if (plusp) {
+ /* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */
+ if (segment->querypos5 > index1part &&
+ (last_querypos > query_lastpos || segment->spliceable_high_p == true)) {
+ /* genomic right anchor or middle anchor */
+ debug4e(printf("Searching genomic left: plus genomic right anchor or middle anchor\n"));
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+ /*pos5*/0,/*pos3*/last_querypos,
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+ splice_pos_end = last_querypos - 1 - 1;
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
+ }
+
+ } else {
+ /* genomic left anchor or full anchor */
+ debug4e(printf("Searching genomic left: plus genomic left anchor or full anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+ }
+
+ } else {
+ /* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */
+ if (last_querypos < query_lastpos &&
+ (segment->querypos5 < index1part || segment->spliceable_high_p == true)) {
+ /* genomic right anchor or middle anchor*/
+ debug4e(printf("Searching genomic left: minus genomic right anchor or middle anchor\n"));
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,
+ /*pos5*/0,/*pos3*/querylength - segment->querypos5,
+ plusp,genestrand);
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
}
+ printf("\n");
+ );
+
+ splice_pos_end = querylength - segment->querypos5 - 1 - 1;
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
+ }
+
+ } else {
+ /* genomic left anchor or full anchor */
+ debug4e(printf("Searching genomic left: minus genomic left anchor or full anchor\n"));
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+ }
+ }
+
+ if (splice_pos_start <= splice_pos_end) {
+ debug4e(printf("Search for splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
+
+ segment_acceptor_nknown = 0;
+ segment_antidonor_nknown = 0;
+ if ((j = segment->splicesites_i) >= 0) {
+ /* Known splicing */
+ while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+ if (splicetypes[j] == ACCEPTOR) {
+ debug4k(printf("Setting known acceptor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+ segment_acceptor_knownpos[segment_acceptor_nknown] = splicesites[j] - segment_left;
+ segment_acceptor_knowni[segment_acceptor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIDONOR) {
+ debug4k(printf("Setting known antidonor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+ segment_antidonor_knownpos[segment_antidonor_nknown] = splicesites[j] - segment_left;
+ segment_antidonor_knowni[segment_antidonor_nknown++] = j;
+ }
+ j++;
+ }
+ }
+ segment_acceptor_knownpos[segment_acceptor_nknown] = querylength;
+ segment_antidonor_knownpos[segment_antidonor_nknown] = querylength;
+
+ /* Splicing originally on plus strand. No complement. */
+ sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+ if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
+ assert(splice_pos_start > 0);
+ assert(splice_pos_end < querylength - 1);
+ acceptorj_nsites = Genome_acceptor_positions(positions_alloc,knowni_alloc,
+ segment_acceptor_knownpos,segment_acceptor_knowni,
+ segment_left,splice_pos_start,splice_pos_end);
+ acceptorj_positions = positions_alloc;
+ acceptorj_knowni = knowni_alloc;
+ debug4e(
+ printf("Acceptor dinucleotides:");
+ for (i = 0; i < acceptorj_nsites; i++) {
+ printf(" %d",acceptorj_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ acceptorj_nsites = segment_acceptor_nknown;
+ acceptorj_positions = segment_acceptor_knownpos;
+ acceptorj_knowni = segment_acceptor_knowni;
+ }
+
+ i = acceptorj_nsites - 1;
+ nmismatches = 0;
+ while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+ splice_pos = acceptorj_positions[i];
+ while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+#if 0
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p));
+#endif
+ if (nmismatches <= max_mismatches_allowed) {
+ if (acceptorj_knowni[i] >= 0) {
+ debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,prob,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
}
}
}
+ }
+
+ i--;
+ }
+
+
+ /* Splicing originally on minus strand. Complement. */
+ sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+
+ if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
+ assert(splice_pos_start > 0);
+ assert(splice_pos_end < querylength - 1);
+ antidonorj_nsites = Genome_antidonor_positions(positions_alloc,knowni_alloc,
+ segment_antidonor_knownpos,segment_antidonor_knowni,
+ segment_left,splice_pos_start,splice_pos_end);
+ antidonorj_positions = positions_alloc;
+ antidonorj_knowni = knowni_alloc;
+ debug4e(
+ printf("Antidonor dinucleotides:");
+ for (i = 0; i < antidonorj_nsites; i++) {
+ printf(" %d",antidonorj_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ antidonorj_nsites = segment_antidonor_nknown;
+ antidonorj_positions = segment_antidonor_knownpos;
+ antidonorj_knowni = segment_antidonor_knowni;
+ }
- i--;
+ i = antidonorj_nsites - 1;
+ nmismatches = 0;
+ while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+ splice_pos = antidonorj_positions[i];
+ while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
}
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+#if 0
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p));
+#endif
+ if (nmismatches <= max_mismatches_allowed) {
+ if (antidonorj_knowni[i] >= 0) {
+ debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+ splice_pos,/*substring_querystart*/segment->querypos5,
+ /*substring_queryend*/last_querypos,
+ nmismatches,prob,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n",
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches,
+ Substring_querystart(hit),Substring_queryend(hit)));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+ }
+ }
+ }
+ }
+
+ i--;
}
}
}
@@ -12331,8 +13233,11 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
startfrag = (Substring_T) p->first;
endfrag = (Substring_T) q->first;
- debug4ld(printf("end1-end2: startfrag at %llu and endfrag at %llu\n",
- (unsigned long long) Substring_genomicstart(startfrag),(unsigned long long) Substring_genomicstart(endfrag)));
+ debug4ld(printf("end1-end2: startfrag at %llu %d..%d and endfrag at %llu %d..%d\n",
+ (unsigned long long) Substring_genomicstart(startfrag),
+ Substring_querystart(startfrag),Substring_queryend(startfrag),
+ (unsigned long long) Substring_genomicstart(endfrag),
+ Substring_querystart(endfrag),Substring_queryend(endfrag)));
if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
@@ -12423,9 +13328,11 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
startfrag = (Substring_T) p->first;
endfrag = (Substring_T) q->first;
- debug4ld(printf("end3-end4: startfrag at %llu and endfrag at %llu\n",
+ debug4ld(printf("end3-end4: startfrag at %llu %d..%d and endfrag at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(startfrag),
- (unsigned long long) Substring_genomicstart(endfrag)));
+ Substring_querystart(startfrag),Substring_queryend(startfrag),
+ (unsigned long long) Substring_genomicstart(endfrag),
+ Substring_querystart(endfrag),Substring_queryend(endfrag)));
if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
@@ -12521,9 +13428,11 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
startfrag = (Substring_T) p->first;
endfrag = (Substring_T) q->first;
- debug4ld(printf("end1-end4: startfrag at %llu and endfrag at %llu\n",
+ debug4ld(printf("end1-end4: startfrag at %llu %d..%d and endfrag at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(startfrag),
- (unsigned long long) Substring_genomicstart(endfrag)));
+ Substring_querystart(startfrag),Substring_queryend(startfrag),
+ (unsigned long long) Substring_genomicstart(endfrag),
+ Substring_querystart(endfrag),Substring_queryend(endfrag)));
if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
@@ -12583,10 +13492,12 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
startfrag = (Substring_T) p->first;
endfrag = (Substring_T) q->first;
- debug4ld(printf("end3-end2: startfrag at %llu and endfrag at %llu\n",
+ debug4ld(printf("end3-end2: startfrag at %llu %d..%d and endfrag at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(startfrag),
- (unsigned long long) Substring_genomicstart(endfrag)));
-
+ Substring_querystart(startfrag),Substring_queryend(startfrag),
+ (unsigned long long) Substring_genomicstart(endfrag),
+ Substring_querystart(endfrag),Substring_queryend(endfrag)));
+
if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
p = p->rest;
@@ -12709,8 +13620,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end1-end2: donor at %llu and acceptor at %llu\n",
- (unsigned long long) Substring_genomicstart(donor),(unsigned long long) Substring_genomicstart(acceptor)));
+ debug4ld(printf("end1-end2: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
+ (unsigned long long) Substring_genomicstart(donor),
+ Substring_querystart(donor),Substring_queryend(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),
+ Substring_querystart(acceptor),Substring_queryend(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -12801,9 +13715,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end3-end4: donor at %llu and acceptor at %llu\n",
+ debug4ld(printf("end3-end4: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(donor),
- (unsigned long long) Substring_genomicstart(acceptor)));
+ Substring_querystart(donor),Substring_queryend(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),
+ Substring_querystart(acceptor),Substring_queryend(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -12891,9 +13807,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end5-end6: donor at %llu and acceptor at %llu\n",
+ debug4ld(printf("end5-end6: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(donor),
- (unsigned long long) Substring_genomicstart(acceptor)));
+ Substring_querystart(donor),Substring_queryend(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),
+ Substring_querystart(acceptor),Substring_queryend(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -12982,9 +13900,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end7-end8: donor at %llu and acceptor at %llu\n",
+ debug4ld(printf("end7-end8: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(donor),
- (unsigned long long) Substring_genomicstart(acceptor)));
+ Substring_querystart(donor),Substring_queryend(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),
+ Substring_querystart(acceptor),Substring_queryend(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -13078,9 +13998,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end1-end4: donor at %llu and acceptor at %llu\n",
+ debug4ld(printf("end1-end4: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(donor),
- (unsigned long long) Substring_genomicstart(acceptor)));
+ Substring_querystart(donor),Substring_queryend(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),
+ Substring_querystart(acceptor),Substring_queryend(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -13140,9 +14062,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end3-end2: donor at %llu and acceptor at %llu\n",
+ debug4ld(printf("end3-end2: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(donor),
- (unsigned long long) Substring_genomicstart(acceptor)));
+ Substring_querystart(donor),Substring_queryend(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),
+ Substring_querystart(acceptor),Substring_queryend(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) {
debug4ld(printf("chimera_pos of donor < min_endlength_1\n"));
@@ -13203,9 +14127,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end5-end8: donor at %llu and acceptor at %llu\n",
+ debug4ld(printf("end5-end8: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(donor),
- (unsigned long long) Substring_genomicstart(acceptor)));
+ Substring_querystart(donor),Substring_queryend(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),
+ Substring_querystart(acceptor),Substring_queryend(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -13265,9 +14191,11 @@ find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
acceptor = (Substring_T) q->first;
- debug4ld(printf("end7-end6: donor at %llu and acceptor at %llu\n",
+ debug4ld(printf("end7-end6: donor at %llu %d..%d and acceptor at %llu %d..%d\n",
(unsigned long long) Substring_genomicstart(donor),
- (unsigned long long) Substring_genomicstart(acceptor)));
+ Substring_querystart(donor),Substring_queryend(donor),
+ (unsigned long long) Substring_genomicstart(acceptor),
+ Substring_querystart(acceptor),Substring_queryend(acceptor)));
if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) {
debug4ld(printf("chimera_pos of donor < min_endlength_2\n"));
@@ -13440,8 +14368,8 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - support;
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
- Substring_chimera_pos(donor),nmismatches_shortend,
- /*prob*/2.0,/*left*/bestleft,query_compress_fwd,
+ Substring_chimera_pos(donor),/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
@@ -13516,8 +14444,8 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - endlength;
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
- Substring_chimera_pos(acceptor),nmismatches_shortend,
- /*prob*/2.0,/*left*/bestleft,query_compress_fwd,
+ Substring_chimera_pos(acceptor),/*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
@@ -13592,8 +14520,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - endlength;
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
- querylength-Substring_chimera_pos(donor),nmismatches_shortend,
- /*prob*/2.0,/*left*/bestleft,query_compress_rev,
+ querylength-Substring_chimera_pos(donor),
+ /*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
@@ -13669,8 +14598,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - support;
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
- querylength-Substring_chimera_pos(acceptor),nmismatches_shortend,
- /*prob*/2.0,/*left*/bestleft,query_compress_rev,
+ querylength-Substring_chimera_pos(acceptor),
+ /*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
@@ -13745,8 +14675,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - endlength;
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
- Substring_chimera_pos(donor),nmismatches_shortend,
- /*prob*/2.0,/*left*/bestleft,query_compress_fwd,
+ Substring_chimera_pos(donor),
+ /*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
@@ -13822,8 +14753,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - support;
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
- Substring_chimera_pos(acceptor),nmismatches_shortend,
- /*prob*/2.0,/*left*/bestleft,query_compress_fwd,
+ Substring_chimera_pos(acceptor),
+ /*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
@@ -13899,8 +14831,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - support;
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
- querylength-Substring_chimera_pos(donor),nmismatches_shortend,
- /*prob*/2.0,/*left*/bestleft,query_compress_rev,
+ querylength-Substring_chimera_pos(donor),
+ /*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
@@ -13975,8 +14908,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
bestj = Intlist_head(splicesites_i);
bestleft = splicesites[bestj] - endlength;
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
- querylength-Substring_chimera_pos(acceptor),nmismatches_shortend,
- /*prob*/2.0,/*left*/bestleft,query_compress_rev,
+ querylength-Substring_chimera_pos(acceptor),
+ /*substring_querystart*/0,/*substring_queryend*/querylength,
+ nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev,
querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
diff --git a/src/stage3.c b/src/stage3.c
index b13c3ab..ef171c2 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 195553 2016-08-02 17:32:39Z twu $";
+static char rcsid[] = "$Id: stage3.c 195963 2016-08-08 16:38:05Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -12404,7 +12404,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
int sense_try, int sense_filter,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
struct Pair_T *pairarray1;
- List_T p;
+ List_T pairs_fwd_copy, pairs_rev_copy, p;
Chrpos_T *last_genomedp5_fwd = NULL, *last_genomedp3_fwd = NULL, *last_genomedp5_rev = NULL, *last_genomedp3_rev = NULL;
List_T pairs_pretrim, pairs_fwd, pairs_rev, best_pairs, temp_pairs, path_fwd, path_rev, best_path, temp_path;
List_T copy;
@@ -12921,10 +12921,13 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
*cdna_direction = +1;
*sensedir = SENSE_FORWARD;
+
+ /* path_trim alters pairs_fwd, so make a copy in case we use it for pairs_pretrim */
+ pairs_fwd_copy = Pairpool_copy(pairs_fwd,pairpool);
*finalpairs1 = path_trim(defect_rate_fwd,&(*ambig_end_length_5_1),&(*ambig_end_length_3_1),
&(*ambig_splicetype_5_1),&(*ambig_splicetype_3_1),
&(*ambig_prob_5_1),&(*ambig_prob_3_1),
- pairs_fwd,&(*cdna_direction),watsonp,
+ pairs_fwd_copy,&(*cdna_direction),watsonp,
jump_late_p,querylength,
#ifdef GSNAP
&(*sensedir),
@@ -12942,10 +12945,11 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
*cdna_direction = -1;
*sensedir = SENSE_ANTI;
+ pairs_rev_copy = Pairpool_copy(pairs_rev,pairpool);
*finalpairs2 = path_trim(defect_rate_rev,&(*ambig_end_length_5_2),&(*ambig_end_length_3_2),
&(*ambig_splicetype_5_2),&(*ambig_splicetype_3_2),
&(*ambig_prob_5_2),&(*ambig_prob_3_2),
- pairs_rev,&(*cdna_direction),watsonp,
+ pairs_rev_copy,&(*cdna_direction),watsonp,
jump_late_p,querylength,
#ifdef GSNAP
&(*sensedir),
@@ -13044,6 +13048,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
#endif
}
+ /* Okay for path_trim to alter pairs_pretrim */
*finalpairs1 = path_trim(defect_rate,&(*ambig_end_length_5_1),&(*ambig_end_length_3_1),
&(*ambig_splicetype_5_1),&(*ambig_splicetype_3_1),
&(*ambig_prob_5_1),&(*ambig_prob_3_1),
diff --git a/src/stage3.h b/src/stage3.h
index ba18dae..4707bc3 100644
--- a/src/stage3.h
+++ b/src/stage3.h
@@ -1,4 +1,4 @@
-/* $Id: stage3.h 193899 2016-07-12 04:41:34Z twu $ */
+/* $Id: stage3.h 193876 2016-07-12 02:46:04Z twu $ */
#ifndef STAGE3_INCLUDED
#define STAGE3_INCLUDED
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 006780a..d5fba8b 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 195553 2016-08-02 17:32:39Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 195760 2016-08-04 00:12:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/stage3hr.h b/src/stage3hr.h
index 5278244..15cbc60 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 193887 2016-07-12 03:23:17Z twu $ */
+/* $Id: stage3hr.h 195760 2016-08-04 00:12:04Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
diff --git a/src/substring.c b/src/substring.c
index aaa657b..5ec012f 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: substring.c 195961 2016-08-08 16:36:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3410,6 +3410,11 @@ Substring_splicecoord (T this) {
return this->splicecoord;
}
+Chrpos_T
+Substring_chr_splicecoord (T this) {
+ return (Chrpos_T) (this->splicecoord - this->chroffset);
+}
+
int
Substring_splicesites_knowni (T this) {
return this->splicesites_knowni;
@@ -4186,13 +4191,15 @@ Substring_new_endfrag (Univcoord_T endfrag_coord, int splice_pos, int nmismatche
T
-Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, int donor_nmismatches,
+Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
+ int substring_querystart, int substring_queryend, int donor_nmismatches,
double donor_prob, Univcoord_T left, Compress_T query_compress,
int querylength, bool plusp, int genestrand, int sensedir,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength) {
T new;
int querystart, queryend;
- Univcoord_T genomicstart, genomicend, alignstart, alignend;
+ Univcoord_T genomicstart, alignstart, alignend;
+ /* Univcoord_T genomicend; */
Endtype_T start_endtype, end_endtype;
Trimaction_T trim_left_action, trim_right_action;
@@ -4206,16 +4213,18 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
} else if (plusp == true) {
genomicstart = left;
- genomicend = left + querylength;
+ /* genomicend = left + querylength; */
if (sensedir == SENSE_FORWARD) {
start_endtype = END;
end_endtype = DON;
- querystart = 0;
+ querystart = substring_querystart; /* 0, for an end piece */
queryend = donor_pos;
- alignstart = genomicstart;
- alignend = genomicstart + donor_pos;
- trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
+ if (querystart == 0) {
+ trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
+ } else {
+ trim_left_action = PRE_TRIMMED;
+ }
trim_right_action = NO_TRIM;
} else if (sensedir == SENSE_ANTI) {
@@ -4223,28 +4232,35 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
end_endtype = END;
querystart = donor_pos;
- queryend = querylength;
- alignstart = genomicstart + donor_pos;
- alignend = genomicend;
+ queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
- trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
+ if (queryend == querylength) {
+ trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
+ } else {
+ trim_right_action = PRE_TRIMMED;
+ }
} else {
abort();
}
+ alignstart = genomicstart + querystart;
+ alignend = genomicstart + queryend;
+
} else {
genomicstart = left + querylength;
- genomicend = left;
+ /* genomicend = left; */
if (sensedir == SENSE_FORWARD) {
start_endtype = END;
end_endtype = DON;
- querystart = 0;
+ querystart = substring_querystart; /* 0, for an end piece */
queryend = querylength - donor_pos;
- alignstart = genomicstart;
- alignend = genomicstart - (querylength - donor_pos);
- trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
+ if (querystart == 0) {
+ trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
+ } else {
+ trim_left_action = PRE_TRIMMED;
+ }
trim_right_action = NO_TRIM;
} else if (sensedir == SENSE_ANTI) {
@@ -4252,15 +4268,20 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
end_endtype = END;
querystart = querylength - donor_pos;
- queryend = querylength;
- alignstart = genomicstart - (querylength - donor_pos);
- alignend = genomicend;
+ queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
- trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
+ if (queryend == querylength) {
+ trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
+ } else {
+ trim_right_action = PRE_TRIMMED;
+ }
} else {
abort();
}
+
+ alignstart = genomicstart - querystart;
+ alignend = genomicstart - queryend;
}
if ((new = Substring_new(donor_nmismatches,chrnum,chroffset,chrhigh,chrlength,
@@ -4302,13 +4323,15 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
T
-Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos, int acceptor_nmismatches,
+Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos,
+ int substring_querystart, int substring_queryend, int acceptor_nmismatches,
double acceptor_prob, Univcoord_T left, Compress_T query_compress,
int querylength, bool plusp, int genestrand, int sensedir,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength) {
T new;
int querystart, queryend;
- Univcoord_T genomicstart, genomicend, alignstart, alignend;
+ Univcoord_T genomicstart, alignstart, alignend;
+ /* Univcoord_T genomicend; */
Endtype_T start_endtype, end_endtype;
Trimaction_T trim_left_action, trim_right_action;
@@ -4322,61 +4345,75 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
} else if (plusp == true) {
genomicstart = left;
- genomicend = left + querylength;
+ /* genomicend = left + querylength; */
if (sensedir == SENSE_FORWARD) {
start_endtype = ACC;
end_endtype = END;
querystart = acceptor_pos;
- queryend = querylength;
- alignstart = genomicstart + acceptor_pos;
- alignend = genomicend;
+ queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
- trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
+ if (queryend == querylength) {
+ trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
+ } else {
+ trim_right_action = PRE_TRIMMED;
+ }
} else if (sensedir == SENSE_ANTI) {
start_endtype = END;
end_endtype = ACC;
- querystart = 0;
+ querystart = substring_querystart; /* 0, for an end piece */
queryend = acceptor_pos;
- alignstart = genomicstart;
- alignend = genomicstart + acceptor_pos;
- trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
+ if (querystart == 0) {
+ trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
+ } else {
+ trim_left_action = PRE_TRIMMED;
+ }
trim_right_action = NO_TRIM;
} else {
abort();
}
+ alignstart = genomicstart + querystart;
+ alignend = genomicstart + queryend;
+
} else {
genomicstart = left + querylength;
- genomicend = left;
+ /* genomicend = left; */
if (sensedir == SENSE_FORWARD) {
start_endtype = ACC;
end_endtype = END;
querystart = querylength - acceptor_pos;
- queryend = querylength;
- alignstart = genomicstart - (querylength - acceptor_pos);
- alignend = genomicend;
+ queryend = substring_queryend; /* querylength, for an end piece */
trim_left_action = NO_TRIM;
- trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
+ if (queryend == querylength) {
+ trim_right_action = COMPUTE_TRIM; /* queryend == querylength */
+ } else {
+ trim_right_action = PRE_TRIMMED;
+ }
} else if (sensedir == SENSE_ANTI) {
start_endtype = END;
end_endtype = ACC;
- querystart = 0;
+ querystart = substring_querystart; /* 0, for an end piece */
queryend = querylength - acceptor_pos;
- alignstart = genomicstart;
- alignend = genomicstart - (querylength - acceptor_pos);
- trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
+ if (querystart == 0) {
+ trim_left_action = COMPUTE_TRIM; /* querystart == 0 */
+ } else {
+ trim_left_action = PRE_TRIMMED;
+ }
trim_right_action = NO_TRIM;
} else {
abort();
}
+
+ alignstart = genomicstart - querystart;
+ alignend = genomicstart - queryend;
}
if ((new = Substring_new(acceptor_nmismatches,chrnum,chroffset,chrhigh,chrlength,
@@ -4389,6 +4426,8 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
debug2(printf("Making new acceptor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, genome %u..%u\n",
acceptor_knowni,acceptor_coord,left,plusp,sensedir,querystart,queryend,alignstart - chroffset,alignend - chroffset));
+ debug2(printf("Original bounds were %d..%d\n",substring_querystart,substring_queryend));
+
new->splicecoord = acceptor_coord;
new->splicesites_knowni = acceptor_knowni;
@@ -4542,16 +4581,16 @@ Substring_assign_donor_prob (T donor) {
} else if (donor->chimera_sensedir == SENSE_FORWARD) {
if (donor->plusp == true) {
- donor->chimera_prob = Maxent_hr_donor_prob(donor->chimera_modelpos,donor->chroffset);
+ donor->chimera_prob = Maxent_hr_donor_prob(donor->splicecoord,donor->chroffset);
} else {
- donor->chimera_prob = Maxent_hr_antidonor_prob(donor->chimera_modelpos,donor->chroffset);
+ donor->chimera_prob = Maxent_hr_antidonor_prob(donor->splicecoord,donor->chroffset);
}
} else if (donor->chimera_sensedir == SENSE_ANTI) {
if (donor->plusp == true) {
- donor->chimera_prob = Maxent_hr_antidonor_prob(donor->chimera_modelpos,donor->chroffset);
+ donor->chimera_prob = Maxent_hr_antidonor_prob(donor->splicecoord,donor->chroffset);
} else {
- donor->chimera_prob = Maxent_hr_donor_prob(donor->chimera_modelpos,donor->chroffset);
+ donor->chimera_prob = Maxent_hr_donor_prob(donor->splicecoord,donor->chroffset);
}
} else {
@@ -4573,16 +4612,16 @@ Substring_assign_acceptor_prob (T acceptor) {
} else if (acceptor->chimera_sensedir == SENSE_FORWARD) {
if (acceptor->plusp == true) {
- acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+ acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord,acceptor->chroffset);
} else {
- acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+ acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord,acceptor->chroffset);
}
} else if (acceptor->chimera_sensedir == SENSE_ANTI) {
if (acceptor->plusp == true) {
- acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+ acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord,acceptor->chroffset);
} else {
- acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+ acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord,acceptor->chroffset);
}
} else {
diff --git a/src/substring.h b/src/substring.h
index 3975d6e..56936c6 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 193327 2016-07-01 19:24:12Z twu $ */
+/* $Id: substring.h 195961 2016-08-08 16:36:34Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -108,6 +108,8 @@ extern Univcoord_T
Substring_left (T this);
extern Univcoord_T
Substring_splicecoord (T this);
+extern Chrpos_T
+Substring_chr_splicecoord (T this);
extern int
Substring_splicesites_knowni (T this);
extern Univcoord_T
@@ -273,12 +275,14 @@ Substring_new_endfrag (Univcoord_T endfrag_coord, int splice_pos, int nmismatche
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
extern T
-Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, int donor_nmismatches,
+Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos,
+ int substring_querystart, int substring_queryend, int donor_nmismatches,
double donor_prob, Univcoord_T left, Compress_T query_compress,
int querylength, bool plusp, int genestrand, int sensedir,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
extern T
-Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos, int acceptor_nmismatches,
+Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos,
+ int substring_querystart, int substring_queryend, int acceptor_nmismatches,
double acceptor_prob, Univcoord_T left, Compress_T query_compress,
int querylength, bool plusp, int genestrand, int sensedir,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
diff --git a/src/uintlist.c b/src/uintlist.c
index e35d892..f0111d1 100644
--- a/src/uintlist.c
+++ b/src/uintlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uintlist.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: uintlist.c 193875 2016-07-12 02:43:38Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/uintlist.h b/src/uintlist.h
index 83b43e5..2afcae5 100644
--- a/src/uintlist.h
+++ b/src/uintlist.h
@@ -1,4 +1,4 @@
-/* $Id: uintlist.h 193899 2016-07-12 04:41:34Z twu $ */
+/* $Id: uintlist.h 193875 2016-07-12 02:43:38Z twu $ */
#ifndef UINTLIST_INCLUDED
#define UINTLIST_INCLUDED
diff --git a/src/uniqscan.c b/src/uniqscan.c
index cc0fc87..67e63ef 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 193877 2016-07-12 02:46:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/univdiag.h b/src/univdiag.h
index 6fa8ff7..b10fab2 100644
--- a/src/univdiag.h
+++ b/src/univdiag.h
@@ -1,4 +1,4 @@
-/* $Id: univdiag.h 193897 2016-07-12 04:12:45Z twu $ */
+/* $Id: univdiag.h 195760 2016-08-04 00:12:04Z twu $ */
#ifndef UNIVDIAG_INCLUDED
#define UNIVDIAG_INCLUDED
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list