[med-svn] [gmap] 02/06: Imported Upstream version 2014-07-04
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Fri Jul 4 09:33:49 UTC 2014
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 536c54f9dac3deb5be7c296be7bf9deb33fa0028
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Fri Jul 4 09:54:28 2014 +0200
Imported Upstream version 2014-07-04
---
ChangeLog | 102 +++
VERSION | 2 +-
configure | 24 +-
src/access.c | 134 ++--
src/dynprog.c | 9 +-
src/dynprog_end.c | 532 ++++++--------
src/dynprog_simd.c | 40 +-
src/dynprog_simd.h | 2 +-
src/dynprog_single.c | 63 +-
src/genome.c | 14 +-
src/genome.h | 8 +-
src/genome128_hr.c | 367 +++++-----
src/gmap.c | 27 +-
src/gmapindex.c | 23 +-
src/intlist.c | 16 +-
src/intlist.h | 4 +-
src/samprint.c | 10 +-
src/sarray-read.c | 617 ++++++++++------
src/sarray-write.c | 742 ++++++++++++++++++--
src/sarray-write.h | 6 +-
src/splice.c | 1046 ++++++++++++++++++----------
src/splice.h | 57 +-
src/splicetrie.c | 419 +++++------
src/stage1hr.c | 1132 ++++++++++++++++--------------
src/stage3.c | 129 ++--
src/stage3hr.c | 1893 +++++++++++++++++++++++++++++---------------------
src/stage3hr.h | 19 +-
src/substring.c | 316 +++------
src/uint8list.c | 16 +-
src/uint8list.h | 4 +-
src/uintlist.c | 16 +-
src/uintlist.h | 4 +-
32 files changed, 4635 insertions(+), 3158 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index c04bf12..a4a2339 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,105 @@
+2014-07-04 twu
+
+ * VERSION: Updated version number
+
+ * dynprog_end.c: Fixed typo that called Dynprog_simd_16_upper twice, instead
+ of calling Dynprog_simd_16_upper and Dynprog_simd_16_lower
+
+ * splicetrie.c: Handling the case where Dynprog_end5_splicejunction or
+ Dynprog_end3_splicejunction returns NULL
+
+ * dynprog_simd.h: Decreased value of SIMD_MAXLENGTH_EPI8 from 40 to 30 to
+ prevent issues with overflows
+
+ * dynprog_simd.c: In traceback, using main loop to decide whether to handle
+ dir == DIAG
+
+ * dynprog_end.c: In traceback, using main loop to decide whether to handle
+ dir == DIAG. In Dynprog_end5_splicejunction and
+ Dynprog_end3_splicejunction, requiring finalscore to be positive before
+ doing any traceback.
+
+ * dynprog.c: In traceback, using main loop to decide whether to handle dir
+ == DIAG
+
+2014-07-03 twu
+
+ * sarray-write.c: Made monitoring statements work for
+ Sarray_discriminating_chars
+
+ * sarray-write.c: Implemented batch reading method for
+ Sarray_discriminating_chars
+
+ * gmapindex.c, sarray-write.c, sarray-write.h: Made the building of the LCP
+ array and discriminating chars array more memory efficient by writing
+ temporary files for rank and permuted sarray
+
+ * genome.c, genome.h: Changed type of counts to be Univcoord_T
+
+ * access.c: Fixed bug in handling of final partial block. Added debugging
+ code for checking results.
+
+2014-07-02 twu
+
+ * access.c: Increased FREAD_BATCH to 100 million elements. Modified
+ Access_allocated to always read in batches of size FREAD_BATCH.
+
+ * config.site.rescomp.prd, config.site.rescomp.tst, genome128_hr.c, gmap.c,
+ intlist.c, intlist.h, samprint.c, sarray-read.c, splice.c, splice.h, src,
+ stage1hr.c, stage3.c, stage3hr.c, stage3hr.h, substring.c, trunk,
+ uint8list.c, uint8list.h, uintlist.c, uintlist.h: Merged revisions 140131
+ through 140367 from branches/2014-06-27-fix-amb to implement separate
+ eventrim scores for start/end of read, fix cmet-stranded and
+ cmet-nonstranded modes, implement separate sense/antisense for
+ Splice_solve_single, and rewrite of ambiguous parameters from left/right
+ to donor/acceptor
+
+ * index.html: Updated for version 2014-06-10
+
+ * archive.html: Added link to version 2011-12-28
+
+ * VERSION: Updated version number
+
+ * stage3hr.c: In Stage3end_new_shortexon, setting amb_nmismatches_start and
+ amb_nmismatches_end separately
+
+ * stage3.c: Using score_introns (which looks at splice site neighborhood),
+ instead of score_alignment to count canonical introns. Using defect_rate
+ to determine whether to rely on splice site probabilities.
+
+ * stage1hr.c: Added blank lines
+
+2014-07-01 twu
+
+ * gmap.c: Preventing leftpos and rightpos from exceeding query coordinates
+ in solving for chimeras. Not using extension in finding remaining
+ alignment, since it makes alignment harder.
+
+2014-06-30 twu
+
+ * stage3.c: Transferring microexon pairs without looking at probabilities
+
+ * dynprog_single.c: Using MIN_MICROEXON_LENGTH instead of 8
+
+2014-06-26 twu
+
+ * stage3hr.c: Fixed assignment of amb_nmatches_start and amb_nmatches_end
+ for shortexons on minus strand
+
+2014-06-25 twu
+
+ * stage3hr.c: Removed debugging code
+
+ * sarray-write.c: In Sarray_compute_child, cleaning out stack at end,
+ because skipping it results in an incorrect child array
+
+2014-06-24 twu
+
+ * stage3hr.c: Commenting out assertions that are not always true
+
+ * stage1hr.c: Assigning correct values of amb_nmatches_donor and
+ amb_nmatches_acceptor to Stage3end_new_shortexon
+
2014-06-11 twu
* VERSION: Updated version number
diff --git a/VERSION b/VERSION
index 5150f45..af5e428 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2014-06-10
\ No newline at end of file
+2014-07-04
\ No newline at end of file
diff --git a/configure b/configure
index 77000c1..c6ad8ce 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2014-06-10.
+# Generated by GNU Autoconf 2.63 for gmap 2014-07-04.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2014-06-10'
-PACKAGE_STRING='gmap 2014-06-10'
+PACKAGE_VERSION='2014-07-04'
+PACKAGE_STRING='gmap 2014-07-04'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -1501,7 +1501,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2014-06-10 to adapt to many kinds of systems.
+\`configure' configures gmap 2014-07-04 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1572,7 +1572,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2014-06-10:";;
+ short | recursive ) echo "Configuration of gmap 2014-07-04:";;
esac
cat <<\_ACEOF
@@ -1695,7 +1695,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2014-06-10
+gmap configure 2014-07-04
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1709,7 +1709,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2014-06-10, which was
+It was created by gmap $as_me 2014-07-04, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2079,8 +2079,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2014-06-10" >&5
-$as_echo "2014-06-10" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2014-07-04" >&5
+$as_echo "2014-07-04" >&6; }
### Read defaults
@@ -3979,7 +3979,7 @@ fi
# Define the identity of the package.
PACKAGE=gmap
- VERSION=2014-06-10
+ VERSION=2014-07-04
cat >>confdefs.h <<_ACEOF
@@ -23689,7 +23689,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2014-06-10, which was
+This file was extended by gmap $as_me 2014-07-04, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -23752,7 +23752,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2014-06-10
+gmap config.status 2014-07-04
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/src/access.c b/src/access.c
index a1f8c05..aaf0d72 100644
--- a/src/access.c
+++ b/src/access.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: access.c 129929 2014-03-13 03:27:49Z twu $";
+static char rcsid[] = "$Id: access.c 140509 2014-07-03 01:47:47Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -264,17 +264,17 @@ first_nonzero_uint8 (size_t *i, char *filename) {
#endif
-#define FREAD_BATCH 10000000 /* 10 million at a time */
+#define FREAD_BATCH 100000000 /* 100 million elements at a time */
/* Bigendian conversion not needed after this */
void *
Access_allocated (size_t *len, double *seconds, char *filename, size_t eltsize) {
void *memory;
+#ifdef CHECK
+ void *memory2;
+#endif
FILE *fp;
Stopwatch_T stopwatch;
- unsigned char value1;
- UINT4 value4;
- UINT8 value8;
void *p;
size_t i;
@@ -284,110 +284,86 @@ Access_allocated (size_t *len, double *seconds, char *filename, size_t eltsize)
return (void *) NULL;
}
+ Stopwatch_start(stopwatch = Stopwatch_new());
+ memory = (void *) MALLOC(*len);
+
+#ifdef CHECK
+ memory2 = (void *) MALLOC(*len);
if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
fprintf(stderr,"Error: can't open file %s with fopen\n",filename);
exit(9);
}
- Stopwatch_start(stopwatch = Stopwatch_new());
- memory = (void *) MALLOC(*len);
if (eltsize == 1) {
- FREAD_CHARS(memory,(*len)/eltsize,fp);
+ FREAD_CHARS(memory2,(*len)/eltsize,fp);
} else if (eltsize == 4) {
- FREAD_UINTS(memory,(*len)/eltsize,fp);
+ FREAD_UINTS(memory2,(*len)/eltsize,fp);
} else if (eltsize == 8) {
- FREAD_UINT8S(memory,(*len)/eltsize,fp);
+ FREAD_UINT8S(memory2,(*len)/eltsize,fp);
} else {
fprintf(stderr,"Access_allocated called with an element size of %d, which is not handled\n",(int) eltsize);
exit(9);
}
fclose(fp);
+#endif
-#ifndef WORDS_BIGENDIAN
- if (eltsize == 1) {
- /* Test if Macintosh fread failure occurs. Apple bug ID 6434977 */
- value1 = first_nonzero_char(&i,filename);
- if (((unsigned char *) memory)[i] != value1) {
- fprintf(stderr,"single fread command failed (observed on Macs with -B 3 or greater on large genomes)");
-#if 0
- fprintf(stderr,"...reading file in smaller batches...");
- fp = FOPEN_READ_BINARY(filename);
-
- for (i = 0; i < (*len)/eltsize; i += FREAD_BATCH) {
- p = (void *) &(((unsigned char *) memory)[i]);
- fread(p,sizeof(unsigned char),FREAD_BATCH,fp);
- }
- if (i < (*len)/eltsize) {
- p = (void *) &(((unsigned char *) memory)[i]);
- fread(p,sizeof(unsigned char),(*len)/eltsize - i,fp);
- }
+ if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
+ fprintf(stderr,"Error: can't open file %s with fopen\n",filename);
+ exit(9);
+ }
- fclose(fp);
-#else
- fprintf(stderr,"...unable to handle this value of --batch on your machine\n");
- exit(9);
-#endif
+ if (eltsize == 1) {
+ for (i = 0; i + FREAD_BATCH < (*len)/eltsize; i += FREAD_BATCH) {
+ p = (void *) &(((unsigned char *) memory)[i]);
+ fread(p,sizeof(unsigned char),FREAD_BATCH,fp);
}
- } else if (eltsize == 4) {
- /* Test if Macintosh fread failure occurs. Apple bug ID 6434977 */
- value4 = first_nonzero_uint(&i,filename);
- if (((UINT4 *) memory)[i] != value4) {
- fprintf(stderr,"single fread command failed (observed on Macs with -B 3 or greater on large genomes)");
-#if 0
- fprintf(stderr,"...reading file in smaller batches...");
- fp = FOPEN_READ_BINARY(filename);
-
- for (i = 0; i < (*len)/eltsize; i += FREAD_BATCH) {
- p = (void *) &(((UINT4 *) memory)[i]);
- fread(p,sizeof(UINT4),FREAD_BATCH,fp);
- }
+ if (i < (*len)/eltsize) {
+ p = (void *) &(((unsigned char *) memory)[i]);
+ fread(p,sizeof(unsigned char),(*len)/eltsize - i,fp);
+ }
- if (i < (*len)/eltsize) {
- p = (void *) &(((UINT4 *) memory)[i]);
- fread(p,sizeof(UINT4),(*len)/eltsize - i,fp);
- }
+ } else if (eltsize == 4) {
+ for (i = 0; i + FREAD_BATCH < (*len)/eltsize; i += FREAD_BATCH) {
+ p = (void *) &(((UINT4 *) memory)[i]);
+ fread(p,sizeof(UINT4),FREAD_BATCH,fp);
+ }
- fclose(fp);
-#else
- fprintf(stderr,"...unable to handle this value of --batch on your machine\n");
- exit(9);
-#endif
+ if (i < (*len)/eltsize) {
+ p = (void *) &(((UINT4 *) memory)[i]);
+ fread(p,sizeof(UINT4),(*len)/eltsize - i,fp);
}
} else if (eltsize == 8) {
- /* Test if Macintosh fread failure occurs. Apple bug ID 6434977 */
- value8 = first_nonzero_uint8(&i,filename);
- if (((UINT8 *) memory)[i] != value8) {
- fprintf(stderr,"single fread command failed (observed on Macs with -B 3 or greater on large genomes)");
-#if 0
- fprintf(stderr,"...reading file in smaller batches...");
- fp = FOPEN_READ_BINARY(filename);
-
- for (i = 0; i < (*len)/eltsize; i += FREAD_BATCH) {
- p = (void *) &(((UINT8 *) memory)[i]);
- fread(p,sizeof(UINT8),FREAD_BATCH,fp);
- }
-
- if (i < (*len)/eltsize) {
- p = (void *) &(((UINT8 *) memory)[i]);
- fread(p,sizeof(UINT8),(*len)/eltsize - i,fp);
- }
-
- fclose(fp);
-#else
- fprintf(stderr,"...unable to handle this value of --batch on your machine\n");
- exit(9);
-#endif
+ for (i = 0; i + FREAD_BATCH < (*len)/eltsize; i += FREAD_BATCH) {
+ p = (void *) &(((UINT8 *) memory)[i]);
+ fread(p,sizeof(UINT8),FREAD_BATCH,fp);
+ }
+
+ if (i < (*len)/eltsize) {
+ p = (void *) &(((UINT8 *) memory)[i]);
+ fread(p,sizeof(UINT8),(*len)/eltsize - i,fp);
}
+ } else {
+ fprintf(stderr,"Access_allocated called with an element size of %d, which is not handled\n",(int) eltsize);
+ exit(9);
}
-#endif
+ fclose(fp);
/* Note: the following (old non-batch mode) requires conversion to bigendian later, as needed */
/* fread(new->offsets,eltsize,sb.st_size/eltsize,fp); */
+#ifdef CHECK
+ for (i = 0; i < *len; i++) {
+ if (((unsigned char *) memory)[i] != ((unsigned char *) memory2)[i]) {
+ abort();
+ }
+ }
+ FREE(memory2);
+#endif
+
*seconds = Stopwatch_stop(stopwatch);
Stopwatch_free(&stopwatch);
diff --git a/src/dynprog.c b/src/dynprog.c
index a6f5e9d..88eaa5c 100644
--- a/src/dynprog.c
+++ b/src/dynprog.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog.c 138110 2014-06-04 19:34:22Z twu $";
+static char rcsid[] = "$Id: dynprog.c 140648 2014-07-04 01:14:57Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1580,12 +1580,14 @@ Dynprog_traceback_std (List_T pairs, int *nmatches, int *nmismatches, int *nopen
while (c > 0 && directions_Egap[c--][r] != DIAG) {
dist++;
}
+#if 0
if (c == 0) {
/* Directions in column 0 can sometimes be DIAG */
dir = VERT;
} else {
dir = directions_nogap[c][r];
}
+#endif
debug(printf("H%d: ",dist));
pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,
@@ -1603,12 +1605,14 @@ Dynprog_traceback_std (List_T pairs, int *nmatches, int *nmismatches, int *nopen
while (r > 0 && directions_Fgap[c][r--] != DIAG) {
dist++;
}
+#if 0
if (r == 0) {
/* Directions in row 0 can sometimes be DIAG */
dir = HORIZ;
} else {
dir = directions_nogap[c][r];
}
+#endif
debug(printf("V%d: ",dist));
pairs = Pairpool_add_queryskip(pairs,r+dist,c,dist,rsequence,
@@ -1617,9 +1621,8 @@ Dynprog_traceback_std (List_T pairs, int *nmatches, int *nmismatches, int *nopen
*nopens += 1;
*nindels += dist;
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = r-1;
genomecoord = c-1;
if (revp == true) {
diff --git a/src/dynprog_end.c b/src/dynprog_end.c
index ed14180..06d44c5 100644
--- a/src/dynprog_end.c
+++ b/src/dynprog_end.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_end.c 138715 2014-06-11 17:05:56Z twu $";
+static char rcsid[] = "$Id: dynprog_end.c 140653 2014-07-04 02:01:10Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -673,36 +673,6 @@ traceback_local_8_upper (List_T pairs, int *nmatches, int *nmismatches, int *nop
/* We care only only about genomic coordinate c */
-#if 0
- if (*c <= endc) {
- /* Do nothing */
-
- } else if ((dir = directions_nogap[*c][*r]) == DIAG) {
- /* Not an indel. Do nothing. */
-
- } else {
- /* Must be HORIZ */
- dist = 1;
- while (*c > 1 && directions_Egap[*c][*r] != DIAG) {
- dist++;
- (*c)--;
- }
- (*c)--;
- /* dir = directions_nogap[*c][*r]; */
-
- debug(printf("H%d: ",dist));
- pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,*r,(*c)+dist,dist,genomesequence,genomesequenceuc,
- queryoffset,genomeoffset,pairpool,revp,chroffset,chrhigh,
- cdna_direction,watsonp,dynprogindex,/*use_genomicseg_p*/true);
- if (add_dashes_p == true) {
- *nopens += 1;
- *nindels += dist;
- }
- debug(printf("\n"));
-
- }
-#endif
-
while (*r > 0 && *c > endc) {
if ((dir = directions_nogap[*c][*r]) != DIAG) {
/* Must be HORIZ */
@@ -710,8 +680,7 @@ traceback_local_8_upper (List_T pairs, int *nmatches, int *nmismatches, int *nop
while (*c > endc && directions_Egap[(*c)--][*r] != DIAG) {
dist++;
}
- assert(c != endc);
- dir = directions_nogap[*c][*r];
+ /* assert(*c != endc); */
debug(printf("H%d: ",dist));
pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,*r,(*c)+dist,dist,
@@ -723,9 +692,8 @@ traceback_local_8_upper (List_T pairs, int *nmatches, int *nmismatches, int *nop
*nindels += dist;
}
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = (*r)-1;
genomecoord = (*c)-1;
if (revp == true) {
@@ -759,8 +727,8 @@ traceback_local_8_upper (List_T pairs, int *nmatches, int *nmismatches, int *nop
pairs = Pairpool_push(pairs,pairpool,queryoffset+querycoord,genomeoffset+genomecoord,
c1,MISMATCH_COMP,c2,c2_alt,dynprogindex);
}
- (*r)--; (*c)--;
+ (*r)--; (*c)--;
}
}
@@ -801,48 +769,20 @@ traceback_local_8_lower (List_T pairs, int *nmatches, int *nmismatches, int *nop
int querycoord, genomecoord;
Direction8_T dir;
- debug(printf("Starting traceback_local at r=%d,c=%d (roffset=%d, goffset=%d)\n",*r,*c,queryoffset,genomeoffset));
+ debug(printf("Starting traceback_local_8_lower at r=%d,c=%d (roffset=%d, goffset=%d)\n",*r,*c,queryoffset,genomeoffset));
/* We care only only about genomic coordinate c */
-#if 0
- if (*c <= endc) {
- /* Do nothing */
-
- } else if ((dir = directions_nogap[*r][*c]) == DIAG) {
- /* Not an indel. Do nothing. */
-
- } else {
- /* Must be VERT */
- dist = 1;
- while (*r > 1 && directions_Egap[*r][*c] != DIAG) {
- dist++;
- (*r)--;
- }
- (*r)--;
- /* dir = directions_nogap[*r][*c]; */
-
- debug(printf("V%d: ",dist));
- pairs = Pairpool_add_queryskip(pairs,(*r)+dist,*c,dist,rsequence,
- queryoffset,genomeoffset,pairpool,revp,
- dynprogindex);
- *nopens += 1;
- *nindels += dist;
- debug(printf("\n"));
- }
-#endif
-
while (*r > 0 && *c > endc) {
if ((dir = directions_nogap[*r][*c]) != DIAG) {
/* Must be VERT */
dist = 1;
/* Should not need to check for r > 0 if the main diagonal is populated with DIAG */
- while (/* r > 0 && */ directions_Egap[(*r)--][*c] != DIAG) {
+ while (/* *r > 0 && */ directions_Egap[(*r)--][*c] != DIAG) {
dist++;
}
- assert(r != 0);
- dir = directions_nogap[*r][*c];
-
+ /* assert(*r != 0); */
+
debug(printf("V%d: ",dist));
pairs = Pairpool_add_queryskip(pairs,(*r)+dist,*c,dist,rsequence,
queryoffset,genomeoffset,pairpool,revp,
@@ -850,9 +790,8 @@ traceback_local_8_lower (List_T pairs, int *nmatches, int *nmismatches, int *nop
*nopens += 1;
*nindels += dist;
debug(printf("\n"));
- }
-
- if (dir == DIAG) {
+
+ } else {
querycoord = (*r)-1;
genomecoord = (*c)-1;
if (revp == true) {
@@ -886,6 +825,7 @@ traceback_local_8_lower (List_T pairs, int *nmatches, int *nmismatches, int *nop
pairs = Pairpool_push(pairs,pairpool,queryoffset+querycoord,genomeoffset+genomecoord,
c1,MISMATCH_COMP,c2,c2_alt,dynprogindex);
}
+
(*r)--; (*c)--;
}
}
@@ -929,35 +869,6 @@ traceback_local_16_upper (List_T pairs, int *nmatches, int *nmismatches, int *no
/* We care only only about genomic coordinate c */
-#if 0
- if (*c <= endc) {
- /* Do nothing */
-
- } else if ((dir = directions_nogap[*c][*r]) == DIAG) {
- /* Not an indel. Do nothing. */
-
- } else {
- /* Must be HORIZ */
- dist = 1;
- while (*c > 1 && directions_Egap[*c][*r] != DIAG) {
- dist++;
- (*c)--;
- }
- (*c)--;
- /* dir = directions_nogap[c][r]; */
-
- debug(printf("H%d: ",dist));
- pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,*r,(*c)+dist,dist,genomesequence,genomesequenceuc,
- queryoffset,genomeoffset,pairpool,revp,chroffset,chrhigh,
- cdna_direction,watsonp,dynprogindex,/*use_genomicseg_p*/true);
- if (add_dashes_p == true) {
- *nopens += 1;
- *nindels += dist;
- }
- debug(printf("\n"));
- }
-#endif
-
while (*r > 0 && *c > endc) {
if ((dir = directions_nogap[*c][*r]) != DIAG) {
/* Must be HORIZ */
@@ -965,7 +876,6 @@ traceback_local_16_upper (List_T pairs, int *nmatches, int *nmismatches, int *no
while (*c > endc && directions_Egap[(*c)--][*r] != DIAG) {
dist++;
}
- dir = directions_nogap[*c][*r];
debug(printf("H%d: ",dist));
pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,*r,(*c)+dist,dist,
@@ -977,9 +887,8 @@ traceback_local_16_upper (List_T pairs, int *nmatches, int *nmismatches, int *no
*nindels += dist;
}
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = (*r)-1;
genomecoord = (*c)-1;
if (revp == true) {
@@ -1013,8 +922,8 @@ traceback_local_16_upper (List_T pairs, int *nmatches, int *nmismatches, int *no
pairs = Pairpool_push(pairs,pairpool,queryoffset+querycoord,genomeoffset+genomecoord,
c1,MISMATCH_COMP,c2,c2_alt,dynprogindex);
}
- (*r)--; (*c)--;
+ (*r)--; (*c)--;
}
}
@@ -1058,43 +967,15 @@ traceback_local_16_lower (List_T pairs, int *nmatches, int *nmismatches, int *no
/* We care only only about genomic coordinate c */
-#if 0
- if (*c <= endc) {
- /* Do nothing */
-
- } else if ((dir = directions_nogap[*r][*c]) == DIAG) {
- /* Not an indel. Do nothing. */
-
- } else {
- /* Must be VERT */
- dist = 1;
- while (*r > 1 && directions_Egap[*r][*c] != DIAG) {
- dist++;
- (*r)--;
- }
- (*r)--;
- /* dir = directions_nogap[*r][*c]; */
-
- debug(printf("V%d: ",dist));
- pairs = Pairpool_add_queryskip(pairs,(*r)+dist,*c,dist,rsequence,
- queryoffset,genomeoffset,pairpool,revp,
- dynprogindex);
- *nopens += 1;
- *nindels += dist;
- debug(printf("\n"));
- }
-#endif
-
while (*r > 0 && *c > endc) {
if ((dir = directions_nogap[*r][*c]) != DIAG) {
/* Must be VERT */
dist = 1;
/* Should not need to check for r > 0 if the main diagonal is populated with DIAG */
- while (/* r > 0 && */ directions_Egap[(*r)--][*c] != DIAG) {
+ while (/* *r > 0 && */ directions_Egap[(*r)--][*c] != DIAG) {
dist++;
}
- assert(*r != 0);
- dir = directions_nogap[*r][*c];
+ /* assert(*r != 0); */
debug(printf("V%d: ",dist));
pairs = Pairpool_add_queryskip(pairs,(*r)+dist,*c,dist,rsequence,
@@ -1103,9 +984,8 @@ traceback_local_16_lower (List_T pairs, int *nmatches, int *nmismatches, int *no
*nopens += 1;
*nindels += dist;
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = (*r)-1;
genomecoord = (*c)-1;
if (revp == true) {
@@ -1139,8 +1019,8 @@ traceback_local_16_lower (List_T pairs, int *nmatches, int *nmismatches, int *no
pairs = Pairpool_push(pairs,pairpool,queryoffset+querycoord,genomeoffset+genomecoord,
c1,MISMATCH_COMP,c2,c2_alt,dynprogindex);
}
+
(*r)--; (*c)--;
-
}
}
@@ -1758,7 +1638,7 @@ Dynprog_end5_splicejunction (int *dynprogindex, int *finalscore, int *missscore,
#endif
mismatchtype,open,extend,
uband,/*for revp true*/!jump_late_p,/*revp*/true);
- matrix16_lower = Dynprog_simd_16_upper(&directions16_lower_nogap,&directions16_lower_Egap,dynprog,
+ matrix16_lower = Dynprog_simd_16_lower(&directions16_lower_nogap,&directions16_lower_Egap,dynprog,
rev_rsequence,rev_gsequence_uc,rev_gsequence_alt,
rlength,glength,
#ifdef DEBUG14
@@ -1781,119 +1661,125 @@ Dynprog_end5_splicejunction (int *dynprogindex, int *finalscore, int *missscore,
!jump_late_p);
#endif
- *nmatches = *nmismatches = *nopens = *nindels = 0;
+ if (*finalscore < 0) {
+ /* Need a reasonable alignment to call a splice */
+ return (List_T) NULL;
+
+ } else {
+ *nmatches = *nmismatches = *nopens = *nindels = 0;
#if defined(HAVE_SSE4_1) || defined(HAVE_SSE2)
- if (use8p == true) {
- if (bestc >= bestr) {
- pairs = traceback_local_8_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions8_upper_nogap,directions8_upper_Egap,&bestr,&bestc,/*endc*/contlength,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- } else {
- pairs = traceback_local_8_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions8_lower_nogap,directions8_lower_Egap,&bestr,&bestc,/*endc*/contlength,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- }
+ if (use8p == true) {
+ if (bestc >= bestr) {
+ pairs = traceback_local_8_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions8_upper_nogap,directions8_upper_Egap,&bestr,&bestc,/*endc*/contlength,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ } else {
+ pairs = traceback_local_8_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions8_lower_nogap,directions8_lower_Egap,&bestr,&bestc,/*endc*/contlength,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ }
- pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/rev_goffset_anchor - rev_goffset_far,
- /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
+ pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/rev_goffset_anchor - rev_goffset_far,
+ /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
- if (bestc >= bestr) {
- pairs = traceback_local_8_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions8_upper_nogap,directions8_upper_Egap,&bestr,&bestc,/*endc*/0,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- } else {
- pairs = traceback_local_8_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions8_lower_nogap,directions8_lower_Egap,&bestr,&bestc,/*endc*/0,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- }
+ if (bestc >= bestr) {
+ pairs = traceback_local_8_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions8_upper_nogap,directions8_upper_Egap,&bestr,&bestc,/*endc*/0,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ } else {
+ pairs = traceback_local_8_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions8_lower_nogap,directions8_lower_Egap,&bestr,&bestc,/*endc*/0,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ }
- } else {
- if (bestc >= bestr) {
- pairs = traceback_local_16_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions16_upper_nogap,directions16_upper_Egap,&bestr,&bestc,/*endc*/contlength,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
} else {
- pairs = traceback_local_16_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions16_lower_nogap,directions16_lower_Egap,&bestr,&bestc,/*endc*/contlength,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- }
+ if (bestc >= bestr) {
+ pairs = traceback_local_16_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions16_upper_nogap,directions16_upper_Egap,&bestr,&bestc,/*endc*/contlength,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ } else {
+ pairs = traceback_local_16_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions16_lower_nogap,directions16_lower_Egap,&bestr,&bestc,/*endc*/contlength,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ }
- pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/rev_goffset_anchor - rev_goffset_far,
- /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
+ pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/rev_goffset_anchor - rev_goffset_far,
+ /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
- if (bestc >= bestr) {
- pairs = traceback_local_16_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions16_upper_nogap,directions16_upper_Egap,&bestr,&bestc,/*endc*/0,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- } else {
- pairs = traceback_local_16_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions16_lower_nogap,directions16_lower_Egap,&bestr,&bestc,/*endc*/0,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ if (bestc >= bestr) {
+ pairs = traceback_local_16_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions16_upper_nogap,directions16_upper_Egap,&bestr,&bestc,/*endc*/0,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ } else {
+ pairs = traceback_local_16_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions16_lower_nogap,directions16_lower_Egap,&bestr,&bestc,/*endc*/0,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ }
}
- }
#else
- pairs = traceback_local_std(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions_nogap,directions_Egap,directions_Fgap,&bestr,&bestc,/*endc*/contlength,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ pairs = traceback_local_std(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions_nogap,directions_Egap,directions_Fgap,&bestr,&bestc,/*endc*/contlength,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_far,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/rev_goffset_anchor - rev_goffset_far,
- /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
+ pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/rev_goffset_anchor - rev_goffset_far,
+ /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
- pairs = traceback_local_std(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions_nogap,directions_Egap,directions_Fgap,&bestr,&bestc,/*endc*/0,
- rev_rsequence,rev_rsequenceuc,
- rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
- rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ pairs = traceback_local_std(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions_nogap,directions_Egap,directions_Fgap,&bestr,&bestc,/*endc*/0,
+ rev_rsequence,rev_rsequenceuc,
+ rev_gsequence,rev_gsequence_uc,rev_gsequence_alt,
+ rev_roffset,rev_goffset_anchor,pairpool,/*revp*/true,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
#endif
- /* Score compared with perfect score, so heavy weight on mismatches may not be necessary */
- *finalscore = (*nmatches)*FULLMATCH + (*nmismatches)*MISMATCH_ENDQ + (*nopens)*open + (*nindels)*extend;
- *missscore = (*finalscore) - rlength*FULLMATCH;
- debug6(printf("finalscore %d = %d*%d matches + %d*%d mismatches + %d*%d opens + %d*%d extends\n",
- *finalscore,FULLMATCH,*nmatches,MISMATCH_ENDQ,*nmismatches,open,*nopens,extend,*nindels));
- debug6(printf("missscore = %d\n",*missscore));
+ /* Score compared with perfect score, so heavy weight on mismatches may not be necessary */
+ *finalscore = (*nmatches)*FULLMATCH + (*nmismatches)*MISMATCH_ENDQ + (*nopens)*open + (*nindels)*extend;
+ *missscore = (*finalscore) - rlength*FULLMATCH;
+ debug6(printf("finalscore %d = %d*%d matches + %d*%d mismatches + %d*%d opens + %d*%d extends\n",
+ *finalscore,FULLMATCH,*nmatches,MISMATCH_ENDQ,*nmismatches,open,*nopens,extend,*nindels));
+ debug6(printf("missscore = %d\n",*missscore));
- /* Add 1 to count the match already in the alignment */
- pairs = List_reverse(pairs); /* Look at 5' end to remove excess gaps */
- while (pairs != NULL && (pair = List_head(pairs)) && pair->comp == INDEL_COMP) {
- pairs = List_next(pairs);
- }
+ /* Add 1 to count the match already in the alignment */
+ pairs = List_reverse(pairs); /* Look at 5' end to remove excess gaps */
+ while (pairs != NULL && (pair = List_head(pairs)) && pair->comp == INDEL_COMP) {
+ pairs = List_next(pairs);
+ }
- debug6(Pair_dump_list(pairs,true));
- debug6(printf("End of dynprog end5 gap splicejunction\n\n"));
+ debug6(Pair_dump_list(pairs,true));
+ debug6(printf("End of dynprog end5 gap splicejunction\n\n"));
- *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
- return List_reverse(pairs);
+ *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
+ return List_reverse(pairs);
+ }
}
@@ -2018,7 +1904,7 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
#endif
mismatchtype,open,extend,
uband,jump_late_p,/*revp*/false);
- matrix16_lower = Dynprog_simd_16_upper(&directions16_lower_nogap,&directions16_lower_Egap,dynprog,
+ matrix16_lower = Dynprog_simd_16_lower(&directions16_lower_nogap,&directions16_lower_Egap,dynprog,
rsequenceuc,gsequence,gsequence_alt,rlength,glength,
#ifdef DEBUG14
goffset,chroffset,chrhigh,watsonp,
@@ -2338,105 +2224,111 @@ Dynprog_end3_splicejunction (int *dynprogindex, int *finalscore, int *missscore,
jump_late_p);
#endif
- *nmatches = *nmismatches = *nopens = *nindels = 0;
-#if defined(HAVE_SSE4_1) || defined(HAVE_SSE2)
- if (use8p == true) {
- if (bestc >= bestr) {
- pairs = traceback_local_8_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions8_upper_nogap,directions8_upper_Egap,&bestr,&bestc,/*endc*/contlength,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_far,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- } else {
- pairs = traceback_local_8_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions8_lower_nogap,directions8_lower_Egap,&bestr,&bestc,/*endc*/contlength,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_far,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- }
- pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/goffset_far - goffset_anchor,
- /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
- if (bestc >= bestr) {
- pairs = traceback_local_8_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions8_upper_nogap,directions8_upper_Egap,&bestr,&bestc,/*endc*/0,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_anchor,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- } else {
- pairs = traceback_local_8_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions8_lower_nogap,directions8_lower_Egap,&bestr,&bestc,/*endc*/0,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_anchor,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- }
+ if (*finalscore < 0) {
+ /* Need a reasonable alignment to call a splice */
+ return (List_T) NULL;
} else {
- if (bestc >= bestr) {
- pairs = traceback_local_16_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions16_upper_nogap,directions16_upper_Egap,&bestr,&bestc,/*endc*/contlength,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_far,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- } else {
- pairs = traceback_local_16_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions16_lower_nogap,directions16_lower_Egap,&bestr,&bestc,/*endc*/contlength,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_far,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- }
- pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/goffset_far - goffset_anchor,
- /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
- if (bestc >= bestr) {
- pairs = traceback_local_16_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions16_upper_nogap,directions16_upper_Egap,&bestr,&bestc,/*endc*/0,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_anchor,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ *nmatches = *nmismatches = *nopens = *nindels = 0;
+#if defined(HAVE_SSE4_1) || defined(HAVE_SSE2)
+ if (use8p == true) {
+ if (bestc >= bestr) {
+ pairs = traceback_local_8_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions8_upper_nogap,directions8_upper_Egap,&bestr,&bestc,/*endc*/contlength,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_far,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ } else {
+ pairs = traceback_local_8_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions8_lower_nogap,directions8_lower_Egap,&bestr,&bestc,/*endc*/contlength,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_far,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ }
+ pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/goffset_far - goffset_anchor,
+ /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
+ if (bestc >= bestr) {
+ pairs = traceback_local_8_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions8_upper_nogap,directions8_upper_Egap,&bestr,&bestc,/*endc*/0,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_anchor,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ } else {
+ pairs = traceback_local_8_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions8_lower_nogap,directions8_lower_Egap,&bestr,&bestc,/*endc*/0,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_anchor,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ }
+
} else {
- pairs = traceback_local_16_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions16_lower_nogap,directions16_lower_Egap,&bestr,&bestc,/*endc*/0,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_anchor,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ if (bestc >= bestr) {
+ pairs = traceback_local_16_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions16_upper_nogap,directions16_upper_Egap,&bestr,&bestc,/*endc*/contlength,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_far,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ } else {
+ pairs = traceback_local_16_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions16_lower_nogap,directions16_lower_Egap,&bestr,&bestc,/*endc*/contlength,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_far,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ }
+ pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/goffset_far - goffset_anchor,
+ /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
+ if (bestc >= bestr) {
+ pairs = traceback_local_16_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions16_upper_nogap,directions16_upper_Egap,&bestr,&bestc,/*endc*/0,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_anchor,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ } else {
+ pairs = traceback_local_16_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions16_lower_nogap,directions16_lower_Egap,&bestr,&bestc,/*endc*/0,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_anchor,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ }
}
- }
#else
- /* Non-SIMD methods */
- pairs = traceback_local_std(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions_nogap,directions_Egap,directions_Fgap,&bestr,&bestc,/*endc*/contlength,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_far,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ /* Non-SIMD methods */
+ pairs = traceback_local_std(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions_nogap,directions_Egap,directions_Fgap,&bestr,&bestc,/*endc*/contlength,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_far,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
- pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/goffset_far - goffset_anchor,
- /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
+ pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/goffset_far - goffset_anchor,
+ /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/true);
- pairs = traceback_local_std(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
- directions_nogap,directions_Egap,directions_Fgap,&bestr,&bestc,/*endc*/0,
- rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
- roffset,goffset_anchor,pairpool,/*revp*/false,
- chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
+ pairs = traceback_local_std(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
+ directions_nogap,directions_Egap,directions_Fgap,&bestr,&bestc,/*endc*/0,
+ rsequence,rsequenceuc,gsequence,gsequence_uc,gsequence_alt,
+ roffset,goffset_anchor,pairpool,/*revp*/false,
+ chroffset,chrhigh,cdna_direction,watsonp,*dynprogindex);
#endif
- /* Score compared with perfect score, so heavy weight on mismatches may not be necessary */
- *finalscore = (*nmatches)*FULLMATCH + (*nmismatches)*MISMATCH_ENDQ + (*nopens)*open + (*nindels)*extend;
- *missscore = (*finalscore) - rlength*FULLMATCH;
- debug6(printf("finalscore %d = %d*%d matches + %d*%d mismatches + %d*%d opens + %d*%d extends\n",
- *finalscore,FULLMATCH,*nmatches,MISMATCH_ENDQ,*nmismatches,open,*nopens,extend,*nindels));
- debug6(printf("missscore = %d\n",*missscore));
+ /* Score compared with perfect score, so heavy weight on mismatches may not be necessary */
+ *finalscore = (*nmatches)*FULLMATCH + (*nmismatches)*MISMATCH_ENDQ + (*nopens)*open + (*nindels)*extend;
+ *missscore = (*finalscore) - rlength*FULLMATCH;
+ debug6(printf("finalscore %d = %d*%d matches + %d*%d mismatches + %d*%d opens + %d*%d extends\n",
+ *finalscore,FULLMATCH,*nmatches,MISMATCH_ENDQ,*nmismatches,open,*nopens,extend,*nindels));
+ debug6(printf("missscore = %d\n",*missscore));
- /* Add 1 to count the match already in the alignment */
- pairs = List_reverse(pairs); /* Look at 3' end to remove excess gaps */
- while (pairs != NULL && (pair = List_head(pairs)) && pair->comp == INDEL_COMP) {
- pairs = List_next(pairs);
- }
+ /* Add 1 to count the match already in the alignment */
+ pairs = List_reverse(pairs); /* Look at 3' end to remove excess gaps */
+ while (pairs != NULL && (pair = List_head(pairs)) && pair->comp == INDEL_COMP) {
+ pairs = List_next(pairs);
+ }
- debug6(Pair_dump_list(pairs,true));
- debug6(printf("End of dynprog end3 gap splicejunction\n\n"));
+ debug6(Pair_dump_list(pairs,true));
+ debug6(printf("End of dynprog end3 gap splicejunction\n\n"));
- *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
- return pairs; /* not List_reverse(pairs) */
+ *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
+ return pairs; /* not List_reverse(pairs) */
+ }
}
diff --git a/src/dynprog_simd.c b/src/dynprog_simd.c
index 3ca7304..0cca861 100644
--- a/src/dynprog_simd.c
+++ b/src/dynprog_simd.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_simd.c 138119 2014-06-04 20:29:09Z twu $";
+static char rcsid[] = "$Id: dynprog_simd.c 140650 2014-07-04 01:15:48Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -4186,12 +4186,14 @@ Dynprog_traceback_8 (List_T pairs, int *nmatches, int *nmismatches, int *nopens,
while (c > 0 && directions_Egap[c--][r] != DIAG) {
dist++;
}
+#if 0
if (c == 0) {
/* Directions in column 0 can sometimes be DIAG */
dir = VERT;
} else {
dir = directions_nogap[c][r];
}
+#endif
debug(printf("H%d: ",dist));
pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,
@@ -4209,12 +4211,14 @@ Dynprog_traceback_8 (List_T pairs, int *nmatches, int *nmismatches, int *nopens,
while (r > 0 && directions_Fgap[c][r--] != DIAG) {
dist++;
}
+#if 0
if (r == 0) {
/* Directions in row 0 can sometimes be DIAG */
dir = HORIZ;
} else {
dir = directions_nogap[c][r];
}
+#endif
debug(printf("V%d: ",dist));
pairs = Pairpool_add_queryskip(pairs,r+dist,c,dist,rsequence,
@@ -4223,9 +4227,8 @@ Dynprog_traceback_8 (List_T pairs, int *nmatches, int *nmismatches, int *nopens,
*nopens += 1;
*nindels += dist;
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = r-1;
genomecoord = c-1;
if (revp == true) {
@@ -4339,8 +4342,7 @@ Dynprog_traceback_8_upper (List_T pairs, int *nmatches, int *nmismatches, int *n
while (/* c > 0 && */ directions_Egap[c--][r] != DIAG) {
dist++;
}
- assert(c != 0);
- dir = directions_nogap[c][r];
+ /* assert(c != 0); */
debug(printf("H%d: ",dist));
pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,
@@ -4352,9 +4354,8 @@ Dynprog_traceback_8_upper (List_T pairs, int *nmatches, int *nmismatches, int *n
*nindels += dist;
}
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = r-1;
genomecoord = c-1;
if (revp == true) {
@@ -4457,8 +4458,7 @@ Dynprog_traceback_8_lower (List_T pairs, int *nmatches, int *nmismatches, int *n
while (/* r > 0 && */ directions_Egap[r--][c] != DIAG) {
dist++;
}
- assert(r != 0);
- dir = directions_nogap[r][c];
+ /* assert(r != 0); */
debug(printf("V%d: ",dist));
pairs = Pairpool_add_queryskip(pairs,r+dist,c,dist,rsequence,
@@ -4467,9 +4467,8 @@ Dynprog_traceback_8_lower (List_T pairs, int *nmatches, int *nmismatches, int *n
*nopens += 1;
*nindels += dist;
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = r-1;
genomecoord = c-1;
if (revp == true) {
@@ -4568,12 +4567,14 @@ Dynprog_traceback_16 (List_T pairs, int *nmatches, int *nmismatches, int *nopens
while (c > 0 && directions_Egap[c--][r] != DIAG) {
dist++;
}
+#if 0
if (c == 0) {
/* Directions in column 0 can sometimes be DIAG */
dir = VERT;
} else {
dir = directions_nogap[c][r];
}
+#endif
debug(printf("H%d: ",dist));
pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,
@@ -4591,12 +4592,14 @@ Dynprog_traceback_16 (List_T pairs, int *nmatches, int *nmismatches, int *nopens
while (r > 0 && directions_Fgap[c][r--] != DIAG) {
dist++;
}
+#if 0
if (r == 0) {
/* Directions in row 0 can sometimes be DIAG */
dir = HORIZ;
} else {
dir = directions_nogap[c][r];
}
+#endif
debug(printf("V%d: ",dist));
debug(printf("New dir at %d,%d is %d\n",c,r,dir));
@@ -4606,9 +4609,8 @@ Dynprog_traceback_16 (List_T pairs, int *nmatches, int *nmismatches, int *nopens
*nopens += 1;
*nindels += dist;
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = r-1;
genomecoord = c-1;
if (revp == true) {
@@ -4720,8 +4722,7 @@ Dynprog_traceback_16_upper (List_T pairs, int *nmatches, int *nmismatches, int *
while (/* c > 0 && */ directions_Egap[c--][r] != DIAG) {
dist++;
}
- assert(c != 0);
- dir = directions_nogap[c][r];
+ /* assert(c != 0); */
debug(printf("H%d: ",dist));
pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,
@@ -4733,9 +4734,8 @@ Dynprog_traceback_16_upper (List_T pairs, int *nmatches, int *nmismatches, int *
*nindels += dist;
}
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = r-1;
genomecoord = c-1;
if (revp == true) {
@@ -4838,8 +4838,7 @@ Dynprog_traceback_16_lower (List_T pairs, int *nmatches, int *nmismatches, int *
while (/* r > 0 && */ directions_Egap[r--][c] != DIAG) {
dist++;
}
- assert(r != 0);
- dir = directions_nogap[r][c];
+ /* assert(r != 0); */
debug(printf("V%d: ",dist));
pairs = Pairpool_add_queryskip(pairs,r+dist,c,dist,rsequence,
@@ -4848,9 +4847,8 @@ Dynprog_traceback_16_lower (List_T pairs, int *nmatches, int *nmismatches, int *
*nopens += 1;
*nindels += dist;
debug(printf("\n"));
- }
- if (dir == DIAG) {
+ } else {
querycoord = r-1;
genomecoord = c-1;
if (revp == true) {
diff --git a/src/dynprog_simd.h b/src/dynprog_simd.h
index 56948cf..f2e314b 100644
--- a/src/dynprog_simd.h
+++ b/src/dynprog_simd.h
@@ -3,7 +3,7 @@
#include "dynprog.h"
-#define SIMD_MAXLENGTH_EPI8 40 /* 128/3 */
+#define SIMD_MAXLENGTH_EPI8 30 /* Previously had 40 = 128/3, but have seen 7-bit overflow empirically at matrices of size 30 */
#define T Dynprog_T
diff --git a/src/dynprog_single.c b/src/dynprog_single.c
index 154e473..50f982e 100644
--- a/src/dynprog_single.c
+++ b/src/dynprog_single.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_single.c 137609 2014-05-30 01:06:40Z twu $";
+static char rcsid[] = "$Id: dynprog_single.c 140230 2014-06-30 21:31:58Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -45,6 +45,13 @@ static char rcsid[] = "$Id: dynprog_single.c 137609 2014-05-30 01:06:40Z twu $";
#define debug(x)
#endif
+/* Microexon search */
+#ifdef DEBUG1
+#define debug1(x) x
+#else
+#define debug1(x)
+#endif
+
/* Getting genomic nt */
#ifdef DEBUG8
#define debug8(x) x
@@ -849,7 +856,7 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
int bestcL = -1, bestcR = -1, best_middlelength;
int middlelength, cL, cR, mincR, maxcR, leftbound, rightbound, textleft, textright,
best_candidate, candidate, i;
- int min_microexon_length, span, nmismatches;
+ int span, nmismatches;
char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
char c, c_alt;
char c1_alt, c2_alt, c3_alt, c4_alt;
@@ -899,16 +906,16 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
#endif
#ifdef EXTRACT_GENOMICSEG
- debug(printf("Begin microexon search for %.*s and %.*s\n",
+ debug1(printf("Begin microexon search for %.*s and %.*s\n",
glengthL,gsequenceL,glengthR,&(rev_gsequenceR[-glengthR+1])));
#else
- debug(printf("Begin microexon search\n"));
+ debug1(printf("Begin microexon search\n"));
#endif
- debug(printf(" Query sequence is %.*s\n",rlength,rsequence));
+ debug1(printf(" Query sequence is %.*s\n",rlength,rsequence));
span = rev_goffsetR-goffsetL;
- debug(printf(" Genomic span is of length %d\n",span));
+ debug1(printf(" Genomic span is of length %d\n",span));
#if 0
if (span <= 0) {
@@ -918,27 +925,27 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
min_microexon_length = ceilf(-fasterlog(1.0-powf(1.0-pvalue,1.0/(float) span)) / /*log(4)*/1.386294);
}
min_microexon_length -= 8; /* Two donor-acceptor pairs */
- debug(printf(" Min microexon length is %d\n",min_microexon_length));
+ debug1(printf(" Min microexon length is %d\n",min_microexon_length));
if (min_microexon_length > MAX_MICROEXON_LENGTH) {
*microintrontype = NONINTRON;
return NULL;
} else if (min_microexon_length < MIN_MICROEXON_LENGTH) {
min_microexon_length = MIN_MICROEXON_LENGTH;
}
-#else
- min_microexon_length = 8;
+#elif 0
+ min_microexon_length = 6;
#endif
- debug(printf("\nFinding starting boundary on left\n"));
+ debug1(printf("\nFinding starting boundary on left\n"));
leftbound = 0;
nmismatches = 0;
while (leftbound < rlength - 1 && nmismatches <= 1) {
- debug(printf(" leftbound = %d, nmismatches = %d.",leftbound,nmismatches));
+ debug1(printf(" leftbound = %d, nmismatches = %d.",leftbound,nmismatches));
c = get_genomic_nt(&c_alt,goffsetL+leftbound,chroffset,chrhigh,watsonp);
#ifdef EXTRACT_GENOMICSEG
assert(c == gsequence_ucL[leftbound]);
#endif
- debug(printf(" Comparing %c with %c\n",rsequence[leftbound],c));
+ debug1(printf(" Comparing %c with %c\n",rsequence[leftbound],c));
#ifdef PMAP
if (matchtable[rsequence[leftbound]-'A'][c-'A'] == false) {
nmismatches++;
@@ -952,17 +959,17 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
}
leftbound--; /* This is where the leftmost mismatch occurred */
- debug(printf("\nFinding starting boundary on right\n"));
+ debug1(printf("\nFinding starting boundary on right\n"));
rightbound = 0;
i = rlength-1;
nmismatches = 0;
while (i >= 0 && nmismatches <= 1) {
- debug(printf(" rightbound = %d, nmismatches = %d.",rightbound,nmismatches));
+ debug1(printf(" rightbound = %d, nmismatches = %d.",rightbound,nmismatches));
c = get_genomic_nt(&c_alt,rev_goffsetR-rightbound,chroffset,chrhigh,watsonp);
#ifdef EXTRACT_GENOMICSEG
assert(c == rev_gsequence_ucR[-rightbound]);
#endif
- debug(printf(" Comparing %c with %c\n",rsequence[i],c));
+ debug1(printf(" Comparing %c with %c\n",rsequence[i],c));
#ifdef PMAP
if (matchtable[rsequence[i]-'A'][c-'A'] == false) {
nmismatches++;
@@ -977,7 +984,7 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
}
rightbound--; /* This is where the rightmost mismatch occurred */
- debug(printf(" Left must start before %d from left end of query. Right must start after %d from right end of query\n",
+ debug1(printf(" Left must start before %d from left end of query. Right must start after %d from right end of query\n",
leftbound,rightbound));
/* We require that cL >= 1 and cR >= 1 so that lengthL and lengthR are >= 1 */
@@ -989,17 +996,21 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
assert(left2 == gsequence_ucL[cL+1]);
#endif
- debug(printf(" %d: %c%c\n",cL,left1,left2));
+ debug1(printf(" %d: %c%c\n",cL,left1,left2));
if (left1 == intron1 && left2 == intron2) {
mincR = rlength - MAX_MICROEXON_LENGTH - cL;
+ debug1(printf(" mincR %d = rlength %d - MAX_MICROEXON_LENGTH %d - cL %d\n",
+ mincR,rlength,MAX_MICROEXON_LENGTH,cL));
if (mincR < 1) {
mincR = 1;
}
- maxcR = rlength - min_microexon_length - cL;
+ maxcR = rlength - MIN_MICROEXON_LENGTH - cL;
+ debug1(printf(" maxcR %d = rlength %d - MIN_MICROEXON_LENGTH %d - cL %d\n",
+ maxcR,rlength,MIN_MICROEXON_LENGTH,cL));
if (maxcR > rightbound) {
maxcR = rightbound;
- }
- debug(printf(" Found left GT at %d. Scanning from %d - cL - (1-7), or %d to %d\n",
+ }
+ debug1(printf(" Found left GT at %d. Scanning from %d - cL - (1-7), or %d to %d\n",
cL,rlength,mincR,maxcR));
for (cR = mincR; cR <= maxcR; cR++) {
right2 = get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp);
@@ -1008,10 +1019,10 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
assert(right2 == rev_gsequence_ucR[-cR-1]);
assert(right1 == rev_gsequence_ucR[-cR]);
#endif
- debug(printf(" Checking %d: %c%c\n",cR,right2,right1));
+ debug1(printf(" Checking %d: %c%c\n",cR,right2,right1));
if (right2 == intron3 && right1 == intron4) {
middlelength = rlength - cL - cR;
- debug(printf(" Found pair at %d to %d, length %d. Middle sequence is %.*s\n",
+ debug1(printf(" Found pair at %d to %d, length %d. Middle sequence is %.*s\n",
cL,cR,middlelength,middlelength,&(rsequence[cL])));
textleft = goffsetL + cL + MICROINTRON_LENGTH;
@@ -1037,7 +1048,7 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
/*genomicuc[candidate - 1]*/ get_genomic_nt(&c4_alt,candidate-1,chroffset,chrhigh,watsonp) == intron4 &&
/*genomicuc[candidate + middlelength]*/ get_genomic_nt(&c1_alt,candidate+middlelength,chroffset,chrhigh,watsonp) == intron1 &&
/*genomicuc[candidate + middlelength + 1]*/ get_genomic_nt(&c2_alt,candidate+middlelength+1,chroffset,chrhigh,watsonp) == intron2) {
- debug(printf(" Successful microexon at %d >>> %d..%d >>> %d\n",goffsetL+cL,candidate,candidate+middlelength,rev_goffsetR-cR));
+ debug1(printf(" Successful microexon at %d >>> %d..%d >>> %d\n",goffsetL+cL,candidate,candidate+middlelength,rev_goffsetR-cR));
/* Not handling known splice sites yet */
if (watsonp == true) {
@@ -1066,7 +1077,7 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
}
}
- debug(printf("microexon probabilities: prob2 = %f, prob3 = %f\n",prob2,prob3));
+ debug1(printf("microexon probabilities: prob2 = %f, prob3 = %f\n",prob2,prob3));
if (prob2 + prob3 > bestprob) {
bestcL = cL;
bestcR = cR;
@@ -1087,13 +1098,13 @@ Dynprog_microexon_int (double *bestprob2, double *bestprob3, int *dynprogindex,
}
if (bestcL < 0 || bestcR < 0) {
- debug(printf("End of dynprog microexon int\n"));
+ debug1(printf("End of dynprog microexon int\n"));
*microintrontype = NONINTRON;
return NULL;
} else {
- debug(printf("Making microexon pairs with candidate %u\n",best_candidate));
+ debug1(printf("Making microexon pairs with candidate %u\n",best_candidate));
pairs = make_microexon_pairs_double(roffset,/*roffsetM*/roffset+bestcL,/*roffsetR*/roffset+bestcL+best_middlelength,
goffsetL,/*candidate*/best_candidate,/*goffsetR*/rev_goffsetR-bestcR+1,
/*lengthL*/bestcL,/*lengthM*/best_middlelength,/*lengthR*/bestcR,
diff --git a/src/genome.c b/src/genome.c
index 9b5b1d9..d9c5484 100644
--- a/src/genome.c
+++ b/src/genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome.c 136793 2014-05-21 18:08:15Z twu $";
+static char rcsid[] = "$Id: genome.c 140510 2014-07-03 01:48:26Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8709,7 +8709,7 @@ uncompress_fileio (char *gbuffer1, T this, Univcoord_T startpos,
}
static void
-ntcounts_fileio (int *na, int *nc, int *ng, int *nt,
+ntcounts_fileio (Univcoord_T *na, Univcoord_T *nc, Univcoord_T *ng, Univcoord_T *nt,
T this, Univcoord_T startpos, Univcoord_T endpos,
const char defaultchars[], const char flagchars[]) {
/* Chrpos_T length = endpos - startpos; */
@@ -9451,7 +9451,7 @@ uncompress_one_char_ignore_flags (Genomecomp_T *blocks, Univcoord_T pos) {
static void
-Genome_ntcounts_mmap (int *na, int *nc, int *ng, int *nt, Genomecomp_T *blocks,
+Genome_ntcounts_mmap (Univcoord_T *na, Univcoord_T *nc, Univcoord_T *ng, Univcoord_T *nt, Genomecomp_T *blocks,
Univcoord_T startpos, Univcoord_T endpos, const char defaultchars[],
const char flagchars[]) {
/* Chrpos_T length = endpos - startpos; */
@@ -11071,11 +11071,11 @@ Genome_get_segment_snp (T this, Univcoord_T left, Chrpos_T length, Univ_IIT_T ch
}
-int
-Genome_ntcounts (int *na, int *nc, int *ng, int *nt,
- T this, Univcoord_T left, Chrpos_T length) {
+Univcoord_T
+Genome_ntcounts (Univcoord_T *na, Univcoord_T *nc, Univcoord_T *ng, Univcoord_T *nt,
+ T this, Univcoord_T left, Univcoord_T length) {
char *gbuffer, *p;
- unsigned int i;
+ Univcoord_T i;
*na = *nc = *ng = *nt = 0;
diff --git a/src/genome.h b/src/genome.h
index 22103c0..619cc33 100644
--- a/src/genome.h
+++ b/src/genome.h
@@ -1,4 +1,4 @@
-/* $Id: genome.h 133760 2014-04-20 05:16:56Z twu $ */
+/* $Id: genome.h 140510 2014-07-03 01:48:26Z twu $ */
#ifndef GENOME_INCLUDED
#define GENOME_INCLUDED
@@ -81,9 +81,9 @@ Genome_get_segment_snp (T this, Univcoord_T left, Chrpos_T length, Univ_IIT_T ch
bool revcomp);
extern int
Genome_next_char (T this);
-extern int
-Genome_ntcounts (int *na, int *nc, int *ng, int *nt,
- T this, Univcoord_T left, Chrpos_T length);
+extern Univcoord_T
+Genome_ntcounts (Univcoord_T *na, Univcoord_T *nc, Univcoord_T *ng, Univcoord_T *nt,
+ T this, Univcoord_T left, Univcoord_T length);
#undef T
#endif
diff --git a/src/genome128_hr.c b/src/genome128_hr.c
index 86c5ea7..bbbbbd0 100644
--- a/src/genome128_hr.c
+++ b/src/genome128_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome128_hr.c 137999 2014-06-04 02:01:04Z twu $";
+static char rcsid[] = "$Id: genome128_hr.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -16802,7 +16802,7 @@ typedef UINT4 Genomediff_T;
static UINT4
block_diff_standard_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
UINT4 diff;
debug(printf("Comparing high: query %08X with genome %08X ",query_shifted[0],ref_ptr[0]));
@@ -16862,7 +16862,7 @@ block_diff_standard_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static Genomediff_T
block_diff_standard (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
#ifdef HAVE_SSE2
__m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
@@ -16938,7 +16938,7 @@ block_diff_standard (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static UINT4
block_diff_standard_wildcard_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
UINT4 diff, non_wildcard;
/* Taken from block_diff_standard */
@@ -17028,7 +17028,7 @@ block_diff_standard_wildcard_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_
/* snp_ptr here is alt_ptr */
static Genomediff_T
block_diff_standard_wildcard (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
#ifdef HAVE_SSE2
__m128i _diff, _wildcard, _query_high, _query_low, _query_flags,
_ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
@@ -17150,6 +17150,7 @@ block_diff_metct_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
UINT4 diff;
if (sarrayp == true) {
+ /* Convert everything to 3-nucleotide space */
diff = 0U;
} else {
/* Mark genome-T to query-C mismatches */
@@ -17230,7 +17231,7 @@ block_diff_metct (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
_ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
if (sarrayp == true) {
- /* Ignore genome-T to query-C mismatches */
+ /* Ignore genome-T to query-C mismatches. Convert everything to 3-nucleotide space */
_diff = _mm_setzero_si128();
} else {
/* Mark genome-T to query-C mismatches */
@@ -17261,6 +17262,7 @@ block_diff_metct (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
UINT4 diff;
if (sarrayp == true) {
+ /* Convert everything to 3-nucleotide space */
diff = 0U;
} else {
/* Mark genome-T to query-C mismatches */
@@ -17320,7 +17322,7 @@ block_diff_metga_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
UINT4 diff;
if (sarrayp == true) {
- /* Ignore genome-A to query-G mismatches */
+ /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. */
diff = 0U;
} else {
/* Mark genome-A to query-G mismatches */
@@ -17401,7 +17403,7 @@ block_diff_metga (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
_ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
if (sarrayp == true) {
- /* Ignore genome-A to query-G mismatches */
+ /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. */
_diff = _mm_setzero_si128();
} else {
/* Mark genome-A to query-G mismatches */
@@ -17434,7 +17436,7 @@ block_diff_metga (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
UINT4 diff;
if (sarrayp == true) {
- /* Ignore genome-A to query-G mismatches */
+ /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. */
diff = 0U;
} else {
/* Mark genome-A to query-G mismatches */
@@ -17489,15 +17491,15 @@ block_diff_metga (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static UINT4
block_diff_cmet_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
+ if (plusp) {
return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
} else {
return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
@@ -17507,15 +17509,15 @@ block_diff_cmet_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static Genomediff_T
block_diff_cmet (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
+ if (plusp) {
return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
} else {
return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
@@ -17525,16 +17527,15 @@ block_diff_cmet (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static UINT4
block_diff_cmet_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
- } else {
+ if (plusp) {
return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
}
-
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
} else {
return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
@@ -17544,16 +17545,15 @@ block_diff_cmet_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static Genomediff_T
block_diff_cmet_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
- } else {
+ if (plusp) {
return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
}
-
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
} else {
return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
@@ -17564,15 +17564,15 @@ block_diff_cmet_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
/* Ignores snp_ptr */
static UINT4
block_diff_cmet_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
+ if (plusp) {
return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
} else {
return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
@@ -17580,18 +17580,19 @@ block_diff_cmet_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Geno
}
}
+
/* Ignores snp_ptr */
static Genomediff_T
block_diff_cmet_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
+ if (plusp) {
return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
} else {
return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
@@ -17611,7 +17612,7 @@ block_diff_a2iag_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
UINT4 diff;
if (sarrayp == true) {
- /* Ignore genome-G to query-A mismatches */
+ /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. */
diff = 0U;
} else {
/* Mark genome-G to query-A mismatches */
@@ -17693,7 +17694,7 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
_ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
if (sarrayp == true) {
- /* Ignore genome-G to query-A mismatches */
+ /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. */
_diff = _mm_setzero_si128();
} else {
/* Mark genome-G to query-A mismatches */
@@ -17724,7 +17725,7 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
UINT4 diff;
if (sarrayp == true) {
- /* Ignore genome-G to query-A mismatches */
+ /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. */
diff = 0U;
} else {
/* Mark genome-G to query-A mismatches */
@@ -17951,15 +17952,15 @@ block_diff_a2itc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static UINT4
block_diff_atoi_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
+ if (plusp) {
return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
} else {
return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
@@ -17970,15 +17971,15 @@ block_diff_atoi_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static Genomediff_T
block_diff_atoi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
+ if (plusp) {
return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
} else {
return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
@@ -17988,15 +17989,15 @@ block_diff_atoi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static UINT4
block_diff_atoi_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
- } else {
+ if (plusp) {
return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
} else {
return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
@@ -18006,15 +18007,15 @@ block_diff_atoi_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static Genomediff_T
block_diff_atoi_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
- } else {
+ if (plusp) {
return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
} else {
return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
@@ -18025,15 +18026,15 @@ block_diff_atoi_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
/* Ignores snp_ptr */
static UINT4
block_diff_atoi_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
+ if (plusp) {
return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
} else {
return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
@@ -18044,15 +18045,15 @@ block_diff_atoi_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Geno
/* Ignores snp_ptr */
static Genomediff_T
block_diff_atoi_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
- bool plusp, int genestrand, bool first_read_p, bool query_unk_mismatch_local_p) {
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
- if (plusp != first_read_p) {
- return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
+ if (plusp) {
return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
} else {
return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
@@ -18062,11 +18063,11 @@ block_diff_atoi_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomec
-/* query_shifted, (snp_ptr,) ref_ptr, plusp, genestrand, first_read_p, query_unk_mismatch_local_p */
-typedef Genomediff_T (*Diffproc_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool, bool);
-typedef Genomediff_T (*Diffproc_snp_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool, bool);
-typedef UINT4 (*Diffproc_32_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool, bool);
-typedef UINT4 (*Diffproc_snp_32_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool, bool);
+/* query_shifted, (snp_ptr,) ref_ptr, plusp, genestrand, query_unk_mismatch_local_p */
+typedef Genomediff_T (*Diffproc_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+typedef Genomediff_T (*Diffproc_snp_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+typedef UINT4 (*Diffproc_32_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+typedef UINT4 (*Diffproc_snp_32_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool);
static Diffproc_T block_diff;
static Diffproc_snp_T block_diff_snp;
@@ -18685,7 +18686,7 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
+ startcolumni
#endif
,&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ plusp,genestrand,/*query_unk_mismatch_local_p*/true);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -18731,7 +18732,7 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
#ifdef HAVE_SSE2
if (endblocki == startblocki) {
diff = (block_diff_sarray)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ plusp,genestrand,/*query_unk_mismatch_local_p*/true);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -18751,7 +18752,7 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
/* Startblock */
diff = (block_diff_sarray)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ plusp,genestrand,/*query_unk_mismatch_local_p*/true);
diff = clear_start(diff,startdiscard);
if (nonzero_p(diff)) {
@@ -18771,7 +18772,7 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
while (ptr < end) {
- diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
if (nonzero_p(diff) /* != 0*/) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
@@ -18790,7 +18791,7 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
}
/* Endblock */
- diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
diff = clear_end(diff,enddiscard);
if (nonzero_p(diff)) {
@@ -18865,7 +18866,7 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
+ endcolumni
#endif
,&(ref_blocks[endblocki_32]),
- plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ plusp,genestrand,/*query_unk_mismatch_local_p*/true);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -18911,7 +18912,7 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
#ifdef HAVE_SSE2
if (startblocki == endblocki) {
diff = (block_diff_sarray)(query_shifted,&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ plusp,genestrand,/*query_unk_mismatch_local_p*/true);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -18931,7 +18932,7 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
/* Endblock */
diff = (block_diff_sarray)(query_shifted,&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ plusp,genestrand,/*query_unk_mismatch_local_p*/true);
diff = clear_end(diff,enddiscard);
if (nonzero_p(diff)) {
@@ -18951,7 +18952,7 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
start = &(ref_blocks[startblocki]);
offset -= STEP_SIZE; /* 128 or 32 */
while (ptr > start) {
- diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
if (nonzero_p(diff)) {
mismatch_position = offset - (relpos = count_leading_zeroes(diff));
@@ -18970,7 +18971,7 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
}
/* Startblock */
- diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,first_read_p,/*query_unk_mismatch_local_p*/true);
+ diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
diff = clear_start(diff,startdiscard);
if (nonzero_p(diff)) {
@@ -19372,7 +19373,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
+ startcolumni
#endif
,&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -19406,7 +19407,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
if (endblocki == startblocki) {
debug(printf("** Single block **\n"));
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -19423,7 +19424,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
/* 1/2: Startblock */
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -19435,7 +19436,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
/* 2/2: Endblock */
diff = (block_diff)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE,
&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -19449,7 +19450,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
/* 1/2: Endblock */
diff = (block_diff)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE,
&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -19460,7 +19461,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
/* 2/2: Startblock */
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -19488,7 +19489,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
nmismatches = 0;
while (ptr < endblock) {
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
debug(print_diff_popcount(diff));
if ((nmismatches += popcount_ones(diff)) > max_mismatches) {
@@ -19509,7 +19510,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
debug(printf("** Final block, end block first **\n"));
/* n/n: Go first to end block */
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -19520,7 +19521,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
/* 1/n: Go second to start block */
diff = (block_diff)(query_shifted_save_start,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -19532,7 +19533,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
/* 1/n: Go first to start block */
diff = (block_diff)(query_shifted_save_start,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -19543,7 +19544,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
/* n/n: Go second to end block */
diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -19612,7 +19613,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
+ startcolumni
#endif
,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -19646,7 +19647,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
if (endblocki == startblocki) {
debug(printf("** Single block **\n"));
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -19663,7 +19664,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
/* 1/2: Startblock */
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -19676,7 +19677,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
/* 2/2: Endblock */
diff = (block_diff_snp)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE,
&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -19690,7 +19691,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
/* 1/2: Endblock */
diff = (block_diff_snp)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE,
&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -19702,7 +19703,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
/* 2/2: Startblock */
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -19732,7 +19733,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
nmismatches = 0;
while (ref_ptr < endblock) {
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
debug(print_diff_popcount(diff));
if ((nmismatches += popcount_ones(diff)) > max_mismatches) {
@@ -19753,7 +19754,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
debug(printf("** Final block, end block first **\n"));
/* n/n: Go first to end block */
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -19764,7 +19765,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
/* 1/n: Go second to start block */
diff = (block_diff_snp)(query_shifted_save_start,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -19776,7 +19777,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
/* 1/n: Go first to start block */
diff = (block_diff_snp)(query_shifted_save_start,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -19787,7 +19788,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
/* n/n: Go second to end block */
diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -19877,7 +19878,7 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
+ startcolumni
#endif
,&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -19911,7 +19912,7 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
#ifdef HAVE_SSE2
if (endblocki == startblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -19924,7 +19925,7 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
/* Startblock */
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -19939,7 +19940,7 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
#endif
end = &(ref_blocks[endblocki]);
while (ptr < end) {
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
debug(print_diff_popcount(diff));
nmismatches += popcount_ones(diff);
@@ -19953,7 +19954,7 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
}
/* Endblock */
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -20018,7 +20019,7 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
+ startcolumni
#endif
,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -20052,7 +20053,7 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
#ifdef HAVE_SSE2
if (endblocki == startblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -20065,7 +20066,7 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
/* Startblock */
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
debug(print_diff_popcount(diff));
@@ -20082,7 +20083,7 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
#endif
end = &(ref_blocks[endblocki]);
while (ref_ptr < end) {
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
debug(print_diff_popcount(diff));
nmismatches += popcount_ones(diff);
@@ -20096,7 +20097,7 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
}
/* Endblock */
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
debug(print_diff_popcount(diff));
@@ -20249,7 +20250,7 @@ Genome_count_mismatches_fragment_right (Compress_T query_compress, int pos5, int
static int
mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
- Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, bool first_read_p,
+ Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
bool query_unk_mismatch_local_p) {
#ifdef DEBUG14
int answer;
@@ -20301,7 +20302,7 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
+ startcolumni
#endif
,&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -20342,7 +20343,7 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
#ifdef HAVE_SSE2
if (endblocki == startblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -20359,7 +20360,7 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
/* Startblock */
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_start(diff,startdiscard);
while (nonzero_p(diff) && nmismatches <= max_mismatches) {
@@ -20382,7 +20383,7 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
while (ptr < end) {
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
while (nonzero_p(diff) && nmismatches <= max_mismatches) {
mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes(diff));
@@ -20404,7 +20405,7 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
}
/* Endblock */
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_end(diff,enddiscard);
while (nonzero_p(diff) && nmismatches <= max_mismatches) {
@@ -20424,7 +20425,7 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
/* Returns mismatch_positions[0..max_mismatches] */
static int
mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
- Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, bool first_read_p,
+ Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
bool query_unk_mismatch_local_p) {
#ifdef DEBUG14
int answer;
@@ -20476,7 +20477,7 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
+ startcolumni
#endif
,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -20516,7 +20517,7 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
#ifdef HAVE_SSE2
if (endblocki == startblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -20533,7 +20534,7 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
/* Startblock */
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_start(diff,startdiscard);
while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
@@ -20558,7 +20559,7 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
while (ref_ptr < end) {
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes(diff));
@@ -20580,7 +20581,7 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
}
/* Endblock */
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_end(diff,enddiscard);
while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
@@ -20621,11 +20622,11 @@ Genome_mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T
if (snp_blocks == NULL) {
nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress,
- left,pos5,pos3,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
mismatch_positions[nmismatches] = pos3;
} else {
nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress,
- left,pos5,pos3,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
mismatch_positions[nmismatches] = pos3;
}
debug(
@@ -20664,11 +20665,11 @@ Genome_mismatches_left_trim (int *mismatch_positions, int max_mismatches, Compre
if (snp_blocks == NULL) {
nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress,
- left,pos5,pos3,plusp,genestrand,first_read_p,/*query_unk_mismatch_p*/false);
+ left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
mismatch_positions[nmismatches] = pos3;
} else {
nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress,
- left,pos5,pos3,plusp,genestrand,first_read_p,/*query_unk_mismatch_p*/false);
+ left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
mismatch_positions[nmismatches] = pos3;
}
debug(
@@ -20685,7 +20686,7 @@ Genome_mismatches_left_trim (int *mismatch_positions, int max_mismatches, Compre
static int
mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
- Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, bool first_read_p,
+ Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
bool query_unk_mismatch_local_p) {
#ifdef DEBUG14
int answer;
@@ -20738,7 +20739,7 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
+ endcolumni
#endif
,&(ref_blocks[endblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -20779,7 +20780,7 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
#ifdef HAVE_SSE2
if (startblocki == endblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -20796,7 +20797,7 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
/* Endblock */
diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_end(diff,enddiscard);
while (nonzero_p(diff) && nmismatches <= max_mismatches) {
@@ -20819,7 +20820,7 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
start = &(ref_blocks[startblocki]);
offset -= STEP_SIZE; /* 128 or 32 */
while (ptr > start) {
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
while (nonzero_p(diff) && nmismatches <= max_mismatches) {
mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes(diff));
@@ -20841,7 +20842,7 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
}
/* Startblock */
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_start(diff,startdiscard);
while (nonzero_p(diff) && nmismatches <= max_mismatches) {
@@ -20861,7 +20862,7 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
static int
mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
- Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, bool first_read_p,
+ Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
bool query_unk_mismatch_local_p) {
#ifdef DEBUG14
int answer;
@@ -20915,7 +20916,7 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
+ endcolumni
#endif
,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -20955,7 +20956,7 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
#ifdef HAVE_SSE2
if (startblocki == endblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -20972,7 +20973,7 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
/* Endblock */
diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_end(diff,enddiscard);
while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
@@ -20997,7 +20998,7 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
start = &(ref_blocks[startblocki]);
offset -= STEP_SIZE; /* 128 or 32 */
while (ref_ptr > start) {
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes(diff));
@@ -21019,7 +21020,7 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
}
/* Startblock */
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_local_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
diff = clear_start(diff,startdiscard);
while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
@@ -21057,10 +21058,10 @@ Genome_mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T
if (snp_blocks == NULL) {
nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress,
- left,pos5,pos3,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
} else {
nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress,
- left,pos5,pos3,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
}
mismatch_positions[nmismatches] = -1;
debug(
@@ -21095,10 +21096,10 @@ Genome_mismatches_right_trim (int *mismatch_positions, int max_mismatches, Compr
if (snp_blocks == NULL) {
nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress,
- left,pos5,pos3,plusp,genestrand,first_read_p,/*query_unk_mismatch_p*/false);
+ left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
} else {
nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress,
- left,pos5,pos3,plusp,genestrand,first_read_p,/*query_unk_mismatch_p*/false);
+ left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
}
mismatch_positions[nmismatches] = -1;
debug(
@@ -21177,7 +21178,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
+ startcolumni
#endif
,&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -21227,7 +21228,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
#ifdef HAVE_SSE2
if (endblocki == startblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -21250,7 +21251,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
/* Startblock */
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
while (nonzero_p(diff)) {
@@ -21274,7 +21275,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
while (ptr < end) {
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
@@ -21297,7 +21298,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
}
/* Endblock */
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
while (nonzero_p(diff)) {
@@ -21380,7 +21381,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
+ startcolumni
#endif
,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -21430,7 +21431,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
#ifdef HAVE_SSE2
if (endblocki == startblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
diff = clear_end(diff,enddiscard);
@@ -21453,7 +21454,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
/* Startblock */
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard);
while (nonzero_p(diff)) {
@@ -21479,7 +21480,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
while (ref_ptr < end) {
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
@@ -21502,7 +21503,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
}
/* Endblock */
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard);
while (nonzero_p(diff)) {
@@ -21614,7 +21615,7 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
+ endcolumni
#endif
,&(ref_blocks[endblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_end_32(diff_32,enddiscard); /* puts 0 (matches) at end */
diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */
@@ -21669,7 +21670,7 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
#ifdef HAVE_SSE2
if (startblocki == endblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */
@@ -21760,7 +21761,7 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
/* Endblock */
diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
#ifdef HAVE_SSE2
@@ -21871,7 +21872,7 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
#endif
start = &(ref_blocks[startblocki]);
while (ptr > start) {
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
#ifdef HAVE_SSE2
p = 3*((unsigned short) _mm_extract_epi16(diff,7));
@@ -21985,7 +21986,7 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
}
/* Startblock */
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */
#ifdef HAVE_SSE2
@@ -22156,7 +22157,7 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
+ endcolumni
#endif
,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_end_32(diff_32,enddiscard); /* puts 0 (matches) at end */
diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */
@@ -22211,7 +22212,7 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
#ifdef HAVE_SSE2
if (startblocki == endblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */
@@ -22303,7 +22304,7 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
/* Endblock */
diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
#ifdef HAVE_SSE2
@@ -22416,7 +22417,7 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
#endif
start = &(ref_blocks[startblocki]);
while (ref_ptr > start) {
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
#ifdef HAVE_SSE2
p = 3*((unsigned short) _mm_extract_epi16(diff,7));
@@ -22530,7 +22531,7 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
}
/* Startblock */
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */
@@ -22703,7 +22704,7 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
+ startcolumni
#endif
,&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */
diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */
@@ -22758,7 +22759,7 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
#ifdef HAVE_SSE2
if (endblocki == startblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */
@@ -22849,7 +22850,7 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
/* Startblock */
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
debug(printf("clearing start %08X\n",clear_start_mask(startdiscard)));
@@ -22961,7 +22962,7 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
#endif
end = &(ref_blocks[endblocki]);
while (ptr < end) {
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
#ifdef HAVE_SSE2
p = 3*((unsigned short) _mm_extract_epi16(diff,0));
@@ -23075,7 +23076,7 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
}
/* Endblock */
- diff = (block_diff)(query_shifted,ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */
#ifdef HAVE_SSE2
@@ -23247,7 +23248,7 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
+ startcolumni
#endif
,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */
diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */
@@ -23301,7 +23302,7 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
#ifdef HAVE_SSE2
if (endblocki == startblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */
@@ -23392,7 +23393,7 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
/* Startblock */
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
- plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ plusp,genestrand,query_unk_mismatch_p);
diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
@@ -23506,7 +23507,7 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
#endif
end = &(ref_blocks[endblocki]);
while (ref_ptr < end) {
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
#ifdef HAVE_SSE2
p = 3*((unsigned short) _mm_extract_epi16(diff,0));
@@ -23620,7 +23621,7 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
}
/* Endblock */
- diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,first_read_p,query_unk_mismatch_p);
+ diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */
diff --git a/src/gmap.c b/src/gmap.c
index caa9f22..5fce38e 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 138000 2014-06-04 02:04:31Z twu $";
+static char rcsid[] = "$Id: gmap.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2115,10 +2115,17 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
if (Stage3_queryend(from) < Stage3_querystart(to)) {
/* Gap exists between the two parts */
- leftpos = Stage3_queryend(from) - 8;
- rightpos = Stage3_querystart(to) + 8;
+ if ((leftpos = Stage3_queryend(from) - 8) < 0) {
+ leftpos = 0;
+ }
+ if ((rightpos = Stage3_querystart(to) + 8) >= queryntlength) {
+ rightpos = queryntlength - 1;
+ }
maxpeelback_from = 8;
maxpeelback_to = 8;
+ debug2(printf("overlap: leftpos %d, rightpos %d, queryntlength %d, maxpeelback_from %d, maxpeelback_to %d\n",
+ leftpos,rightpos,queryntlength,maxpeelback_from,maxpeelback_to));
+
if (Stage3_watsonp(from) == true && Stage3_watsonp(to) == true) {
queryjump = Stage3_querystart(to) - Stage3_queryend(from) - 1;
genomejump = Stage3_genomicstart(to) - Stage3_genomicend(from) - 1U;
@@ -2146,8 +2153,8 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
/* maxpeelback_to = Stage3_queryend(from) - leftpos; */
maxpeelback_from = rightpos - midpos;
maxpeelback_to = midpos - leftpos;
- debug2(printf("overlap: leftpos %d, rightpos %d, midpos %d, maxpeelback_from %d, maxpeelback_to %d\n",
- leftpos,rightpos,midpos,maxpeelback_from,maxpeelback_to));
+ debug2(printf("overlap: leftpos %d, rightpos %d, midpos %d, queryntlength %d, maxpeelback_from %d, maxpeelback_to %d\n",
+ leftpos,rightpos,midpos,queryntlength,maxpeelback_from,maxpeelback_to));
#if 0
if (Stage3_watsonp(from) == true && Stage3_watsonp(to) == true) {
queryjump = Stage3_queryend(from) - Stage3_querystart(to) - 1;
@@ -2305,6 +2312,8 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
if (five_margin < chimera_margin && three_margin < chimera_margin) {
debug2(printf("Insufficient margins\n"));
} else if (five_margin > three_margin) {
+#if 0
+ /* extension makes it harder to find the other alignment. The merging process will help fill in any gap. */
extension = CHIMERA_SLOP;
debug2(printf("Comparing extension %d with %d = (effective_start %d)/2\n",
extension,effective_start/2,effective_start));
@@ -2313,6 +2322,9 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
debug2(printf("Proposed extension of %d is too long relative to effective_start %d\n",extension,effective_start));
extension = effective_start/3;
}
+#else
+ extension = 0;
+#endif
if ((querysubseq = Sequence_subsequence(queryseq,0,effective_start+extension)) != NULL) {
if ((querysubuc = Sequence_subsequence(queryuc,0,effective_start+extension)) != NULL) {
debug2(printf("5 margin > 3 margin. "));
@@ -2402,6 +2414,8 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
npaths_sub1,npaths_sub2,List_length(nonjoinable)));
} else {
+#if 0
+ /* extension makes it harder to find the other alignment. The merging process will help fill in any gap. */
extension = CHIMERA_SLOP;
debug2(printf("Comparing extension %d with %d = (queryntlength %d - effective_end %d)/2\n",
extension,(queryntlength-effective_end)/2,queryntlength,effective_end));
@@ -2411,6 +2425,9 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
extension,queryntlength,effective_end));
extension = (queryntlength - effective_end)/3;
}
+#else
+ extension = 0;
+#endif
if ((querysubseq = Sequence_subsequence(queryseq,effective_end-extension,queryntlength)) != NULL) {
if ((querysubuc = Sequence_subsequence(queryuc,effective_end-extension,queryntlength)) != NULL) {
debug2(printf("5 margin <= 3 margin. "));
diff --git a/src/gmapindex.c b/src/gmapindex.c
index 74f27a3..7795d92 100644
--- a/src/gmapindex.c
+++ b/src/gmapindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmapindex.c 138522 2014-06-09 17:08:44Z twu $";
+static char rcsid[] = "$Id: gmapindex.c 140511 2014-07-03 01:50:36Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -951,6 +951,7 @@ main (int argc, char *argv[]) {
Univcoord_T n, genomelength, totalnts;
char *chromosomefile, *iitfile, *positionsfile_high, *positionsfile_low, interval_char;
char *sarrayfile, *lcpexcfile, *lcpguidefile;
+ char *rankfile, *permuted_sarray_file; /* temporary files */
char *childbytesfile, *childexcfile, *childguidefile;
char *lcpchilddcfile;
#ifdef USE_SEPARATE_BUCKETS
@@ -1441,10 +1442,9 @@ main (int argc, char *argv[]) {
n = genomelength;
+ /* No need to mmap SA anymore */
sarrayfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".sarray")+1,sizeof(char));
sprintf(sarrayfile,"%s/%s.sarray",destdir,fileroot);
- SA = (UINT4 *) Access_mmap(&sa_fd,&sa_len,sarrayfile,sizeof(UINT4),/*randomp*/false);
- FREE(sarrayfile);
/* Required for computing LCP, but uses non-SIMD instructions */
genomebits = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
@@ -1453,7 +1453,15 @@ main (int argc, char *argv[]) {
/*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/false,
/*mode*/STANDARD);
- lcp = Sarray_compute_lcp(SA,n);
+
+ rankfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".rank")+1,sizeof(char));
+ sprintf(rankfile,"%s/%s.rank",destdir,fileroot);
+ permuted_sarray_file = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".permuted_sarray")+1,sizeof(char));
+ sprintf(permuted_sarray_file,"%s/%s.permuted_sarray",destdir,fileroot);
+ lcp = Sarray_compute_lcp(rankfile,permuted_sarray_file,sarrayfile,n);
+ FREE(permuted_sarray_file);
+ FREE(rankfile);
+
Genome_free(&genomebits);
/* Write lcp exceptions/guide, but return lcp_bytes */
@@ -1484,16 +1492,15 @@ main (int argc, char *argv[]) {
n_lcp_exceptions = lcpexc_len/(sizeof(UINT4) + sizeof(UINT4));
FREE(lcpexcfile);
- /* SA and genome needed for creating discrim_chars */
genomecomp = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
/*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
/* Compute discriminating chars (DC) array */
- discrim_chars = Sarray_discriminating_chars(&nbytes,SA,genomecomp,lcp_bytes,lcp_guide,
+ discrim_chars = Sarray_discriminating_chars(&nbytes,sarrayfile,genomecomp,lcp_bytes,lcp_guide,
lcp_exceptions,/*guide_interval*/1024,n,CHARTABLE);
+ FREE(sarrayfile);
Genome_free(&genomecomp);
- munmap((void *) SA,sa_len);
- close(sa_fd);
+ /* No need to munmap SA anymore */
fprintf(stderr,"Building child array\n");
diff --git a/src/intlist.c b/src/intlist.c
index 6c163c6..e23f53b 100644
--- a/src/intlist.c
+++ b/src/intlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: intlist.c 134888 2014-05-01 23:30:38Z twu $";
+static char rcsid[] = "$Id: intlist.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -188,6 +188,20 @@ Intlist_to_char_array (int *n, T list) {
}
T
+Intlist_from_array (int *array, int n) {
+ T list = NULL, p;
+
+ while (--n >= 0) {
+ p = (T) MALLOC(sizeof(*p));
+ p->first = array[n];
+ p->rest = list;
+ list = p;
+ }
+
+ return list;
+}
+
+T
Intlist_copy (T list) {
T head, *p = &head;
diff --git a/src/intlist.h b/src/intlist.h
index 4b34b75..13ec371 100644
--- a/src/intlist.h
+++ b/src/intlist.h
@@ -1,4 +1,4 @@
-/* $Id: intlist.h 134888 2014-05-01 23:30:38Z twu $ */
+/* $Id: intlist.h 140368 2014-07-02 00:56:33Z twu $ */
#ifndef INTLIST_INCLUDED
#define INTLIST_INCLUDED
#include "bool.h"
@@ -36,6 +36,8 @@ extern int *
Intlist_to_array_out (int *n, T list);
extern char *
Intlist_to_char_array (int *n, T list);
+extern T
+Intlist_from_array (int *array, int n);
extern T
Intlist_copy (T list);
extern T
diff --git a/src/samprint.c b/src/samprint.c
index 2ced66a..482d286 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 138718 2014-06-11 17:06:57Z twu $";
+static char rcsid[] = "$Id: samprint.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3905,13 +3905,13 @@ print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate
if (sensep == plusp) {
substring1 = /* donor */ Stage3end_substringD(shortexon);
- distance1 = Stage3end_shortexon_acceptor_distance(shortexon);
- distance2 = Stage3end_shortexon_donor_distance(shortexon);
+ distance1 = Stage3end_shortexonA_distance(shortexon);
+ distance2 = Stage3end_shortexonD_distance(shortexon);
substring2 = /* acceptor */ Stage3end_substringA(shortexon);
} else {
substring1 = /* acceptor */ Stage3end_substringA(shortexon);
- distance1 = Stage3end_shortexon_donor_distance(shortexon);
- distance2 = Stage3end_shortexon_acceptor_distance(shortexon);
+ distance1 = Stage3end_shortexonD_distance(shortexon);
+ distance2 = Stage3end_shortexonA_distance(shortexon);
substring2 = /* donor */ Stage3end_substringD(shortexon);
}
diff --git a/src/sarray-read.c b/src/sarray-read.c
index 1dfa627..cebf3b0 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 138745 2014-06-11 19:04:25Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1218,13 +1218,13 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
plusp,genestrand,first_read_p);
printf("%d\t%u\t%u\t",recount,(*initptr)-1,sarray->array[(*initptr)-1] /*+ 1U*/);
if (genestrand == +2) {
- if (plusp != first_read_p) {
- Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)-1],recount+1,Buffer);
- } else {
+ if (plusp) {
Genome_fill_buffer_convert_rev(sarray->array[(*initptr)-1],recount+1,Buffer);
+ } else {
+ Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)-1],recount+1,Buffer);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)-1],recount+1,Buffer);
} else {
Genome_fill_buffer_convert_rev(sarray->array[(*initptr)-1],recount+1,Buffer);
@@ -1249,13 +1249,13 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
plusp,genestrand,first_read_p);
printf("%d\t%u\t%u\t",recount,(*initptr)+k,hit /*+ 1U*/);
if (genestrand == +2) {
- if (plusp != first_read_p) {
- Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)+k],recount+1,Buffer);
- } else {
+ if (plusp) {
Genome_fill_buffer_convert_rev(sarray->array[(*initptr)+k],recount+1,Buffer);
+ } else {
+ Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)+k],recount+1,Buffer);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)+k],recount+1,Buffer);
} else {
Genome_fill_buffer_convert_rev(sarray->array[(*initptr)+k],recount+1,Buffer);
@@ -1280,13 +1280,13 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
plusp,genestrand,first_read_p);
printf("%d\t%u\t%u\t",recount,(*finalptr)+1,sarray->array[(*finalptr)+1] /*+ 1U*/);
if (genestrand == +2) {
- if (plusp != first_read_p) {
- Genome_fill_buffer_convert_fwd(sarray->array[(*finalptr)+1],recount+1,Buffer);
- } else {
+ if (plusp) {
Genome_fill_buffer_convert_rev(sarray->array[(*finalptr)+1],recount+1,Buffer);
+ } else {
+ Genome_fill_buffer_convert_fwd(sarray->array[(*finalptr)+1],recount+1,Buffer);
}
} else {
- if (plusp == first_read_p) {
+ if (plusp) {
Genome_fill_buffer_convert_fwd(sarray->array[(*finalptr)+1],recount+1,Buffer);
} else {
Genome_fill_buffer_convert_rev(sarray->array[(*finalptr)+1],recount+1,Buffer);
@@ -1304,7 +1304,7 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
if (failp == true) {
/* Can happen because $ ranks below 0 */
/* Can also happen with CMET or ATOI, since genome128_hr procedures find genome-to-query mismatches */
- abort();
+ /* abort(); */
}
#endif
@@ -2225,14 +2225,15 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
int query_indel_pos;
#endif
- List_T spliceends, lowprob;
- int nhits, nspliceends, n_good_spliceends;
+ List_T spliceends_sense, spliceends_antisense, lowprob;
+ int nhits, nspliceends_sense, nspliceends_antisense, n_good_spliceends;
+ int best_nmismatches, nmismatches_donor, nmismatches_acceptor;
double best_prob, prob;
Substring_T donor, acceptor;
int sensedir;
Uintlist_T ambcoords, ambcoords_left, ambcoords_right;
- Intlist_T amb_knowni, amb_nmismatches, amb_knowni_left, amb_knowni_right, amb_nmismatches_left, amb_nmismatches_right;
+ Intlist_T amb_knowni, amb_nmismatches;
int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1],
segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1];
@@ -2359,7 +2360,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
Uintlist_free(&difflist);
debug7(printf("Have %d matching diffs\n",n));
- spliceends = (List_T) NULL;
+ spliceends_sense = spliceends_antisense = (List_T) NULL;
lowprob = (List_T) NULL;
for (i = 0; i < n; i++) {
left2 = array[i];
@@ -2415,21 +2416,38 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = MAX_READLENGTH;
/* nspliceends = 0; */
- spliceends = Splice_solve_single(&(*found_score),&nspliceends,spliceends,&lowprob,
- &segmenti_usedp,&segmentj_usedp,
- /*segmenti_left*/left1,/*segmentj_left*/left2,
- chrnum,chroffset,chrhigh,chrlength,
- chrnum,chroffset,chrhigh,chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,/*max_mismatches_allowed*/1000,
- plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
- /*sarrayp*/true);
+ spliceends_sense =
+ Splice_solve_single_sense(&(*found_score),&nspliceends_sense,spliceends_sense,&lowprob,
+ &segmenti_usedp,&segmentj_usedp,
+ /*segmenti_left*/left1,/*segmentj_left*/left2,
+ chrnum,chroffset,chrhigh,chrlength,
+ chrnum,chroffset,chrhigh,chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,/*max_mismatches_allowed*/1000,
+ plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
+ /*sarrayp*/true);
+ spliceends_antisense =
+ Splice_solve_single_antisense(&(*found_score),&nspliceends_antisense,spliceends_antisense,&lowprob,
+ &segmenti_usedp,&segmentj_usedp,
+ /*segmenti_left*/left1,/*segmentj_left*/left2,
+ chrnum,chroffset,chrhigh,chrlength,
+ chrnum,chroffset,chrhigh,chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,/*max_mismatches_allowed*/1000,
+ plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
+ /*sarrayp*/true);
} else if (left2 > left1) {
nindels = left2 - left1;
@@ -2513,10 +2531,13 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
}
}
- if (spliceends != NULL) {
+ if (spliceends_sense != NULL) {
/* nmismatches should be the same for all spliceends, so pick based on prob */
+ hit = (Stage3end_T) List_head(spliceends_sense);
+ best_nmismatches = Stage3end_nmismatches_whole(hit);
+
best_prob = 0.0;
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
debug7(printf("analyzing distance %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
@@ -2527,7 +2548,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
}
n_good_spliceends = 0;
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
@@ -2538,7 +2559,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
}
if (n_good_spliceends == 1) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_chimera_prob(hit) == best_prob) {
debug7(printf("pushing distance %d, probabilities %f and %f\n",
@@ -2550,11 +2571,11 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
Stage3end_free(&hit);
}
}
- List_free(&spliceends);
+ List_free(&spliceends_sense);
} else {
- /* Create ambiguous */
- hit = (Stage3end_T) List_head(spliceends);
+ /* Create ambiguous, sense */
+ hit = (Stage3end_T) List_head(spliceends_sense);
donor = Stage3end_substring_donor(hit);
acceptor = Stage3end_substring_acceptor(hit);
sensedir = Stage3end_sensedir(hit);
@@ -2562,8 +2583,9 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
ambcoords = (Uintlist_T) NULL;
amb_knowni = (Intlist_T) NULL;
amb_nmismatches = (Intlist_T) NULL;
+
if (Substring_left_genomicseg(donor) == left1) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
acceptor = Stage3end_substring_acceptor(hit);
#ifdef LARGE_GENOMES
@@ -2575,44 +2597,15 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
}
- if (plusp == true) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- } else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- }
- } else {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- } else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- }
- }
-
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/0,
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(donor),
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
sensedir,/*sarrayp*/true));
Intlist_free(&amb_nmismatches);
@@ -2620,7 +2613,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
} else if (Substring_left_genomicseg(acceptor) == left1) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
#ifdef LARGE_GENOMES
@@ -2632,44 +2625,138 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
}
- if (plusp == true) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- } else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- }
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ sensedir,/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ } else {
+ fprintf(stderr,"Unexpected: Neither donor left %u nor acceptor left %u equals left1 %u\n",
+ Substring_left_genomicseg(donor),Substring_left_genomicseg(acceptor),left1);
+ abort();
+ }
+
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ Stage3end_free(&hit);
+ }
+ List_free(&spliceends_sense);
+ }
+ }
+
+ if (spliceends_antisense != NULL) {
+ /* nmismatches should be the same for all spliceends, so pick based on prob */
+ hit = (Stage3end_T) List_head(spliceends_antisense);
+ best_nmismatches = Stage3end_nmismatches_whole(hit);
+
+ best_prob = 0.0;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ debug7(printf("analyzing distance %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
+ best_prob = prob;
+ }
+ }
+
+ n_good_spliceends = 0;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP) {
+ debug7(printf("accepting distance %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ n_good_spliceends += 1;
+ }
+ }
+
+ if (n_good_spliceends == 1) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_chimera_prob(hit) == best_prob) {
+ debug7(printf("pushing distance %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ nhits += 1;
} else {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- } else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- }
+ Stage3end_free(&hit);
+ }
+ }
+ List_free(&spliceends_antisense);
+
+ } else {
+ /* Create ambiguous, antisense */
+ hit = (Stage3end_T) List_head(spliceends_antisense);
+ donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ sensedir = Stage3end_sensedir(hit);
+
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ if (Substring_left_genomicseg(donor) == left1) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ acceptor = Stage3end_substring_acceptor(hit);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ }
+
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ sensedir,/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ } else if (Substring_left_genomicseg(acceptor) == left1) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
}
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/0,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(acceptor),
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
sensedir,/*sarrayp*/true));
Intlist_free(&amb_nmismatches);
@@ -2682,11 +2769,11 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
abort();
}
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
Stage3end_free(&hit);
}
- List_free(&spliceends);
+ List_free(&spliceends_antisense);
}
}
@@ -2710,7 +2797,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
Uintlist_free(&difflist);
debug7(printf("Have %d matching diffs\n",n));
- spliceends = (List_T) NULL;
+ spliceends_sense = spliceends_antisense = (List_T) NULL;
lowprob = (List_T) NULL;
for (i = 0; i < n; i++) {
left1 = array[i];
@@ -2766,21 +2853,38 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = MAX_READLENGTH;
/* nspliceends = 0; */
- spliceends = Splice_solve_single(&(*found_score),&nspliceends,spliceends,&lowprob,
- &segmenti_usedp,&segmentj_usedp,
- /*segmenti_left*/left1,/*segmentj_left*/left2,
- chrnum,chroffset,chrhigh,chrlength,
- chrnum,chroffset,chrhigh,chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,/*max_mismatches_allowed*/1000,
- plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
- /*sarrayp*/true);
+ spliceends_sense =
+ Splice_solve_single_sense(&(*found_score),&nspliceends_sense,spliceends_sense,&lowprob,
+ &segmenti_usedp,&segmentj_usedp,
+ /*segmenti_left*/left1,/*segmentj_left*/left2,
+ chrnum,chroffset,chrhigh,chrlength,
+ chrnum,chroffset,chrhigh,chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,/*max_mismatches_allowed*/1000,
+ plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
+ /*sarrayp*/true);
+ spliceends_antisense =
+ Splice_solve_single_antisense(&(*found_score),&nspliceends_antisense,spliceends_antisense,&lowprob,
+ &segmenti_usedp,&segmentj_usedp,
+ /*segmenti_left*/left1,/*segmentj_left*/left2,
+ chrnum,chroffset,chrhigh,chrlength,
+ chrnum,chroffset,chrhigh,chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,/*max_mismatches_allowed*/1000,
+ plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
+ /*sarrayp*/true);
} else if (left2 > left1) {
nindels = left2 - left1;
@@ -2864,10 +2968,13 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
}
}
- if (spliceends != NULL) {
+ if (spliceends_sense != NULL) {
/* nmismatches should be the same for all spliceends, so pick based on prob */
+ hit = (Stage3end_T) List_head(spliceends_sense);
+ best_nmismatches = Stage3end_nmismatches_whole(hit);
+
best_prob = 0.0;
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
debug7(printf("analyzing distance %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
@@ -2878,7 +2985,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
}
n_good_spliceends = 0;
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
@@ -2889,7 +2996,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
}
if (n_good_spliceends == 1) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_chimera_prob(hit) == best_prob) {
debug7(printf("pushing distance %d, probabilities %f and %f\n",
@@ -2901,11 +3008,11 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
Stage3end_free(&hit);
}
}
- List_free(&spliceends);
+ List_free(&spliceends_sense);
} else {
- /* Create ambiguous */
- hit = (Stage3end_T) List_head(spliceends);
+ /* Create ambiguous, sense */
+ hit = (Stage3end_T) List_head(spliceends_sense);
donor = Stage3end_substring_donor(hit);
acceptor = Stage3end_substring_acceptor(hit);
sensedir = Stage3end_sensedir(hit);
@@ -2913,8 +3020,9 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
ambcoords = (Uintlist_T) NULL;
amb_knowni = (Intlist_T) NULL;
amb_nmismatches = (Intlist_T) NULL;
+
if (Substring_left_genomicseg(donor) == left2) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
acceptor = Stage3end_substring_acceptor(hit);
#ifdef LARGE_GENOMES
@@ -2926,44 +3034,15 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
}
- if (plusp == true) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- } else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- }
- } else {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- } else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- }
- }
-
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/0,
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(donor),
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
sensedir,/*sarrayp*/true));
Intlist_free(&amb_nmismatches);
@@ -2971,7 +3050,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
} else if (Substring_left_genomicseg(acceptor) == left2) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
#ifdef LARGE_GENOMES
@@ -2983,44 +3062,138 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
}
- if (plusp == true) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- } else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- }
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_known_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ sensedir,/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ } else {
+ fprintf(stderr,"Unexpected: Neither donor left %u nor acceptor left %u equals left2 %u\n",
+ Substring_left_genomicseg(donor),Substring_left_genomicseg(acceptor),left2);
+ abort();
+ }
+
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ Stage3end_free(&hit);
+ }
+ List_free(&spliceends_sense);
+ }
+ }
+
+ if (spliceends_antisense != NULL) {
+ /* nmismatches should be the same for all spliceends, so pick based on prob */
+ hit = (Stage3end_T) List_head(spliceends_antisense);
+ best_nmismatches = Stage3end_nmismatches_whole(hit);
+
+ best_prob = 0.0;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ debug7(printf("analyzing distance %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
+ best_prob = prob;
+ }
+ }
+
+ n_good_spliceends = 0;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP) {
+ debug7(printf("accepting distance %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ n_good_spliceends += 1;
+ }
+ }
+
+ if (n_good_spliceends == 1) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_chimera_prob(hit) == best_prob) {
+ debug7(printf("pushing distance %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ *singlesplicing = List_push(*singlesplicing,(void *) hit);
+ nhits += 1;
} else {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- } else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- }
+ Stage3end_free(&hit);
+ }
+ }
+ List_free(&spliceends_antisense);
+
+ } else {
+ /* Create ambiguous, antisense */
+ hit = (Stage3end_T) List_head(spliceends_antisense);
+ donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ sensedir = Stage3end_sensedir(hit);
+
+ ambcoords = (Uintlist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ if (Substring_left_genomicseg(donor) == left2) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ acceptor = Stage3end_substring_acceptor(hit);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ }
+
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ sensedir,/*sarrayp*/true));
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+ Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
+
+ } else if (Substring_left_genomicseg(acceptor) == left2) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
}
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/0,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(acceptor),
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_known_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
sensedir,/*sarrayp*/true));
Intlist_free(&amb_nmismatches);
@@ -3033,14 +3206,15 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
abort();
}
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
Stage3end_free(&hit);
}
- List_free(&spliceends);
+ List_free(&spliceends_antisense);
}
}
+
/* Don't use lowprob in suffix array stage */
debug7(printf("freeing lowprobs\n"));
for (p = lowprob; p != NULL; p = List_next(p)) {
@@ -3087,35 +3261,22 @@ Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *am
}
debug(printf("\nStarting Sarray_search_greedy with querylength %d and indexsize %d and nmisses_allowed %d\n",
querylength,sarray_fwd->indexsize,nmisses_allowed));
+ debug(printf("genestrand = %d\n",genestrand));
*found_score = querylength;
if (genestrand == +2) {
- if (first_read_p == false) {
- plus_conversion = conversion_fwd;
- minus_conversion = conversion_rev;
- plus_sarray = sarray_fwd;
- minus_sarray = sarray_rev;
- } else {
- plus_conversion = conversion_rev;
- minus_conversion = conversion_fwd;
- plus_sarray = sarray_rev;
- minus_sarray = sarray_fwd;
- }
+ plus_conversion = conversion_rev;
+ minus_conversion = conversion_fwd;
+ plus_sarray = sarray_rev;
+ minus_sarray = sarray_fwd;
} else {
- if (first_read_p == true) {
- plus_conversion = conversion_fwd;
- minus_conversion = conversion_rev;
- plus_sarray = sarray_fwd;
- minus_sarray = sarray_rev;
- } else {
- plus_conversion = conversion_rev;
- minus_conversion = conversion_fwd;
- plus_sarray = sarray_rev;
- minus_sarray = sarray_fwd;
- }
+ plus_conversion = conversion_fwd;
+ minus_conversion = conversion_rev;
+ plus_sarray = sarray_fwd;
+ minus_sarray = sarray_rev;
}
-
+
/* Do one plus round */
plus_querypos = 0;
sarray_search(&initptr,&finalptr,&successp,&best_plus_nmatches,&(queryuc_ptr[plus_querypos]),
diff --git a/src/sarray-write.c b/src/sarray-write.c
index 6b0f9ef..5c2aa6e 100644
--- a/src/sarray-write.c
+++ b/src/sarray-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-write.c 133760 2014-04-20 05:16:56Z twu $";
+static char rcsid[] = "$Id: sarray-write.c 140591 2014-07-03 16:08:23Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -662,8 +662,9 @@ Sarray_write_index_interleaved (char *indexptrsfile, char *indexcompfile,
}
+#if 0
UINT4 *
-Sarray_compute_lcp (UINT4 *SA, UINT4 n) {
+Sarray_compute_lcp_kasai (UINT4 *SA, UINT4 n) {
UINT4 *lcp;
UINT4 *rank, h;
UINT4 i, j;
@@ -679,6 +680,14 @@ Sarray_compute_lcp (UINT4 *SA, UINT4 n) {
rank[SA[i]] = i;
}
+#if 0
+ /* Used for comparison with Manzini */
+ for (i = 0; i <= n; i++) {
+ printf("%u %u\n",i,rank[i]);
+ }
+ printf("End of Kasai\n\n");
+#endif
+
lcp[0] = 0; /* -1 ? */
h = 0;
for (i = 0; i <= n; i++) {
@@ -711,8 +720,325 @@ Sarray_compute_lcp (UINT4 *SA, UINT4 n) {
return lcp;
}
+#endif
+
+
+#if 0
+/* Puts rank in file, to save on memory.  Disabled (#if 0) variant that takes SA in memory and returns a MALLOC'ed lcp of n+1 entries. */
+UINT4 *
+Sarray_compute_lcp (char *rankfile, UINT4 *SA, UINT4 n) {
+ UINT4 *lcp;
+ UINT4 *rank, rank_i, h;
+ UINT4 i, j;
+ char *comma;
+
+ FILE *fp;
+#ifdef DEBUG14
+ UINT4 horig;
+#endif
+
+ /* Compute rank (rank[SA[i]] = i) and store in temporary file */
+ rank = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
+ for (i = 0; i <= n; i++) {
+ rank[SA[i]] = i;
+ }
+ /* NOTE(review): rankfile holds raw UINT4 data but is opened with "w"/"r" rather than "wb"/"rb"; fopen results are not checked */
+ fprintf(stderr,"Writing temporary file %s...",rankfile);
+ fp = fopen(rankfile,"w");
+ for (i = 0; i + FWRITE_BATCH <= n; i += FWRITE_BATCH) {
+ fwrite((void *) &(rank[i]),sizeof(UINT4),FWRITE_BATCH,fp);
+ }
+ /* Remaining n - i + 1 entries that did not fill a whole FWRITE_BATCH */
+ if (i <= n) {
+ fwrite((void *) &(rank[i]),sizeof(UINT4),n - i + 1,fp);
+ }
+ fclose(fp);
+ FREE(rank);
+ fprintf(stderr,"done\n");
+
+
+ /* Now allocate memory for lcp; rank is re-read from rankfile one entry per position i */
+ fp = fopen(rankfile,"r");
+
+ lcp = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
+
+ lcp[0] = 0; /* -1 ? */
+ h = 0;
+ for (i = 0; i <= n; i++) {
+ FREAD_UINT(&rank_i,fp);
+ if (rank_i > 0) {
+ j = SA[rank_i - 1];
+ /* h is carried over from the previous i, increased here, stored, then decremented if positive */
+ h += Genome_consecutive_matches_pair(i+h,j+h,/*genomelength*/n);
+
+ lcp[rank_i] = h;
+ if (h > 0) {
+ h--;
+ }
+ }
+ if (i % MONITOR_INTERVAL == 0) {
+ comma = Genomicpos_commafmt(i);
+ fprintf(stderr,"Computing lcp index %s\n",comma);
+ FREE(comma);
+ }
+ }
+ fclose(fp);
+ /* The temporary rank file is deleted once lcp is complete */
+ remove(rankfile);
+
+ return lcp;
+}
+#endif
+
+
+#define RW_BATCH 10000000 /* 10 million elements */
+/* Computes the lcp array (n+1 UINT4 entries, MALLOC'ed and returned) for the suffix array read from sarrayfile */
+/* Puts rank and permuted suffix array in file, to save on memory even further */
+UINT4 *
+Sarray_compute_lcp (char *rankfile, char *permuted_sarray_file, char *sarrayfile, UINT4 n) {
+ UINT4 *lcp;
+ UINT4 *SA, SA_i, zero = 0;
+ UINT4 *rank, rank_i, h;
+ UINT4 i, ii, b, j;
+ char *comma;
+ UINT4 *read_buffer_1, *read_buffer_2, *write_buffer;
+ void *p;
+
+ int sa_fd;
+ size_t sa_len;
+ FILE *fp, *permsa_fp;
+ /* rankfile and permuted_sarray_file are temporary: written, re-read, and removed below */
+ /* NOTE(review): none of the fopen results in this function is checked before use */
+ read_buffer_1 = (UINT4 *) MALLOC(RW_BATCH * sizeof(UINT4));
+
+ /* Compute rank: rank[SA[i]] = i, reading SA from sarrayfile in batches of RW_BATCH */
+ fp = fopen(sarrayfile,"rb");
+ rank = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
+ /* NOTE(review): ii + RW_BATCH is evaluated in UINT4; confirm n stays far enough below the UINT4 maximum that it cannot wrap */
+ for (ii = 0; ii + RW_BATCH <= n; ii += RW_BATCH) {
+ FREAD_UINTS(read_buffer_1,RW_BATCH,fp);
+ for (b = 0, i = ii; b < RW_BATCH; b++, i++) {
+ rank[read_buffer_1[b]] = i; /* rank[SA_i] = i; */
+ }
+ if (ii % MONITOR_INTERVAL == 0) {
+ comma = Genomicpos_commafmt(ii);
+ fprintf(stderr,"Computing rank %s\n",comma);
+ FREE(comma);
+ }
+ }
+ for (i = ii; i <= n; i++) { /* final partial batch */
+ FREAD_UINT(&SA_i,fp);
+ rank[SA_i] = i;
+ }
+
+ fclose(fp); /* sarrayfile */
+
+
+ /* Store rank in temporary file (whole batches, then the remaining n - ii + 1 entries) and free it from memory */
+ fprintf(stderr,"Writing temporary file for rank...");
+ fp = fopen(rankfile,"wb");
+ for (ii = 0; ii + RW_BATCH <= n; ii += RW_BATCH) {
+ p = (void *) &(rank[ii]);
+ FWRITE_UINTS(p,RW_BATCH,fp);
+ }
+ if (ii <= n) {
+ p = (void *) &(rank[ii]);
+ FWRITE_UINTS(p,n - ii + 1,fp);
+ }
+ fclose(fp); /* rankfile */
+ FREE(rank);
+ fprintf(stderr,"done\n");
+
+
+ /* Write permuted sarray: entry i holds SA[rank[i] - 1] (0 when rank[i] is 0), so the lcp pass can read it sequentially */
+ fprintf(stderr,"Writing temporary file for permuted sarray...");
+ write_buffer = (UINT4 *) MALLOC(RW_BATCH * sizeof(UINT4));
+ fp = fopen(rankfile,"rb");
+ permsa_fp = fopen(permuted_sarray_file,"wb");
+ SA = (UINT4 *) Access_mmap(&sa_fd,&sa_len,sarrayfile,sizeof(UINT4),/*randomp*/false);
+ /* SA is accessed through the mapping at rank_i - 1, while rank is streamed from rankfile */
+ for (ii = 0; ii + RW_BATCH <= n; ii += RW_BATCH) {
+ FREAD_UINTS(read_buffer_1,RW_BATCH,fp);
+ for (b = 0, i = ii; b < RW_BATCH; b++, i++) {
+ rank_i = read_buffer_1[b];
+ if (rank_i > 0) {
+ write_buffer[b] = SA[rank_i - 1];
+ } else {
+ write_buffer[b] = 0; /* Will be ignored */
+ }
+ }
+ FWRITE_UINTS(write_buffer,RW_BATCH,permsa_fp);
+ }
+ for (i = ii; i <= n; i++) { /* final partial batch */
+ FREAD_UINT(&rank_i,fp);
+ if (rank_i > 0) {
+ FWRITE_UINT(SA[rank_i - 1],permsa_fp);
+ } else {
+ FWRITE_UINT(zero,permsa_fp); /* Will be ignored */
+ }
+ }
+ /* Done with the mapped suffix array and the files of this pass */
+ munmap((void *) SA,sa_len);
+ close(sa_fd);
+ fclose(permsa_fp); /* permuted_sarray_file */
+ fclose(fp); /* rankfile */
+ FREE(write_buffer);
+ fprintf(stderr,"done\n");
+
+
+ /* Now allocate memory for lcp and compute */
+ read_buffer_2 = (UINT4 *) MALLOC(RW_BATCH * sizeof(UINT4));
+ fp = fopen(rankfile,"rb");
+ permsa_fp = fopen(permuted_sarray_file,"rb");
+
+ lcp = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
+
+ lcp[0] = 0; /* -1 ? */
+ h = 0;
+ /* For each i in order: h is increased by Genome_consecutive_matches_pair(i+h,j+h), stored at lcp[rank_i], then decremented if positive */
+ for (ii = 0; ii + RW_BATCH <= n; ii += RW_BATCH) {
+ FREAD_UINTS(read_buffer_1,RW_BATCH,fp);
+ FREAD_UINTS(read_buffer_2,RW_BATCH,permsa_fp);
+ for (b = 0, i = ii; b < RW_BATCH; b++, i++) {
+ rank_i = read_buffer_1[b];
+ j = read_buffer_2[b]; /* j = SA[rank_i - 1] */
+ if (rank_i > 0) {
+ h += Genome_consecutive_matches_pair(i+h,j+h,/*genomelength*/n);
+ lcp[rank_i] = h;
+ if (h > 0) {
+ h--;
+ }
+ }
+ }
+
+ if (ii % MONITOR_INTERVAL == 0) {
+ comma = Genomicpos_commafmt(ii);
+ fprintf(stderr,"Computing lcp index %s\n",comma);
+ FREE(comma);
+ }
+ }
+
+ for (i = ii; i <= n; i++) { /* final partial batch */
+ FREAD_UINT(&rank_i,fp);
+ FREAD_UINT(&j,permsa_fp); /* j = SA[rank_i - 1] */
+ if (rank_i > 0) {
+ h += Genome_consecutive_matches_pair(i+h,j+h,/*genomelength*/n);
+ lcp[rank_i] = h;
+ if (h > 0) {
+ h--;
+ }
+ }
+ }
+
+ fclose(permsa_fp); /* permuted_sarray_file */
+ fclose(fp); /* rankfile */
+ FREE(read_buffer_2);
+ FREE(read_buffer_1);
+ /* Delete both temporary files; only lcp is handed back to the caller */
+ remove(permuted_sarray_file);
+ remove(rankfile);
+
+ return lcp;
+}
+
+
+#if 0
+/* Based on Manzini, 2004 */
+/* eos_pos: end-of-string pos?  It is the index i at which SA[i] == 1; next_rank[] is filled in as a side effect. */
+static UINT4
+compute_next_rank (UINT4 *next_rank, UINT4 *SA, Genome_T genomecomp, UINT4 n, char *chartable) {
+ UINT4 eos_pos;
+ Univcoord_T nACGT, na, nc, ng, nt;
+ UINT4 i, j;
+ UINT4 count[5];
+ int numeric[128];
+ unsigned char c;
+ char *comma;
+ /* count[] starts as cumulative nucleotide counts: 0, A, A+C, A+C+G, A+C+G+T */
+ nACGT = Genome_ntcounts(&na,&nc,&ng,&nt,genomecomp,/*left*/0,/*length*/n);
+ fprintf(stderr,"Genome content: A %u, C %u, G %u, T %u, other %u\n",na,nc,ng,nt,n - nACGT);
+ count[0] = 0;
+ count[1] = na;
+ count[2] = na + nc;
+ count[3] = na + nc + ng;
+ count[4] = na + nc + ng + nt;
+ /* numeric[] maps a character code to 0..3 for A, C, G, T and to 4 for anything else */
+ for (c = 0; c < 128; c++) {
+ numeric[c] = 4;
+ }
+ numeric['A'] = 0;
+ numeric['C'] = 1;
+ numeric['G'] = 2;
+ numeric['T'] = 3;
+ /* Seed with the character at position n: its slot in next_rank gets 0 */
+ c = Genome_get_char_lex(genomecomp,/*pos*/n,n,chartable);
+ j = ++count[numeric[c]];
+ next_rank[j] = 0;
+ /* NOTE(review): eos_pos is assigned only if some SA[i] == 1 for i in 1..n; otherwise it is returned uninitialized */
+ for (i = 1; i <= n; i++) {
+ if (SA[i] == 1) {
+ eos_pos = i;
+ } else {
+ c = Genome_get_char_lex(genomecomp,/*pos*/SA[i] - 1,n,chartable);
+ j = ++count[numeric[c]];
+ next_rank[j] = i;
+ }
+
+ if (i % MONITOR_INTERVAL == 0) {
+ comma = Genomicpos_commafmt(i);
+ fprintf(stderr,"Computing rank %s\n",comma);
+ FREE(comma);
+ }
+ }
+ /* Debugging output: dumps next_rank and then exits, so the return below is never reached */
+ for (i = 0; i <= n; i++) {
+ printf("%u %u\n",i,next_rank[i]);
+ }
+ printf("Returning %u\n",eos_pos);
+ exit(0);
+
+
+ return eos_pos;
+}
+
+
+/* Use rank_i instead of k, and next_rank_i instead of nextk */
+/* Buggy: Result differs from that obtained by Kasai procedure */
+UINT4 *
+Sarray_compute_lcp_manzini (UINT4 *SA, Genome_T genomecomp, UINT4 n, char *chartable) {
+ UINT4 *lcp;
+ UINT4 h, i, j, rank_i, next_rank_i;
+ char *comma;
+ /* lcp first holds the next_rank links written by compute_next_rank; each slot is overwritten with its lcp value when visited */
+ lcp = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
+ rank_i = compute_next_rank(lcp,SA,genomecomp,n,chartable); /* Re-use lcp for next_rank */
+ h = 0;
+ /* The link in lcp[rank_i] is read first because that slot may be overwritten in the same iteration */
+ for (i = 1; i <= n; i++) {
+ next_rank_i = lcp[rank_i];
+ if (rank_i > 0) {
+ j = SA[rank_i - 1];
+ h += Genome_consecutive_matches_pair(i+h,j+h,/*genomelength*/n);
+ lcp[rank_i] = h;
+ if (h > 0) {
+ h--;
+ }
+ }
+ rank_i = next_rank_i;
+
+ if (i % MONITOR_INTERVAL == 0) {
+ comma = Genomicpos_commafmt(i);
+ fprintf(stderr,"Computing lcp index %s\n",comma);
+ FREE(comma);
+ }
+ }
+
+ return lcp;
+}
+#endif
+/* Used by cmetindex and atoiindex */
UINT4 *
Sarray_compute_lcp_from_genome (UINT4 *SA, unsigned char *gbuffer, UINT4 n) {
UINT4 *lcp;
@@ -1056,66 +1382,77 @@ make_child_twopass (UINT8 **nextp, UINT4 *nbytes, UINT4 *SA, UINT4 *plcpptrs, UI
Uintlist_free(&indexstack);
return child;
- }
- #endif
+}
+#endif
+
+
+#if 0
+/* Reads SA one element at a time */
+/* For adjoining chars, need to store 15 possibilities, or one nibble */
+/* Possibilities: $a, $c, $g, $t, $x, ac, ag, at, ax, cg, ct, cx, gt, gx, tx */
+unsigned char *
+Sarray_discriminating_chars (UINT4 *nbytes, char *sarrayfile, Genome_T genome,
+ unsigned char *lcp_bytes, UINT4 *lcp_guide, UINT4 *lcp_exceptions, int guide_interval,
+ UINT4 n, char *chartable) {
+ unsigned char *discrim_chars;
+ char char_before, char_at;
+ UINT4 i;
+ UINT4 lcp_i;
+
+ FILE *fp;
+ UINT4 SA_i_minus_1, SA_i;
+
+ *nbytes = ((n+1) + 1)/2;
+ discrim_chars = (unsigned char *) CALLOC(*nbytes,sizeof(unsigned char));
- /* For adjoining chars, need to store 15 possibilities, or one nibble */
- /* Possibilities: $a, $c, $g, $t, $x, ac, ag, at, ax, cg, ct, cx, gt, gx, tx */
- unsigned char *
- Sarray_discriminating_chars (UINT4 *nbytes, UINT4 *SA, Genome_T genome,
- unsigned char *lcp_bytes, UINT4 *lcp_guide, UINT4 *lcp_exceptions, int guide_interval,
- UINT4 n, char *chartable) {
- unsigned char *discrim_chars;
- char char_before, char_at;
- UINT4 i;
- UINT4 lcp_i;
-
- *nbytes = ((n+1) + 1)/2;
- discrim_chars = (unsigned char *) CALLOC(*nbytes,sizeof(unsigned char));
-
- for (i = 1; i <= n; i++) {
- lcp_i = Bytecoding_read_wguide(i,lcp_bytes,lcp_guide,lcp_exceptions,/*lcp_guide_interval*/1024);
- char_before = Genome_get_char_lex(genome,/*left*/SA[i-1] + lcp_i,/*genomelength*/n,chartable);
- char_at = Genome_get_char_lex(genome,/*left*/SA[i] + lcp_i,/*genomelength*/n,chartable);
- debug4(printf("i = %u, SA = %u and %u, and lcp_i = %u => %c %c\n",
- i,SA[i-1],SA[i],lcp_i,char_before == 0 ? '$' : char_before,char_at));
-
- if (i % 2 == 0) {
- /* Even, put into low nibble of byte */
- switch (char_before) {
- case 0:
- switch(char_at) {
- case 'A': discrim_chars[i/2] |= 0x01; break;
- case 'C': discrim_chars[i/2] |= 0x02; break;
- case 'G': discrim_chars[i/2] |= 0x03; break;
- case 'T': discrim_chars[i/2] |= 0x04; break;
- case 'X': discrim_chars[i/2] |= 0x05; break;
- default: abort();
- }
- break;
-
- case 'A':
- switch(char_at) {
- case 'C': discrim_chars[i/2] |= 0x06; break;
- case 'G': discrim_chars[i/2] |= 0x07; break;
- case 'T': discrim_chars[i/2] |= 0x08; break;
- case 'X': discrim_chars[i/2] |= 0x09; break;
- default: abort();
- }
- break;
-
- case 'C':
- switch(char_at) {
- case 'G': discrim_chars[i/2] |= 0x0A; break;
- case 'T': discrim_chars[i/2] |= 0x0B; break;
- case 'X': discrim_chars[i/2] |= 0x0C; break;
- default: abort();
+ fp = fopen(sarrayfile,"r");
+ FREAD_UINT(&SA_i_minus_1,fp);
+
+ for (i = 1; i <= n; i++) {
+ FREAD_UINT(&SA_i,fp);
+
+ lcp_i = Bytecoding_read_wguide(i,lcp_bytes,lcp_guide,lcp_exceptions,/*lcp_guide_interval*/1024);
+ char_before = Genome_get_char_lex(genome,/*left: SA[i-1]*/SA_i_minus_1 + lcp_i,/*genomelength*/n,chartable);
+ char_at = Genome_get_char_lex(genome,/*left: SA[i]*/SA_i + lcp_i,/*genomelength*/n,chartable);
+ debug4(printf("i = %u, SA = %u and %u, and lcp_i = %u => %c %c\n",
+ i,SA_i_minus_1,SA_i,lcp_i,char_before == 0 ? '$' : char_before,char_at));
+
+ if (i % 2 == 0) {
+ /* Even, put into low nibble of byte */
+ switch (char_before) {
+ case 0:
+ switch (char_at) {
+ case 'A': discrim_chars[i/2] |= 0x01; break;
+ case 'C': discrim_chars[i/2] |= 0x02; break;
+ case 'G': discrim_chars[i/2] |= 0x03; break;
+ case 'T': discrim_chars[i/2] |= 0x04; break;
+ case 'X': discrim_chars[i/2] |= 0x05; break;
+ default: abort();
+ }
+ break;
+
+ case 'A':
+ switch (char_at) {
+ case 'C': discrim_chars[i/2] |= 0x06; break;
+ case 'G': discrim_chars[i/2] |= 0x07; break;
+ case 'T': discrim_chars[i/2] |= 0x08; break;
+ case 'X': discrim_chars[i/2] |= 0x09; break;
+ default: abort();
+ }
+ break;
+
+ case 'C':
+ switch (char_at) {
+ case 'G': discrim_chars[i/2] |= 0x0A; break;
+ case 'T': discrim_chars[i/2] |= 0x0B; break;
+ case 'X': discrim_chars[i/2] |= 0x0C; break;
+ default: abort();
}
break;
case 'G':
- switch(char_at) {
+ switch (char_at) {
case 'T': discrim_chars[i/2] |= 0x0D; break;
case 'X': discrim_chars[i/2] |= 0x0E; break;
default: abort();
@@ -1123,7 +1460,7 @@ make_child_twopass (UINT8 **nextp, UINT4 *nbytes, UINT4 *SA, UINT4 *plcpptrs, UI
break;
case 'T':
- switch(char_at) {
+ switch (char_at) {
case 'X': discrim_chars[i/2] |= 0x0F; break;
default: abort();
}
@@ -1134,7 +1471,7 @@ make_child_twopass (UINT8 **nextp, UINT4 *nbytes, UINT4 *SA, UINT4 *plcpptrs, UI
/* Odd, put into high nibble of byte */
switch (char_before) {
case 0:
- switch(char_at) {
+ switch (char_at) {
case 'A': discrim_chars[i/2] |= 0x10; break;
case 'C': discrim_chars[i/2] |= 0x20; break;
case 'G': discrim_chars[i/2] |= 0x30; break;
@@ -1145,7 +1482,7 @@ make_child_twopass (UINT8 **nextp, UINT4 *nbytes, UINT4 *SA, UINT4 *plcpptrs, UI
break;
case 'A':
- switch(char_at) {
+ switch (char_at) {
case 'C': discrim_chars[i/2] |= 0x60; break;
case 'G': discrim_chars[i/2] |= 0x70; break;
case 'T': discrim_chars[i/2] |= 0x80; break;
@@ -1155,7 +1492,7 @@ make_child_twopass (UINT8 **nextp, UINT4 *nbytes, UINT4 *SA, UINT4 *plcpptrs, UI
break;
case 'C':
- switch(char_at) {
+ switch (char_at) {
case 'G': discrim_chars[i/2] |= 0xA0; break;
case 'T': discrim_chars[i/2] |= 0xB0; break;
case 'X': discrim_chars[i/2] |= 0xC0; break;
@@ -1164,7 +1501,7 @@ make_child_twopass (UINT8 **nextp, UINT4 *nbytes, UINT4 *SA, UINT4 *plcpptrs, UI
break;
case 'G':
- switch(char_at) {
+ switch (char_at) {
case 'T': discrim_chars[i/2] |= 0xD0; break;
case 'X': discrim_chars[i/2] |= 0xE0; break;
default: abort();
@@ -1172,19 +1509,290 @@ make_child_twopass (UINT8 **nextp, UINT4 *nbytes, UINT4 *SA, UINT4 *plcpptrs, UI
break;
case 'T':
- switch(char_at) {
+ switch (char_at) {
case 'X': discrim_chars[i/2] |= 0xF0; break;
default: abort();
}
break;
}
-
}
+
+ SA_i_minus_1 = SA_i;
}
+ fclose(fp);
+
return discrim_chars;
}
+#endif
+
+
+/* Builds the discriminating-chars array, one nibble per index i in 1..n; sets *nbytes.  Reads SA in batches for faster I/O */
+/* For adjoining chars, need to store 15 possibilities, or one nibble */
+/* Possibilities: $a, $c, $g, $t, $x, ac, ag, at, ax, cg, ct, cx, gt, gx, tx (stored as codes 1 through 15, in this order) */
+unsigned char *
+Sarray_discriminating_chars (UINT4 *nbytes, char *sarrayfile, Genome_T genome,
+ unsigned char *lcp_bytes, UINT4 *lcp_guide, UINT4 *lcp_exceptions, int guide_interval,
+ UINT4 n, char *chartable) {
+ unsigned char *discrim_chars;
+ char char_before, char_at;
+ UINT4 i, ii, b;
+ UINT4 lcp_i;
+ char *comma;
+
+ FILE *fp;
+ UINT4 *read_buffer;
+ UINT4 SA_i_minus_1, SA_i;
+
+ /* Two nibbles per byte for indices 0..n; codes are OR'ed in below, so this relies on CALLOC zero-filling (TODO confirm) */
+ *nbytes = ((n+1) + 1)/2;
+ discrim_chars = (unsigned char *) CALLOC(*nbytes,sizeof(unsigned char));
+
+ /* NOTE(review): guide_interval is not referenced in this function; 1024 is passed to Bytecoding_read_wguide instead */
+ read_buffer = (UINT4 *) MALLOC(RW_BATCH * sizeof(UINT4));
+ /* NOTE(review): the fopen result is not checked before use */
+ fp = fopen(sarrayfile,"rb");
+ FREAD_UINT(&SA_i_minus_1,fp); /* Initializes SA[0] */
+
+ for (ii = 1; ii + RW_BATCH <= n; ii += RW_BATCH) {
+ FREAD_UINTS(read_buffer,RW_BATCH,fp);
+
+ for (b = 0, i = ii; b < RW_BATCH; b++, i++) {
+ SA_i = read_buffer[b];
+
+ lcp_i = Bytecoding_read_wguide(i,lcp_bytes,lcp_guide,lcp_exceptions,/*lcp_guide_interval*/1024);
+ char_before = Genome_get_char_lex(genome,/*left: SA[i-1]*/SA_i_minus_1 + lcp_i,/*genomelength*/n,chartable);
+ char_at = Genome_get_char_lex(genome,/*left: SA[i]*/SA_i + lcp_i,/*genomelength*/n,chartable);
+ debug4(printf("i = %u, SA = %u and %u, and lcp_i = %u => %c %c\n",
+ i,SA_i_minus_1,SA_i,lcp_i,char_before == 0 ? '$' : char_before,char_at));
+ /* Encode the pair (char_before, char_at) into byte i/2; a char_before other than 0, A, C, G, T sets nothing */
+ if (i % 2 == 0) {
+ /* Even, put into low nibble of byte */
+ switch (char_before) {
+ case 0:
+ switch (char_at) {
+ case 'A': discrim_chars[i/2] |= 0x01; break;
+ case 'C': discrim_chars[i/2] |= 0x02; break;
+ case 'G': discrim_chars[i/2] |= 0x03; break;
+ case 'T': discrim_chars[i/2] |= 0x04; break;
+ case 'X': discrim_chars[i/2] |= 0x05; break;
+ default: abort();
+ }
+ break;
+
+ case 'A':
+ switch (char_at) {
+ case 'C': discrim_chars[i/2] |= 0x06; break;
+ case 'G': discrim_chars[i/2] |= 0x07; break;
+ case 'T': discrim_chars[i/2] |= 0x08; break;
+ case 'X': discrim_chars[i/2] |= 0x09; break;
+ default: abort();
+ }
+ break;
+
+ case 'C':
+ switch (char_at) {
+ case 'G': discrim_chars[i/2] |= 0x0A; break;
+ case 'T': discrim_chars[i/2] |= 0x0B; break;
+ case 'X': discrim_chars[i/2] |= 0x0C; break;
+ default: abort();
+ }
+ break;
+
+ case 'G':
+ switch (char_at) {
+ case 'T': discrim_chars[i/2] |= 0x0D; break;
+ case 'X': discrim_chars[i/2] |= 0x0E; break;
+ default: abort();
+ }
+ break;
+
+ case 'T':
+ switch (char_at) {
+ case 'X': discrim_chars[i/2] |= 0x0F; break;
+ default: abort();
+ }
+ break;
+ }
+
+ } else {
+ /* Odd, put into high nibble of byte */
+ switch (char_before) {
+ case 0:
+ switch (char_at) {
+ case 'A': discrim_chars[i/2] |= 0x10; break;
+ case 'C': discrim_chars[i/2] |= 0x20; break;
+ case 'G': discrim_chars[i/2] |= 0x30; break;
+ case 'T': discrim_chars[i/2] |= 0x40; break;
+ case 'X': discrim_chars[i/2] |= 0x50; break;
+ default: abort();
+ }
+ break;
+
+ case 'A':
+ switch (char_at) {
+ case 'C': discrim_chars[i/2] |= 0x60; break;
+ case 'G': discrim_chars[i/2] |= 0x70; break;
+ case 'T': discrim_chars[i/2] |= 0x80; break;
+ case 'X': discrim_chars[i/2] |= 0x90; break;
+ default: abort();
+ }
+ break;
+
+ case 'C':
+ switch (char_at) {
+ case 'G': discrim_chars[i/2] |= 0xA0; break;
+ case 'T': discrim_chars[i/2] |= 0xB0; break;
+ case 'X': discrim_chars[i/2] |= 0xC0; break;
+ default: abort();
+ }
+ break;
+
+ case 'G':
+ switch (char_at) {
+ case 'T': discrim_chars[i/2] |= 0xD0; break;
+ case 'X': discrim_chars[i/2] |= 0xE0; break;
+ default: abort();
+ }
+ break;
+
+ case 'T':
+ switch (char_at) {
+ case 'X': discrim_chars[i/2] |= 0xF0; break;
+ default: abort();
+ }
+ break;
+ }
+ }
+
+ SA_i_minus_1 = SA_i;
+ }
+
+ /* Need (ii - 1) because we start with ii = 1 */
+ if ((ii - 1) % MONITOR_INTERVAL == 0) {
+ comma = Genomicpos_commafmt(ii-1);
+ fprintf(stderr,"Computing DC array %s\n",comma);
+ FREE(comma);
+ }
+ }
+ /* Remaining entries through n, read one at a time; the encoding below repeats the batch loop's */
+ for (i = ii; i <= n; i++) {
+ FREAD_UINT(&SA_i,fp);
+
+ lcp_i = Bytecoding_read_wguide(i,lcp_bytes,lcp_guide,lcp_exceptions,/*lcp_guide_interval*/1024);
+ char_before = Genome_get_char_lex(genome,/*left: SA[i-1]*/SA_i_minus_1 + lcp_i,/*genomelength*/n,chartable);
+ char_at = Genome_get_char_lex(genome,/*left: SA[i]*/SA_i + lcp_i,/*genomelength*/n,chartable);
+ debug4(printf("i = %u, SA = %u and %u, and lcp_i = %u => %c %c\n",
+ i,SA_i_minus_1,SA_i,lcp_i,char_before == 0 ? '$' : char_before,char_at));
+
+ if (i % 2 == 0) {
+ /* Even, put into low nibble of byte */
+ switch (char_before) {
+ case 0:
+ switch (char_at) {
+ case 'A': discrim_chars[i/2] |= 0x01; break;
+ case 'C': discrim_chars[i/2] |= 0x02; break;
+ case 'G': discrim_chars[i/2] |= 0x03; break;
+ case 'T': discrim_chars[i/2] |= 0x04; break;
+ case 'X': discrim_chars[i/2] |= 0x05; break;
+ default: abort();
+ }
+ break;
+ case 'A':
+ switch (char_at) {
+ case 'C': discrim_chars[i/2] |= 0x06; break;
+ case 'G': discrim_chars[i/2] |= 0x07; break;
+ case 'T': discrim_chars[i/2] |= 0x08; break;
+ case 'X': discrim_chars[i/2] |= 0x09; break;
+ default: abort();
+ }
+ break;
+
+ case 'C':
+ switch (char_at) {
+ case 'G': discrim_chars[i/2] |= 0x0A; break;
+ case 'T': discrim_chars[i/2] |= 0x0B; break;
+ case 'X': discrim_chars[i/2] |= 0x0C; break;
+ default: abort();
+ }
+ break;
+
+ case 'G':
+ switch (char_at) {
+ case 'T': discrim_chars[i/2] |= 0x0D; break;
+ case 'X': discrim_chars[i/2] |= 0x0E; break;
+ default: abort();
+ }
+ break;
+
+ case 'T':
+ switch (char_at) {
+ case 'X': discrim_chars[i/2] |= 0x0F; break;
+ default: abort();
+ }
+ break;
+ }
+
+ } else {
+ /* Odd, put into high nibble of byte */
+ switch (char_before) {
+ case 0:
+ switch (char_at) {
+ case 'A': discrim_chars[i/2] |= 0x10; break;
+ case 'C': discrim_chars[i/2] |= 0x20; break;
+ case 'G': discrim_chars[i/2] |= 0x30; break;
+ case 'T': discrim_chars[i/2] |= 0x40; break;
+ case 'X': discrim_chars[i/2] |= 0x50; break;
+ default: abort();
+ }
+ break;
+
+ case 'A':
+ switch (char_at) {
+ case 'C': discrim_chars[i/2] |= 0x60; break;
+ case 'G': discrim_chars[i/2] |= 0x70; break;
+ case 'T': discrim_chars[i/2] |= 0x80; break;
+ case 'X': discrim_chars[i/2] |= 0x90; break;
+ default: abort();
+ }
+ break;
+
+ case 'C':
+ switch (char_at) {
+ case 'G': discrim_chars[i/2] |= 0xA0; break;
+ case 'T': discrim_chars[i/2] |= 0xB0; break;
+ case 'X': discrim_chars[i/2] |= 0xC0; break;
+ default: abort();
+ }
+ break;
+
+ case 'G':
+ switch (char_at) {
+ case 'T': discrim_chars[i/2] |= 0xD0; break;
+ case 'X': discrim_chars[i/2] |= 0xE0; break;
+ default: abort();
+ }
+ break;
+
+ case 'T':
+ switch (char_at) {
+ case 'X': discrim_chars[i/2] |= 0xF0; break;
+ default: abort();
+ }
+ break;
+ }
+ }
+
+ SA_i_minus_1 = SA_i;
+ }
+
+ fclose(fp);
+
+ FREE(read_buffer);
+ /* The caller receives the CALLOC'ed array; its length in bytes was stored in *nbytes above */
+ return discrim_chars;
+}
/* Onepass method */
@@ -1269,10 +1877,12 @@ Sarray_compute_child (unsigned char *lcp_bytes, UINT4 *lcp_guide, UINT4 *lcp_exc
debug2(printf("\n"));
}
- /* No need to clean out stack, because all of the next links have been written. */
+ /* Previously, thought there was no need to clean out stack, because
+ all of the next links have been written. However, skipping the
+ section below gave rise to an incorrect child array in the T section */
debug2(printf("stack still has %d entries\n",Uintlist_length(lcpstack)));
-#if 0
- lcp_i = -1;
+#if 1
+ lcp_i = 0;
while (lcp_i < Uintlist_head(lcpstack)) {
indexstack = Uintlist_pop(indexstack,&lastindex);
lcpstack = Uintlist_pop(lcpstack,&lcp_lastindex);
diff --git a/src/sarray-write.h b/src/sarray-write.h
index 91b7e4e..64aa3bf 100644
--- a/src/sarray-write.h
+++ b/src/sarray-write.h
@@ -1,4 +1,4 @@
-/* $Id: sarray-write.h 133760 2014-04-20 05:16:56Z twu $ */
+/* $Id: sarray-write.h 140511 2014-07-03 01:50:36Z twu $ */
#ifndef SARRAY_WRITE_INCLUDED
#define SARRAY_WRITE_INCLUDED
#include "types.h"
@@ -21,12 +21,12 @@ Sarray_write_index_interleaved (char *indexptrsfile, char *indexcompfile,
char chartable[]);
extern UINT4 *
-Sarray_compute_lcp (UINT4 *SA, UINT4 n);
+Sarray_compute_lcp (char *rankfile, char *permuted_sarray_file, char *sarrayfile, UINT4 n);
extern UINT4 *
Sarray_compute_lcp_from_genome (UINT4 *SA, unsigned char *gbuffer, UINT4 n);
extern unsigned char *
-Sarray_discriminating_chars (UINT4 *nbytes, UINT4 *SA, Genome_T genome,
+Sarray_discriminating_chars (UINT4 *nbytes, char *sarrayfile, Genome_T genome,
unsigned char *lcp_bytes, UINT4 *lcp_guide, UINT4 *lcp_exceptions, int guide_interval,
UINT4 n, char chartable[]);
diff --git a/src/splice.c b/src/splice.c
index d2c2d7c..dcdd3ec 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 138745 2014-06-11 19:04:25Z twu $";
+static char rcsid[] = "$Id: splice.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -7,6 +7,7 @@ static char rcsid[] = "$Id: splice.c 138745 2014-06-11 19:04:25Z twu $";
#include <stdio.h>
#include "mem.h"
+#include "assert.h"
#include "sense.h"
#include "genome128_hr.h"
#include "genome_sites.h"
@@ -78,25 +79,25 @@ sufficient_splice_prob_local (int support, int nmismatches, double spliceprob) {
1 to obtain joffset + j. */
List_T
-Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
-
- bool *segmenti_usedp, bool *segmentj_usedp,
- Univcoord_T segmenti_left, Univcoord_T segmentj_left,
- Chrnum_T segmenti_chrnum, Univcoord_T segmenti_chroffset,
- Univcoord_T segmenti_chrhigh, Chrpos_T segmenti_chrlength,
- Chrnum_T segmentj_chrnum, Univcoord_T segmentj_chroffset,
- Univcoord_T segmentj_chrhigh, Chrpos_T segmentj_chrlength,
+Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lowprob,
+
+ bool *segmenti_usedp, bool *segmentj_usedp,
+ Univcoord_T segmenti_left, Univcoord_T segmentj_left,
+ Chrnum_T segmenti_chrnum, Univcoord_T segmenti_chroffset,
+ Univcoord_T segmenti_chrhigh, Chrpos_T segmenti_chrlength,
+ Chrnum_T segmentj_chrnum, Univcoord_T segmentj_chroffset,
+ Univcoord_T segmentj_chrhigh, Chrpos_T segmentj_chrlength,
- int querylength, Compress_T query_compress,
- int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
- int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
- int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
- int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
- int segmenti_donor_nknown, int segmentj_acceptor_nknown,
- int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
- int splicing_penalty, int max_mismatches_allowed,
- bool plusp, int genestrand, bool first_read_p,
- bool subs_or_indels_p, bool sarrayp) {
+ int querylength, Compress_T query_compress,
+ int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
+ int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
+ int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
+ int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
+ int segmenti_donor_nknown, int segmentj_acceptor_nknown,
+ int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
+ int splicing_penalty, int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p,
+ bool subs_or_indels_p, bool sarrayp) {
Substring_T donor, acceptor;
int best_splice_pos, splice_pos_start, splice_pos_end, splice_pos, i, j;
int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1];
@@ -156,211 +157,652 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
if (splice_pos_start <= splice_pos_end) {
- /* Originally from plus strand. No complement. */
- /* Sense (End 1 to End 2) or Antisense (End 5 to End 6) */
- if (novelsplicingp && segmenti_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
- donori_nsites = Genome_donor_positions(donor_positions_alloc,donor_knowni_alloc,
- segmenti_donor_knownpos,segmenti_donor_knowni,
- segmenti_left,splice_pos_start,splice_pos_end);
- donori_positions = donor_positions_alloc;
- donori_knowni = donor_knowni_alloc;
- } else {
- donori_nsites = segmenti_donor_nknown;
- donori_positions = segmenti_donor_knownpos;
- donori_knowni = segmenti_donor_knowni;
- }
+ if (plusp == true) {
+ /* Originally from plus strand. No complement. */
+ /* Sense (End 1 to End 2) or Antisense (End 5 to End 6) */
+ if (novelsplicingp && segmenti_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
+ donori_nsites = Genome_donor_positions(donor_positions_alloc,donor_knowni_alloc,
+ segmenti_donor_knownpos,segmenti_donor_knowni,
+ segmenti_left,splice_pos_start,splice_pos_end);
+ donori_positions = donor_positions_alloc;
+ donori_knowni = donor_knowni_alloc;
+ } else {
+ donori_nsites = segmenti_donor_nknown;
+ donori_positions = segmenti_donor_knownpos;
+ donori_knowni = segmenti_donor_knowni;
+ }
#ifdef DEBUG1
- printf("Found %d donori sites:",donori_nsites);
- for (i = 0; i < donori_nsites; i++) {
- printf(" %d",donori_positions[i]);
- if (donori_knowni[i] >= 0) {
- printf(" (%d)",donori_knowni[i]);
+ printf("Found %d donori sites:",donori_nsites);
+ for (i = 0; i < donori_nsites; i++) {
+ printf(" %d",donori_positions[i]);
+ if (donori_knowni[i] >= 0) {
+ printf(" (%d)",donori_knowni[i]);
+ }
}
- }
- printf("\n");
+ printf("\n");
#endif
- if (novelsplicingp && segmentj_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
- acceptorj_nsites = Genome_acceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
- segmentj_acceptor_knownpos,segmentj_acceptor_knowni,
- segmentj_left,splice_pos_start,splice_pos_end);
- acceptorj_positions = acceptor_positions_alloc;
- acceptorj_knowni = acceptor_knowni_alloc;
- } else {
- acceptorj_nsites = segmentj_acceptor_nknown;
- acceptorj_positions = segmentj_acceptor_knownpos;
- acceptorj_knowni = segmentj_acceptor_knowni;
- }
+ if (novelsplicingp && segmentj_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
+ acceptorj_nsites = Genome_acceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
+ segmentj_acceptor_knownpos,segmentj_acceptor_knowni,
+ segmentj_left,splice_pos_start,splice_pos_end);
+ acceptorj_positions = acceptor_positions_alloc;
+ acceptorj_knowni = acceptor_knowni_alloc;
+ } else {
+ acceptorj_nsites = segmentj_acceptor_nknown;
+ acceptorj_positions = segmentj_acceptor_knownpos;
+ acceptorj_knowni = segmentj_acceptor_knowni;
+ }
#ifdef DEBUG1
- printf("Found %d acceptorj sites:",acceptorj_nsites);
- for (i = 0; i < acceptorj_nsites; i++) {
- printf(" %d",acceptorj_positions[i]);
- if (acceptorj_knowni[i] >= 0) {
- printf(" (%d)",acceptorj_knowni[i]);
+ printf("Found %d acceptorj sites:",acceptorj_nsites);
+ for (i = 0; i < acceptorj_nsites; i++) {
+ printf(" %d",acceptorj_positions[i]);
+ if (acceptorj_knowni[i] >= 0) {
+ printf(" (%d)",acceptorj_knowni[i]);
+ }
}
- }
- printf("\n");
+ printf("\n");
#endif
- best_nmismatches = max_mismatches_allowed;
- best_prob = 0.0;
- orig_plusp = true;
+ best_nmismatches = max_mismatches_allowed;
+ best_prob = 0.0;
- i = j = 0;
- while (i < donori_nsites && j < acceptorj_nsites) {
- if ((splice_pos = donori_positions[i]) < acceptorj_positions[j]) {
- i++;
- } else if (splice_pos > acceptorj_positions[j]) {
- j++;
+ i = j = 0;
+ while (i < donori_nsites && j < acceptorj_nsites) {
+ if ((splice_pos = donori_positions[i]) < acceptorj_positions[j]) {
+ i++;
+ } else if (splice_pos > acceptorj_positions[j]) {
+ j++;
+ } else {
+ segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p);
+ segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p);
+ if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
+ if (donori_knowni[i] >= 0) {
+ probi = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probi = Maxent_hr_donor_prob(segmenti_left + splice_pos,segmenti_chroffset);
+ }
+
+ if (acceptorj_knowni[j] >= 0) {
+ probj = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probj = Maxent_hr_acceptor_prob(segmentj_left + splice_pos,segmentj_chroffset);
+ }
+
+ debug1(
+ if (plusp == true) {
+ printf("plus sense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
+ } else {
+ printf("minus antisense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
+ });
+
+ if (nmismatches < best_nmismatches ||
+ (nmismatches == best_nmismatches && probi + probj > best_prob)) {
+ /* Success */
+ best_nmismatches = nmismatches;
+ best_prob = probi + probj;
+
+ best_donor_splicecoord = segmenti_left + splice_pos;
+ best_acceptor_splicecoord = segmentj_left + splice_pos;
+ best_donor_knowni = donori_knowni[i];
+ best_acceptor_knowni = acceptorj_knowni[j];
+ best_donor_prob = probi;
+ best_acceptor_prob = probj;
+ best_splice_pos = splice_pos;
+ best_segmenti_nmismatches = segmenti_nmismatches;
+ best_segmentj_nmismatches = segmentj_nmismatches;
+ orig_plusp = true; /* for sense, require plusp to be true */
+ }
+ }
+ i++;
+ j++;
+ }
+ }
+
+ } else {
+ /* minus */
+ /* Originally from minus strand. Complement. */
+ /* Antisense (End 7 to End 8) or Sense (End 3 to End 4) */
+ if (novelsplicingp && segmenti_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
+ antiacceptori_nsites = Genome_antiacceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
+ segmenti_antiacceptor_knownpos,segmenti_antiacceptor_knowni,
+ segmenti_left,splice_pos_start,splice_pos_end);
+ antiacceptori_positions = acceptor_positions_alloc;
+ antiacceptori_knowni = acceptor_knowni_alloc;
} else {
- segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos,
- plusp,genestrand,first_read_p);
- segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/querylength,
- plusp,genestrand,first_read_p);
- if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
- if (donori_knowni[i] >= 0) {
- probi = 1.0; /* Needs to be 1.0 for output */
- } else {
- probi = Maxent_hr_donor_prob(segmenti_left + splice_pos,segmenti_chroffset);
+ antiacceptori_nsites = segmenti_antiacceptor_nknown;
+ antiacceptori_positions = segmenti_antiacceptor_knownpos;
+ antiacceptori_knowni = segmenti_antiacceptor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d antiacceptori sites:",antiacceptori_nsites);
+ for (i = 0; i < antiacceptori_nsites; i++) {
+ printf(" %d",antiacceptori_positions[i]);
+ if (antiacceptori_knowni[i] >= 0) {
+ printf(" (%d)",antiacceptori_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+
+ if (novelsplicingp && segmentj_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
+ antidonorj_nsites = Genome_antidonor_positions(donor_positions_alloc,donor_knowni_alloc,
+ segmentj_antidonor_knownpos,segmentj_antidonor_knowni,
+ segmentj_left,splice_pos_start,splice_pos_end);
+ antidonorj_positions = donor_positions_alloc;
+ antidonorj_knowni = donor_knowni_alloc;
+ } else {
+ antidonorj_nsites = segmentj_antidonor_nknown;
+ antidonorj_positions = segmentj_antidonor_knownpos;
+ antidonorj_knowni = segmentj_antidonor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d antidonorj sites:",antidonorj_nsites);
+ for (i = 0; i < antidonorj_nsites; i++) {
+ printf(" %d",antidonorj_positions[i]);
+ if (antidonorj_knowni[i] >= 0) {
+ printf(" (%d)",antidonorj_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+
+ best_nmismatches = max_mismatches_allowed;
+ best_prob = 0.0;
+
+ i = j = 0;
+ while (i < antiacceptori_nsites && j < antidonorj_nsites) {
+ if ((splice_pos = antiacceptori_positions[i]) < antidonorj_positions[j]) {
+ i++;
+ } else if (splice_pos > antidonorj_positions[j]) {
+ j++;
+ } else {
+ segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p);
+ segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p);
+ if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
+ if (antiacceptori_knowni[i] >= 0) {
+ probi = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probi = Maxent_hr_antiacceptor_prob(segmenti_left + splice_pos,segmenti_chroffset);
+ }
+
+ if (antidonorj_knowni[j] >= 0) {
+ probj = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probj = Maxent_hr_antidonor_prob(segmentj_left + splice_pos,segmentj_chroffset);
+ }
+
+ debug1(
+ if (plusp == true) {
+ printf("plus antisense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
+ } else {
+ printf("minus sense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
+ });
+
+ if (nmismatches < best_nmismatches ||
+ (nmismatches == best_nmismatches && probi + probj > best_prob)) {
+ /* Success */
+ best_nmismatches = nmismatches;
+ best_prob = probi + probj;
+
+ best_donor_splicecoord = segmentj_left + splice_pos;
+ best_acceptor_splicecoord = segmenti_left + splice_pos;
+ best_donor_knowni = antidonorj_knowni[j];
+ best_acceptor_knowni = antiacceptori_knowni[i];
+ best_donor_prob = probj;
+ best_acceptor_prob = probi;
+ best_splice_pos = splice_pos;
+ best_segmentj_nmismatches = segmentj_nmismatches;
+ best_segmenti_nmismatches = segmenti_nmismatches;
+ orig_plusp = false; /* for sense, require plusp to be false */
+ }
}
+ i++;
+ j++;
+ }
+ }
+ }
- if (acceptorj_knowni[j] >= 0) {
- probj = 1.0; /* Needs to be 1.0 for output */
+ if (best_prob > 0.0) {
+ debug1(printf("best_prob = %f at splice_pos %d (%u,%u)\n",
+ best_prob,best_splice_pos,best_donor_splicecoord,best_acceptor_splicecoord));
+ if (orig_plusp == true) {
+ /* Originally from plus strand. No complement. */
+ sensep = (plusp == true) ? true : false;
+ sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+ assert(sensedir == SENSE_FORWARD);
+
+ donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
+ best_splice_pos,best_segmenti_nmismatches,
+ best_donor_prob,/*left*/segmenti_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,sensep,
+ segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
+
+ acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
+ best_splice_pos,best_segmentj_nmismatches,
+ best_acceptor_prob,/*left*/segmentj_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,sensep,
+ segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
+
+ if (donor == NULL || acceptor == NULL) {
+ if (donor != NULL) Substring_free(&donor);
+ if (acceptor != NULL) Substring_free(&acceptor);
+ } else {
+ debug1(printf("Splice_solve_single_sense success\n"));
+ *segmenti_usedp = *segmentj_usedp = true;
+
+ donor_support = best_splice_pos;
+ acceptor_support = querylength - best_splice_pos;
+ sufficient1p = sufficient_splice_prob_local(donor_support,best_segmenti_nmismatches,best_donor_prob);
+ sufficient2p = sufficient_splice_prob_local(acceptor_support,best_segmentj_nmismatches,best_acceptor_prob);
+
+ if (sufficient1p && sufficient2p) {
+ *nhits += 1;
+ return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
+ donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
+ sarrayp));
+ } else if (subs_or_indels_p == true) {
+ if (donor != NULL) Substring_free(&donor);
+ if (acceptor != NULL) Substring_free(&acceptor);
+ return hits;
+ } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) {
+ if (donor != NULL) Substring_free(&donor);
+ if (acceptor != NULL) Substring_free(&acceptor);
+ return hits;
+ } else if (sufficient1p || sufficient2p) {
+ *lowprob = List_push(*lowprob,
+ (void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
+ donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
+ sarrayp));
+ return hits;
} else {
- probj = Maxent_hr_acceptor_prob(segmentj_left + splice_pos,segmentj_chroffset);
+ if (donor != NULL) Substring_free(&donor);
+ if (acceptor != NULL) Substring_free(&acceptor);
}
+ }
+
+ } else {
+ /* Originally from minus strand. Complement. */
+ sensep = (plusp == true) ? false : true;
+ sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+ assert(sensedir == SENSE_FORWARD);
+
+ donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
+ best_splice_pos,best_segmentj_nmismatches,
+ best_donor_prob,/*left*/segmentj_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,sensep,
+ segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
+
+ acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
+ best_splice_pos,best_segmenti_nmismatches,
+ best_acceptor_prob,/*left*/segmenti_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,sensep,
+ segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
- debug1(
- if (plusp == true) {
- printf("plus sense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
- } else {
- printf("minus antisense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
- });
-
- if (nmismatches < best_nmismatches ||
- (nmismatches == best_nmismatches && probi + probj > best_prob)) {
- /* Success */
- best_nmismatches = nmismatches;
- best_prob = probi + probj;
-
- best_donor_splicecoord = segmenti_left + splice_pos;
- best_acceptor_splicecoord = segmentj_left + splice_pos;
- best_donor_knowni = donori_knowni[i];
- best_acceptor_knowni = acceptorj_knowni[j];
- best_donor_prob = probi;
- best_acceptor_prob = probj;
- best_splice_pos = splice_pos;
- best_segmenti_nmismatches = segmenti_nmismatches;
- best_segmentj_nmismatches = segmentj_nmismatches;
+ if (donor == NULL || acceptor == NULL) {
+ if (donor != NULL) Substring_free(&donor);
+ if (acceptor != NULL) Substring_free(&acceptor);
+ } else {
+ debug1(printf("Splice_solve_single_sense success\n"));
+ *segmenti_usedp = *segmentj_usedp = true;
+
+ acceptor_support = best_splice_pos;
+ donor_support = querylength - best_splice_pos;
+ sufficient1p = sufficient_splice_prob_local(acceptor_support,best_segmenti_nmismatches,best_acceptor_prob);
+ sufficient2p = sufficient_splice_prob_local(donor_support,best_segmentj_nmismatches,best_donor_prob);
+ if (sufficient1p && sufficient2p) {
+ *nhits += 1;
+ return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
+ donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
+ sarrayp));
+ } else if (subs_or_indels_p == true) {
+ if (donor != NULL) Substring_free(&donor);
+ if (acceptor != NULL) Substring_free(&acceptor);
+ return hits;
+ } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) {
+ if (donor != NULL) Substring_free(&donor);
+ if (acceptor != NULL) Substring_free(&acceptor);
+ return hits;
+ } else if (sufficient1p || sufficient2p) {
+ *lowprob = List_push(*lowprob,
+ (void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
+ donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
+ sarrayp));
+ return hits;
+ } else {
+ if (donor != NULL) Substring_free(&donor);
+ if (acceptor != NULL) Substring_free(&acceptor);
+ return hits;
}
}
- i++;
- j++;
}
}
+ }
+ debug1(printf("Splice_solve_single_sense fail\n"));
+ return hits;
+}
- /* Originally from minus strand. Complement. */
- /* Antisense (End 7 to End 8) or Sense (End 3 to End 4) */
- if (novelsplicingp && segmenti_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
- antiacceptori_nsites = Genome_antiacceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
- segmenti_antiacceptor_knownpos,segmenti_antiacceptor_knowni,
- segmenti_left,splice_pos_start,splice_pos_end);
- antiacceptori_positions = acceptor_positions_alloc;
- antiacceptori_knowni = acceptor_knowni_alloc;
- } else {
- antiacceptori_nsites = segmenti_antiacceptor_nknown;
- antiacceptori_positions = segmenti_antiacceptor_knownpos;
- antiacceptori_knowni = segmenti_antiacceptor_knowni;
+
+List_T
+Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T *lowprob,
+
+ bool *segmenti_usedp, bool *segmentj_usedp,
+ Univcoord_T segmenti_left, Univcoord_T segmentj_left,
+ Chrnum_T segmenti_chrnum, Univcoord_T segmenti_chroffset,
+ Univcoord_T segmenti_chrhigh, Chrpos_T segmenti_chrlength,
+ Chrnum_T segmentj_chrnum, Univcoord_T segmentj_chroffset,
+ Univcoord_T segmentj_chrhigh, Chrpos_T segmentj_chrlength,
+
+ int querylength, Compress_T query_compress,
+ int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
+ int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
+ int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
+ int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
+ int segmenti_donor_nknown, int segmentj_acceptor_nknown,
+ int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
+ int splicing_penalty, int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p,
+ bool subs_or_indels_p, bool sarrayp) {
+ Substring_T donor, acceptor;
+ int best_splice_pos, splice_pos_start, splice_pos_end, splice_pos, i, j;
+ int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1];
+ int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1];
+
+ int best_nmismatches, nmismatches;
+ int best_segmenti_nmismatches, best_segmentj_nmismatches, segmenti_nmismatches, segmentj_nmismatches;
+ int donor_support, acceptor_support;
+ Univcoord_T best_donor_splicecoord, best_acceptor_splicecoord;
+ int best_donor_knowni, best_acceptor_knowni;
+ double best_prob, best_donor_prob, best_acceptor_prob, probi, probj;
+ bool sufficient1p, sufficient2p, orig_plusp, sensep;
+ int sensedir;
+
+ int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
+ int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
+ int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+
+
+ debug1(printf("Splice_solve_single: Getting genome at lefti %u and leftj %u (diff: %d)\n",
+ segmenti_left,segmentj_left,segmentj_left-segmenti_left));
+ *nhits = 0;
+
+#if 0
+ int sum, lefti, righti;
+ splice_pos_start = querylength;
+ splice_pos_end = 0;
+ for (sum = 0; sum <= max_mismatches_allowed; sum++) {
+ for (lefti = 0; lefti <= sum && lefti < nmismatches_left; lefti++) {
+ if ((righti = sum - lefti) < nmismatches_right &&
+ mismatch_positions_left[lefti] > mismatch_positions_right[righti]) {
+ debug1(printf("At %d+%d mismatches, splice_pos using right: %d\n",lefti,righti,mismatch_positions_right[righti]+1));
+ debug1(printf("At %d+%d mismatches, splice_pos using left: %d\n",lefti,righti,mismatch_positions_left[lefti]));
+ if (mismatch_positions_right[righti] + 1 < splice_pos_start) {
+ splice_pos_start = mismatch_positions_right[righti] + 1; /* This is leftmost position in righti+1 .. lefti */
+ }
+ if (mismatch_positions_left[lefti] > splice_pos_end) {
+ splice_pos_end = mismatch_positions_left[lefti]; /* This is rightmost position in righti+1 .. lefti */
+ }
+ }
}
+ }
+
+ /* Exclude ends */
+ if (splice_pos_start < min_localsplicing_end_matches) {
+ splice_pos_start = min_localsplicing_end_matches;
+ }
+ if (splice_pos_end > querylength - min_localsplicing_end_matches) {
+ splice_pos_end = querylength - min_localsplicing_end_matches;
+ }
+#else
+ /* splice_pos_start = min_localsplicing_end_matches; */
+ /* splice_pos_end = querylength - min_localsplicing_end_matches; */
+ splice_pos_start = min_shortend;
+ splice_pos_end = querylength - min_shortend; /* ? off by 1, so -l 3 allows only ends of up to 2 */
+#endif
+
+
+ if (splice_pos_start <= splice_pos_end) {
+ if (plusp == false) {
+ /* minus */
+ /* Originally from plus strand. No complement. */
+ /* Sense (End 1 to End 2) or Antisense (End 5 to End 6) */
+ if (novelsplicingp && segmenti_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
+ donori_nsites = Genome_donor_positions(donor_positions_alloc,donor_knowni_alloc,
+ segmenti_donor_knownpos,segmenti_donor_knowni,
+ segmenti_left,splice_pos_start,splice_pos_end);
+ donori_positions = donor_positions_alloc;
+ donori_knowni = donor_knowni_alloc;
+ } else {
+ donori_nsites = segmenti_donor_nknown;
+ donori_positions = segmenti_donor_knownpos;
+ donori_knowni = segmenti_donor_knowni;
+ }
#ifdef DEBUG1
- printf("Found %d antiacceptori sites:",antiacceptori_nsites);
- for (i = 0; i < antiacceptori_nsites; i++) {
- printf(" %d",antiacceptori_positions[i]);
- if (antiacceptori_knowni[i] >= 0) {
- printf(" (%d)",antiacceptori_knowni[i]);
+ printf("Found %d donori sites:",donori_nsites);
+ for (i = 0; i < donori_nsites; i++) {
+ printf(" %d",donori_positions[i]);
+ if (donori_knowni[i] >= 0) {
+ printf(" (%d)",donori_knowni[i]);
+ }
}
- }
- printf("\n");
+ printf("\n");
#endif
- if (novelsplicingp && segmentj_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
- antidonorj_nsites = Genome_antidonor_positions(donor_positions_alloc,donor_knowni_alloc,
- segmentj_antidonor_knownpos,segmentj_antidonor_knowni,
+ if (novelsplicingp && segmentj_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
+ acceptorj_nsites = Genome_acceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
+ segmentj_acceptor_knownpos,segmentj_acceptor_knowni,
segmentj_left,splice_pos_start,splice_pos_end);
- antidonorj_positions = donor_positions_alloc;
- antidonorj_knowni = donor_knowni_alloc;
+ acceptorj_positions = acceptor_positions_alloc;
+ acceptorj_knowni = acceptor_knowni_alloc;
+ } else {
+ acceptorj_nsites = segmentj_acceptor_nknown;
+ acceptorj_positions = segmentj_acceptor_knownpos;
+ acceptorj_knowni = segmentj_acceptor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d acceptorj sites:",acceptorj_nsites);
+ for (i = 0; i < acceptorj_nsites; i++) {
+ printf(" %d",acceptorj_positions[i]);
+ if (acceptorj_knowni[i] >= 0) {
+ printf(" (%d)",acceptorj_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+
+ best_nmismatches = max_mismatches_allowed;
+ best_prob = 0.0;
+
+ i = j = 0;
+ while (i < donori_nsites && j < acceptorj_nsites) {
+ if ((splice_pos = donori_positions[i]) < acceptorj_positions[j]) {
+ i++;
+ } else if (splice_pos > acceptorj_positions[j]) {
+ j++;
+ } else {
+ segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p);
+ segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p);
+ if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
+ if (donori_knowni[i] >= 0) {
+ probi = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probi = Maxent_hr_donor_prob(segmenti_left + splice_pos,segmenti_chroffset);
+ }
+
+ if (acceptorj_knowni[j] >= 0) {
+ probj = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probj = Maxent_hr_acceptor_prob(segmentj_left + splice_pos,segmentj_chroffset);
+ }
+
+ debug1(
+ if (plusp == true) {
+ printf("plus sense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
+ } else {
+ printf("minus antisense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
+ });
+
+ if (nmismatches < best_nmismatches ||
+ (nmismatches == best_nmismatches && probi + probj > best_prob)) {
+ /* Success */
+ best_nmismatches = nmismatches;
+ best_prob = probi + probj;
+
+ best_donor_splicecoord = segmenti_left + splice_pos;
+ best_acceptor_splicecoord = segmentj_left + splice_pos;
+ best_donor_knowni = donori_knowni[i];
+ best_acceptor_knowni = acceptorj_knowni[j];
+ best_donor_prob = probi;
+ best_acceptor_prob = probj;
+ best_splice_pos = splice_pos;
+ best_segmenti_nmismatches = segmenti_nmismatches;
+ best_segmentj_nmismatches = segmentj_nmismatches;
+ orig_plusp = true; /* for antisense, require plusp to be false */
+ }
+ }
+ i++;
+ j++;
+ }
+ }
+
} else {
- antidonorj_nsites = segmentj_antidonor_nknown;
- antidonorj_positions = segmentj_antidonor_knownpos;
- antidonorj_knowni = segmentj_antidonor_knowni;
- }
+ /* plus */
+ /* Originally from minus strand. Complement. */
+ /* Antisense (End 7 to End 8) or Sense (End 3 to End 4) */
+ if (novelsplicingp && segmenti_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
+ antiacceptori_nsites = Genome_antiacceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
+ segmenti_antiacceptor_knownpos,segmenti_antiacceptor_knowni,
+ segmenti_left,splice_pos_start,splice_pos_end);
+ antiacceptori_positions = acceptor_positions_alloc;
+ antiacceptori_knowni = acceptor_knowni_alloc;
+ } else {
+ antiacceptori_nsites = segmenti_antiacceptor_nknown;
+ antiacceptori_positions = segmenti_antiacceptor_knownpos;
+ antiacceptori_knowni = segmenti_antiacceptor_knowni;
+ }
#ifdef DEBUG1
- printf("Found %d antidonorj sites:",antidonorj_nsites);
- for (i = 0; i < antidonorj_nsites; i++) {
- printf(" %d",antidonorj_positions[i]);
- if (antidonorj_knowni[i] >= 0) {
- printf(" (%d)",antidonorj_knowni[i]);
+ printf("Found %d antiacceptori sites:",antiacceptori_nsites);
+ for (i = 0; i < antiacceptori_nsites; i++) {
+ printf(" %d",antiacceptori_positions[i]);
+ if (antiacceptori_knowni[i] >= 0) {
+ printf(" (%d)",antiacceptori_knowni[i]);
+ }
}
- }
- printf("\n");
+ printf("\n");
#endif
- i = j = 0;
- while (i < antiacceptori_nsites && j < antidonorj_nsites) {
- if ((splice_pos = antiacceptori_positions[i]) < antidonorj_positions[j]) {
- i++;
- } else if (splice_pos > antidonorj_positions[j]) {
- j++;
+ if (novelsplicingp && segmentj_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
+ antidonorj_nsites = Genome_antidonor_positions(donor_positions_alloc,donor_knowni_alloc,
+ segmentj_antidonor_knownpos,segmentj_antidonor_knowni,
+ segmentj_left,splice_pos_start,splice_pos_end);
+ antidonorj_positions = donor_positions_alloc;
+ antidonorj_knowni = donor_knowni_alloc;
} else {
- segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos,
- plusp,genestrand,first_read_p);
- segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/querylength,
- plusp,genestrand,first_read_p);
- if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
- if (antiacceptori_knowni[i] >= 0) {
- probi = 1.0; /* Needs to be 1.0 for output */
- } else {
- probi = Maxent_hr_antiacceptor_prob(segmenti_left + splice_pos,segmenti_chroffset);
- }
+ antidonorj_nsites = segmentj_antidonor_nknown;
+ antidonorj_positions = segmentj_antidonor_knownpos;
+ antidonorj_knowni = segmentj_antidonor_knowni;
+ }
- if (antidonorj_knowni[j] >= 0) {
- probj = 1.0; /* Needs to be 1.0 for output */
- } else {
- probj = Maxent_hr_antidonor_prob(segmentj_left + splice_pos,segmentj_chroffset);
- }
+#ifdef DEBUG1
+ printf("Found %d antidonorj sites:",antidonorj_nsites);
+ for (i = 0; i < antidonorj_nsites; i++) {
+ printf(" %d",antidonorj_positions[i]);
+ if (antidonorj_knowni[i] >= 0) {
+ printf(" (%d)",antidonorj_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+
+ best_nmismatches = max_mismatches_allowed;
+ best_prob = 0.0;
+
+ i = j = 0;
+ while (i < antiacceptori_nsites && j < antidonorj_nsites) {
+ if ((splice_pos = antiacceptori_positions[i]) < antidonorj_positions[j]) {
+ i++;
+ } else if (splice_pos > antidonorj_positions[j]) {
+ j++;
+ } else {
+ segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p);
+ segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p);
+ if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
+ if (antiacceptori_knowni[i] >= 0) {
+ probi = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probi = Maxent_hr_antiacceptor_prob(segmenti_left + splice_pos,segmenti_chroffset);
+ }
+
+ if (antidonorj_knowni[j] >= 0) {
+ probj = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probj = Maxent_hr_antidonor_prob(segmentj_left + splice_pos,segmentj_chroffset);
+ }
- debug1(
- if (plusp == true) {
- printf("plus antisense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
- } else {
- printf("minus sense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
- });
+ debug1(
+ if (plusp == true) {
+ printf("plus antisense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
+ } else {
+ printf("minus sense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
+ });
- if (nmismatches < best_nmismatches ||
- (nmismatches == best_nmismatches && probi + probj > best_prob)) {
- /* Success */
- best_nmismatches = nmismatches;
- best_prob = probi + probj;
-
- best_donor_splicecoord = segmentj_left + splice_pos;
- best_acceptor_splicecoord = segmenti_left + splice_pos;
- best_donor_knowni = antidonorj_knowni[j];
- best_acceptor_knowni = antiacceptori_knowni[i];
- best_donor_prob = probj;
- best_acceptor_prob = probi;
- best_splice_pos = splice_pos;
- best_segmentj_nmismatches = segmentj_nmismatches;
- best_segmenti_nmismatches = segmenti_nmismatches;
- orig_plusp = false;
+ if (nmismatches < best_nmismatches ||
+ (nmismatches == best_nmismatches && probi + probj > best_prob)) {
+ /* Success */
+ best_nmismatches = nmismatches;
+ best_prob = probi + probj;
+
+ best_donor_splicecoord = segmentj_left + splice_pos;
+ best_acceptor_splicecoord = segmenti_left + splice_pos;
+ best_donor_knowni = antidonorj_knowni[j];
+ best_acceptor_knowni = antiacceptori_knowni[i];
+ best_donor_prob = probj;
+ best_acceptor_prob = probi;
+ best_splice_pos = splice_pos;
+ best_segmentj_nmismatches = segmentj_nmismatches;
+ best_segmenti_nmismatches = segmenti_nmismatches;
+ orig_plusp = false; /* for antisense, require plusp to be true */
+ }
}
+ i++;
+ j++;
}
- i++;
- j++;
}
}
@@ -371,6 +813,7 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
/* Originally from plus strand. No complement. */
sensep = (plusp == true) ? true : false;
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+ assert(sensedir == SENSE_ANTI);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
best_splice_pos,best_segmenti_nmismatches,
@@ -388,7 +831,7 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
} else {
- debug1(printf("Splice_solve_single success\n"));
+ debug1(printf("Splice_solve_single_antisense success\n"));
*segmenti_usedp = *segmentj_usedp = true;
donor_support = best_splice_pos;
@@ -400,10 +843,10 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
} else if (subs_or_indels_p == true) {
@@ -418,10 +861,10 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
return hits;
@@ -435,6 +878,7 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
/* Originally from minus strand. Complement. */
sensep = (plusp == true) ? false : true;
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+ assert(sensedir == SENSE_ANTI);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
best_splice_pos,best_segmentj_nmismatches,
@@ -452,7 +896,7 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
if (donor != NULL) Substring_free(&donor);
if (acceptor != NULL) Substring_free(&acceptor);
} else {
- debug1(printf("Splice_solve_single success\n"));
+ debug1(printf("Splice_solve_single_antisense success\n"));
*segmenti_usedp = *segmentj_usedp = true;
acceptor_support = best_splice_pos;
@@ -463,10 +907,10 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
} else if (subs_or_indels_p == true) {
@@ -481,10 +925,10 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
donor,acceptor,/*distance*/segmentj_left - segmenti_left,
- /*shortdistancep*/true,splicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,splicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir,
sarrayp));
return hits;
@@ -498,7 +942,7 @@ Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
}
}
- debug1(printf("Splice_solve_single fail\n"));
+ debug1(printf("Splice_solve_single_antisense fail\n"));
return hits;
}
@@ -999,12 +1443,10 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) {
*nhits += 1;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*acceptor_distance*/segmentm_left - segmenti_left,
- /*donor_distance*/segmentj_left - segmentm_left,
/*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,sarrayp));
} else if (subs_or_indels_p == true) {
@@ -1019,12 +1461,10 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
} else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*acceptor_distance*/segmentm_left - segmenti_left,
- /*donor_distance*/segmentj_left - segmentm_left,
/*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,sarrayp));
} else {
@@ -1076,12 +1516,10 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) {
*nhits += 1;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*acceptor_distance*/segmentj_left - segmentm_left,
- /*donor_distance*/segmentm_left - segmenti_left,
/*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,sarrayp));
} else if (subs_or_indels_p == true) {
@@ -1096,12 +1534,10 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
} else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*acceptor_distance*/segmentj_left - segmentm_left,
- /*donor_distance*/segmentm_left - segmenti_left,
/*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,sarrayp));
} else {
@@ -1132,11 +1568,11 @@ Splice_group_by_segmenti (int *found_score, List_T localsplicing, List_T *ambigu
int sensedir;
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords, ambcoords_left, ambcoords_right;
+ Uint8list_T ambcoords;
#else
- Uintlist_T ambcoords, ambcoords_left, ambcoords_right;
+ Uintlist_T ambcoords;
#endif
- Intlist_T amb_knowni, amb_nmismatches, amb_knowni_left, amb_knowni_right, amb_nmismatches_left, amb_nmismatches_right;
+ Intlist_T amb_knowni, amb_nmismatches;
array = (Stage3end_T *) List_to_array_n(&n,localsplicing);
qsort(array,n,sizeof(Stage3end_T),Stage3end_chimera_segmenti_cmp);
@@ -1218,6 +1654,7 @@ Splice_group_by_segmenti (int *found_score, List_T localsplicing, List_T *ambigu
ambcoords = NULL;
amb_knowni = (Intlist_T) NULL;
amb_nmismatches = (Intlist_T) NULL;
+
if (Substring_left_genomicseg(donor) == segmenti_left) {
for (i = j; i < k; i++) {
hit = array[i];
@@ -1231,45 +1668,15 @@ Splice_group_by_segmenti (int *found_score, List_T localsplicing, List_T *ambigu
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
}
- if (Substring_plusp(donor) == true) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- } else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- }
- } else {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- } else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- }
- }
-
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(donor) - nmismatches_acceptor,
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
sensedir,sarrayp));
#ifdef LARGE_GENOMES
@@ -1293,45 +1700,15 @@ Splice_group_by_segmenti (int *found_score, List_T localsplicing, List_T *ambigu
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
}
- if (Substring_plusp(acceptor) == true) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- } else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- }
- } else {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- } else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- }
- }
-
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(acceptor) - nmismatches_donor,
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
sensedir,sarrayp));
#ifdef LARGE_GENOMES
@@ -1387,11 +1764,11 @@ Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambigu
int sensedir;
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords, ambcoords_left, ambcoords_right;
+ Uint8list_T ambcoords;
#else
- Uintlist_T ambcoords, ambcoords_left, ambcoords_right;
+ Uintlist_T ambcoords;
#endif
- Intlist_T amb_knowni, amb_nmismatches, amb_knowni_left, amb_knowni_right, amb_nmismatches_left, amb_nmismatches_right;
+ Intlist_T amb_knowni, amb_nmismatches;
array = (Stage3end_T *) List_to_array_n(&n,localsplicing);
qsort(array,n,sizeof(Stage3end_T),Stage3end_chimera_segmentj_cmp);
@@ -1424,6 +1801,7 @@ Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambigu
Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
Substring_chimera_prob(Stage3end_substring_donor(hit)),
Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+
if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
best_nmismatches = nmismatches;
best_prob = Stage3end_chimera_prob(hit);
@@ -1472,6 +1850,7 @@ Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambigu
ambcoords = NULL;
amb_knowni = (Intlist_T) NULL;
amb_nmismatches = (Intlist_T) NULL;
+
if (Substring_left_genomicseg(donor) == segmentj_left) {
for (i = j; i < k; i++) {
hit = array[i];
@@ -1485,45 +1864,15 @@ Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambigu
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
}
- if (Substring_plusp(donor) == true) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- } else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- }
- } else {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- } else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- }
- }
-
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(donor) - nmismatches_acceptor,
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
sensedir,sarrayp));
#ifdef LARGE_GENOMES
@@ -1547,48 +1896,17 @@ Splice_group_by_segmentj (int *found_score, List_T localsplicing, List_T *ambigu
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
}
- if (Substring_plusp(acceptor) == true) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- } else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- }
- } else {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- } else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- }
- }
-
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(acceptor) - nmismatches_donor,
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
sensedir,sarrayp));
-
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords);
#else
diff --git a/src/splice.h b/src/splice.h
index 71ea07f..1baebb9 100644
--- a/src/splice.h
+++ b/src/splice.h
@@ -1,4 +1,4 @@
-/* $Id: splice.h 136085 2014-05-13 23:00:04Z twu $ */
+/* $Id: splice.h 140368 2014-07-02 00:56:33Z twu $ */
#ifndef SPLICE_INCLUDED
#define SPLICE_INCLUDED
#include "bool.h"
@@ -11,25 +11,46 @@ extern void
Splice_setup (int min_shortend_in);
extern List_T
-Splice_solve_single (int *found_score, int *nhits, List_T hits, List_T *lowprob,
+Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lowprob,
- bool *segmenti_usedp, bool *segmentj_usedp,
- Univcoord_T segmenti_left, Univcoord_T segmentj_left,
- Chrnum_T segmenti_chrnum, Univcoord_T segmenti_chroffset,
- Univcoord_T segmenti_chrhigh, Chrpos_T segmenti_chrlength,
- Chrnum_T segmentj_chrnum, Univcoord_T segmentj_chroffset,
- Univcoord_T segmentj_chrhigh, Chrpos_T segmentj_chrlength,
+ bool *segmenti_usedp, bool *segmentj_usedp,
+ Univcoord_T segmenti_left, Univcoord_T segmentj_left,
+ Chrnum_T segmenti_chrnum, Univcoord_T segmenti_chroffset,
+ Univcoord_T segmenti_chrhigh, Chrpos_T segmenti_chrlength,
+ Chrnum_T segmentj_chrnum, Univcoord_T segmentj_chroffset,
+ Univcoord_T segmentj_chrhigh, Chrpos_T segmentj_chrlength,
- int querylength, Compress_T query_compress,
- int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
- int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
- int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
- int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
- int segmenti_donor_nknown, int segmentj_acceptor_nknown,
- int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
- int splicing_penalty, int max_mismatches_allowed,
- bool plusp, int genestrand, bool first_read_p,
- bool subs_or_indels_p, bool sarrayp);
+ int querylength, Compress_T query_compress,
+ int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
+ int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
+ int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
+ int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
+ int segmenti_donor_nknown, int segmentj_acceptor_nknown,
+ int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
+ int splicing_penalty, int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p,
+ bool subs_or_indels_p, bool sarrayp);
+
+extern List_T
+Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T *lowprob,
+
+ bool *segmenti_usedp, bool *segmentj_usedp,
+ Univcoord_T segmenti_left, Univcoord_T segmentj_left,
+ Chrnum_T segmenti_chrnum, Univcoord_T segmenti_chroffset,
+ Univcoord_T segmenti_chrhigh, Chrpos_T segmenti_chrlength,
+ Chrnum_T segmentj_chrnum, Univcoord_T segmentj_chroffset,
+ Univcoord_T segmentj_chrhigh, Chrpos_T segmentj_chrlength,
+
+ int querylength, Compress_T query_compress,
+ int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
+ int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
+ int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
+ int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
+ int segmenti_donor_nknown, int segmentj_acceptor_nknown,
+ int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
+ int splicing_penalty, int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p,
+ bool subs_or_indels_p, bool sarrayp);
extern List_T
Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
diff --git a/src/splicetrie.c b/src/splicetrie.c
index 4b7b5af..67cfcec 100644
--- a/src/splicetrie.c
+++ b/src/splicetrie.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splicetrie.c 133760 2014-04-20 05:16:56Z twu $";
+static char rcsid[] = "$Id: splicetrie.c 140652 2014-07-04 01:16:50Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -334,108 +334,13 @@ solve_end5_aux (Univcoord_T **coordsptr, Univcoord_T *coords,
spliceoffset2_anchor = revoffset2;
spliceoffset2_far = spliceoffset2_anchor + anchor_splicesite - splicecoord;
}
- pairs = Dynprog_end5_splicejunction(&(*dynprogindex),&score,&miss_score,&nmatches0,&nmismatches0,
- &nopens0,&nindels0,dynprog,revsequence1,revsequenceuc1,
- /*revsequence2*/&(splicejunction[length2-1]),/*revsequenceuc2*/&(splicejunction[length2-1]),
- /*revsequencealt2*/&(splicejunction_alt[length2-1]),
- length1,length2,revoffset1,spliceoffset2_anchor,spliceoffset2_far,
- chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate,contlength);
-
- /* miss_score = perfect_score - score; */
- assert(miss_score <= 0);
- debug7(printf("score %d - perfect score %d = miss %d expected vs %d returned. ",
- score,perfect_score,score-perfect_score,miss_score));
- debug7(printf(" comparing against threshold_miss %d + obsmax_penalty %d\n",*threshold_miss_score,obsmax_penalty));
- if (score > 0 && miss_score > *threshold_miss_score + obsmax_penalty) {
- debug7(printf("miss %d > threshold %d + %d",miss_score,*threshold_miss_score,obsmax_penalty));
-#if 0
- /* Just use results from Dynprog_end5_splicejunction */
- pairs = Dynprog_add_known_splice_5(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,
- watsonp,pairpool);
-#else
- length_distal = length1 - contlength;
-#endif
- best_pairs = pairs;
- *finalscore = score;
- *nmatches = nmatches0;
- *nmismatches = nmismatches0;
- *nopens = nopens0;
- *nindels = nindels0;
- *knownsplicep = true;
- *ambig_end_length = length_distal;
- *threshold_miss_score = miss_score - obsmax_penalty;
- shortest_intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
- *coordsptr = coords;
- *(*coordsptr)++ = splicecoord;
-
- } else if (miss_score == *threshold_miss_score + obsmax_penalty
-#if 0
- && Genomicposlist_find(*coords,splicecoord) == false
-#endif
- ) {
- if (amb_closest_p == false) {
- debug7(printf("miss %d == threshold %d + %d, so ambiguous",miss_score,*threshold_miss_score,obsmax_penalty));
- /* best_pairs = (List_T) NULL; */
- *(*coordsptr)++ = splicecoord;
- } else {
- intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
- if (intron_length > shortest_intron_length) {
- debug7(printf("miss %d == threshold %d + %d, but intron_length %d > shortest %d, so ignore",
- miss_score,*threshold_miss_score,obsmax_penalty,intron_length,shortest_intron_length));
- } else {
- debug7(printf("miss %d == threshold %d + %d, but intron_length %d < shortest %d, so new best",
- miss_score,*threshold_miss_score,obsmax_penalty,intron_length,shortest_intron_length));
-#if 0
- /* Just use results from Dynprog_end5_splicejunction */
- pairs = Dynprog_add_known_splice_5(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,
- watsonp,pairpool);
-#else
- length_distal = length1 - contlength;
-#endif
- best_pairs = pairs;
- *finalscore = score;
- *nmatches = nmatches0;
- *nmismatches = nmismatches0;
- *nopens = nopens0;
- *nindels = nindels0;
- *knownsplicep = true;
- *ambig_end_length = length_distal;
- *threshold_miss_score = miss_score - obsmax_penalty;
- shortest_intron_length = intron_length;
- *coordsptr = coords;
- *(*coordsptr)++ = splicecoord;
- }
- }
- }
- }
- debug7(printf("\n"));
-
- } else if (multiple_leaf_p(leaf)) {
- nleaves = (int) (-leaf);
- for (i = 1; i <= nleaves; i++) {
- leaf = triestart[i];
- splicecoord = splicesites[leaf];
- debug7(printf("Checking leaf %d at %u: ",(int) leaf,splicecoord));
- if (splicecoord >= knownsplice_limit_low && splicecoord <= knownsplice_limit_high &&
- Dynprog_make_splicejunction_5(splicejunction,splicejunction_alt,splicecoord,splicelength,contlength,far_splicetype,watsonp) == true) {
- debug7(printf("intron length %d, ",splicecoord - anchor_splicesite));
- debug7(printf("length1 = %d, length2 = %d, chroffset = %u, splicecoord = %u\n",
- length1,length2,chroffset,splicecoord));
- if (watsonp) {
- spliceoffset2_anchor = revoffset2;
- spliceoffset2_far = spliceoffset2_anchor - anchor_splicesite + splicecoord;
- } else {
- spliceoffset2_anchor = revoffset2;
- spliceoffset2_far = spliceoffset2_anchor + anchor_splicesite - splicecoord;
- }
- pairs = Dynprog_end5_splicejunction(&(*dynprogindex),&score,&miss_score,&nmatches0,&nmismatches0,
- &nopens0,&nindels0,dynprog,revsequence1,revsequenceuc1,
- /*revsequence2*/&(splicejunction[length2-1]),/*revsequenceuc2*/&(splicejunction[length2-1]),
- /*revsequencealt2*/&(splicejunction_alt[length2-1]),
- length1,length2,revoffset1,spliceoffset2_anchor,spliceoffset2_far,
- chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate,contlength);
+ if ((pairs = Dynprog_end5_splicejunction(&(*dynprogindex),&score,&miss_score,&nmatches0,&nmismatches0,
+ &nopens0,&nindels0,dynprog,revsequence1,revsequenceuc1,
+ /*revsequence2*/&(splicejunction[length2-1]),/*revsequenceuc2*/&(splicejunction[length2-1]),
+ /*revsequencealt2*/&(splicejunction_alt[length2-1]),
+ length1,length2,revoffset1,spliceoffset2_anchor,spliceoffset2_far,
+ chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
+ extraband_end,defect_rate,contlength)) != NULL) {
/* miss_score = perfect_score - score; */
assert(miss_score <= 0);
@@ -446,7 +351,8 @@ solve_end5_aux (Univcoord_T **coordsptr, Univcoord_T *coords,
debug7(printf("miss %d > threshold %d + %d",miss_score,*threshold_miss_score,obsmax_penalty));
#if 0
/* Just use results from Dynprog_end5_splicejunction */
- pairs = Dynprog_add_known_splice_5(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,watsonp,pairpool);
+ pairs = Dynprog_add_known_splice_5(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,
+ watsonp,pairpool);
#else
length_distal = length1 - contlength;
#endif
@@ -462,9 +368,9 @@ solve_end5_aux (Univcoord_T **coordsptr, Univcoord_T *coords,
shortest_intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
*coordsptr = coords;
*(*coordsptr)++ = splicecoord;
+
} else if (miss_score == *threshold_miss_score + obsmax_penalty
#if 0
- /* Filter for duplicates later */
&& Genomicposlist_find(*coords,splicecoord) == false
#endif
) {
@@ -482,7 +388,8 @@ solve_end5_aux (Univcoord_T **coordsptr, Univcoord_T *coords,
miss_score,*threshold_miss_score,obsmax_penalty,intron_length,shortest_intron_length));
#if 0
/* Just use results from Dynprog_end5_splicejunction */
- pairs = Dynprog_add_known_splice_5(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,watsonp,pairpool);
+ pairs = Dynprog_add_known_splice_5(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,
+ watsonp,pairpool);
#else
length_distal = length1 - contlength;
#endif
@@ -502,6 +409,102 @@ solve_end5_aux (Univcoord_T **coordsptr, Univcoord_T *coords,
}
}
}
+ }
+ debug7(printf("\n"));
+
+ } else if (multiple_leaf_p(leaf)) {
+ nleaves = (int) (-leaf);
+ for (i = 1; i <= nleaves; i++) {
+ leaf = triestart[i];
+ splicecoord = splicesites[leaf];
+ debug7(printf("Checking leaf %d at %u: ",(int) leaf,splicecoord));
+ if (splicecoord >= knownsplice_limit_low && splicecoord <= knownsplice_limit_high &&
+ Dynprog_make_splicejunction_5(splicejunction,splicejunction_alt,splicecoord,splicelength,contlength,far_splicetype,watsonp) == true) {
+ debug7(printf("intron length %d, ",splicecoord - anchor_splicesite));
+ debug7(printf("length1 = %d, length2 = %d, chroffset = %u, splicecoord = %u\n",
+ length1,length2,chroffset,splicecoord));
+ if (watsonp) {
+ spliceoffset2_anchor = revoffset2;
+ spliceoffset2_far = spliceoffset2_anchor - anchor_splicesite + splicecoord;
+ } else {
+ spliceoffset2_anchor = revoffset2;
+ spliceoffset2_far = spliceoffset2_anchor + anchor_splicesite - splicecoord;
+ }
+ if ((pairs = Dynprog_end5_splicejunction(&(*dynprogindex),&score,&miss_score,&nmatches0,&nmismatches0,
+ &nopens0,&nindels0,dynprog,revsequence1,revsequenceuc1,
+ /*revsequence2*/&(splicejunction[length2-1]),/*revsequenceuc2*/&(splicejunction[length2-1]),
+ /*revsequencealt2*/&(splicejunction_alt[length2-1]),
+ length1,length2,revoffset1,spliceoffset2_anchor,spliceoffset2_far,
+ chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
+ extraband_end,defect_rate,contlength)) != NULL) {
+
+ /* miss_score = perfect_score - score; */
+ assert(miss_score <= 0);
+ debug7(printf("score %d - perfect score %d = miss %d expected vs %d returned. ",
+ score,perfect_score,score-perfect_score,miss_score));
+ debug7(printf(" comparing against threshold_miss %d + obsmax_penalty %d\n",*threshold_miss_score,obsmax_penalty));
+ if (score > 0 && miss_score > *threshold_miss_score + obsmax_penalty) {
+ debug7(printf("miss %d > threshold %d + %d",miss_score,*threshold_miss_score,obsmax_penalty));
+#if 0
+ /* Just use results from Dynprog_end5_splicejunction */
+ pairs = Dynprog_add_known_splice_5(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,watsonp,pairpool);
+#else
+ length_distal = length1 - contlength;
+#endif
+ best_pairs = pairs;
+ *finalscore = score;
+ *nmatches = nmatches0;
+ *nmismatches = nmismatches0;
+ *nopens = nopens0;
+ *nindels = nindels0;
+ *knownsplicep = true;
+ *ambig_end_length = length_distal;
+ *threshold_miss_score = miss_score - obsmax_penalty;
+ shortest_intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
+ *coordsptr = coords;
+ *(*coordsptr)++ = splicecoord;
+
+ } else if (miss_score == *threshold_miss_score + obsmax_penalty
+#if 0
+ /* Filter for duplicates later */
+ && Genomicposlist_find(*coords,splicecoord) == false
+#endif
+ ) {
+ if (amb_closest_p == false) {
+ debug7(printf("miss %d == threshold %d + %d, so ambiguous",miss_score,*threshold_miss_score,obsmax_penalty));
+ /* best_pairs = (List_T) NULL; */
+ *(*coordsptr)++ = splicecoord;
+ } else {
+ intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
+ if (intron_length > shortest_intron_length) {
+ debug7(printf("miss %d == threshold %d + %d, but intron_length %d > shortest %d, so ignore",
+ miss_score,*threshold_miss_score,obsmax_penalty,intron_length,shortest_intron_length));
+ } else {
+ debug7(printf("miss %d == threshold %d + %d, but intron_length %d < shortest %d, so new best",
+ miss_score,*threshold_miss_score,obsmax_penalty,intron_length,shortest_intron_length));
+#if 0
+ /* Just use results from Dynprog_end5_splicejunction */
+ pairs = Dynprog_add_known_splice_5(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,watsonp,pairpool);
+#else
+ length_distal = length1 - contlength;
+#endif
+ best_pairs = pairs;
+ *finalscore = score;
+ *nmatches = nmatches0;
+ *nmismatches = nmismatches0;
+ *nopens = nopens0;
+ *nindels = nindels0;
+ *knownsplicep = true;
+ *ambig_end_length = length_distal;
+ *threshold_miss_score = miss_score - obsmax_penalty;
+ shortest_intron_length = intron_length;
+ *coordsptr = coords;
+ *(*coordsptr)++ = splicecoord;
+ }
+ }
+ }
+ }
+ }
debug7(printf("\n"));
}
@@ -609,106 +612,13 @@ solve_end3_aux (Univcoord_T **coordsptr, Univcoord_T *coords,
spliceoffset2_anchor = offset2;
spliceoffset2_far = spliceoffset2_anchor + anchor_splicesite - splicecoord;
}
- pairs = Dynprog_end3_splicejunction(&(*dynprogindex),&score,&miss_score,&nmatches0,&nmismatches0,
- &nopens0,&nindels0,dynprog,sequence1,sequenceuc1,
- /*sequence2*/splicejunction,/*sequenceuc2*/splicejunction,
- /*sequencealt2*/splicejunction_alt,
- length1,length2,offset1,spliceoffset2_anchor,spliceoffset2_far,
- chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate,contlength);
-
- /* miss_score = perfect_score - score; */
- assert(miss_score <= 0);
- debug7(printf("score %d - perfect score %d = miss %d expected vs %d returned. ",
- score,perfect_score,score-perfect_score,miss_score));
- debug7(printf(" comparing against threshold_miss %d + obsmax_penalty %d\n",*threshold_miss_score,obsmax_penalty));
- if (score > 0 && miss_score > *threshold_miss_score + obsmax_penalty) {
- debug7(printf("miss %d > threshold %d + %d",miss_score,*threshold_miss_score,obsmax_penalty));
-#if 0
- /* Just results of Dynprog_end3_splicejunction */
- pairs = Dynprog_add_known_splice_3(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,watsonp,pairpool);
-#else
- length_distal = length1 - contlength;
-#endif
- best_pairs = pairs;
- *finalscore = score;
- *nmatches = nmatches0;
- *nmismatches = nmismatches0;
- *nopens = nopens0;
- *nindels = nindels0;
- *knownsplicep = true;
- *ambig_end_length = length_distal;
- *threshold_miss_score = miss_score - obsmax_penalty;
- shortest_intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
- *coordsptr = coords;
- *(*coordsptr)++ = splicecoord;
-
- } else if (miss_score == *threshold_miss_score + obsmax_penalty
-#if 0
- && Genomicposlist_find(*coords,splicecoord) == false
-#endif
- ) {
- if (amb_closest_p == false) {
- debug7(printf("miss %d == threshold %d + %d, so ambiguous",miss_score,*threshold_miss_score,obsmax_penalty));
- /* best_pairs = (List_T) NULL; */
- *(*coordsptr)++ = splicecoord;
- } else {
- intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
- if (intron_length > shortest_intron_length) {
- debug7(printf("miss %d == threshold %d + %d, but intron_length %d > shortest %d, so ignore",
- miss_score,*threshold_miss_score,obsmax_penalty,intron_length,shortest_intron_length));
- } else {
- debug7(printf("miss %d == threshold %d + %d, but intron_length %d < shortest %d, so new best",
- miss_score,*threshold_miss_score,obsmax_penalty,intron_length,shortest_intron_length));
-#if 0
- /* Just use results of Dynprog_end3_splicejunction */
- pairs = Dynprog_add_known_splice_3(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,watsonp,pairpool);
-#else
- length_distal = length1 - contlength;
-#endif
- best_pairs = pairs;
- *finalscore = score;
- *nmatches = nmatches0;
- *nmismatches = nmismatches0;
- *nopens = nopens0;
- *nindels = nindels0;
- *knownsplicep = true;
- *ambig_end_length = length_distal;
- *threshold_miss_score = miss_score - obsmax_penalty;
- shortest_intron_length = intron_length;
- *coordsptr = coords;
- *(*coordsptr)++ = splicecoord;
- }
- }
- }
- }
- debug7(printf("\n"));
-
- } else if (multiple_leaf_p(leaf)) {
- nleaves = (int) (-leaf);
- for (i = 1; i <= nleaves; i++) {
- leaf = triestart[i];
- splicecoord = splicesites[leaf];
- debug7(printf("Checking leaf %d at %u: ",(int) leaf,splicecoord));
- if (splicecoord >= knownsplice_limit_low && splicecoord <= knownsplice_limit_high &&
- Dynprog_make_splicejunction_3(splicejunction,splicejunction_alt,splicecoord,splicelength,contlength,far_splicetype,watsonp) == true) {
- debug7(printf("intron length %d, ",splicecoord - anchor_splicesite));
- debug7(printf("length1 = %d, length2 = %d, chroffset = %u, splicecoord = %u\n",
- length1,length2,chroffset,splicecoord));
- if (watsonp) {
- spliceoffset2_anchor = offset2;
- spliceoffset2_far = spliceoffset2_anchor - anchor_splicesite + splicecoord;
- } else {
- spliceoffset2_anchor = offset2;
- spliceoffset2_far = spliceoffset2_anchor + anchor_splicesite - splicecoord;
- }
- pairs = Dynprog_end3_splicejunction(&(*dynprogindex),&score,&miss_score,&nmatches0,&nmismatches0,
- &nopens0,&nindels0,dynprog,sequence1,sequenceuc1,
- /*sequence2*/splicejunction,/*sequenceuc2*/splicejunction,
- /*sequencealt2*/splicejunction_alt,
- length1,length2,offset1,spliceoffset2_anchor,spliceoffset2_far,
- chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate,contlength);
+ if ((pairs = Dynprog_end3_splicejunction(&(*dynprogindex),&score,&miss_score,&nmatches0,&nmismatches0,
+ &nopens0,&nindels0,dynprog,sequence1,sequenceuc1,
+ /*sequence2*/splicejunction,/*sequenceuc2*/splicejunction,
+ /*sequencealt2*/splicejunction_alt,
+ length1,length2,offset1,spliceoffset2_anchor,spliceoffset2_far,
+ chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
+ extraband_end,defect_rate,contlength)) != NULL) {
/* miss_score = perfect_score - score; */
assert(miss_score <= 0);
@@ -718,7 +628,7 @@ solve_end3_aux (Univcoord_T **coordsptr, Univcoord_T *coords,
if (score > 0 && miss_score > *threshold_miss_score + obsmax_penalty) {
debug7(printf("miss %d > threshold %d + %d",miss_score,*threshold_miss_score,obsmax_penalty));
#if 0
- /* Just use results of Dynprog_end3_splicejunction */
+ /* Just results of Dynprog_end3_splicejunction */
pairs = Dynprog_add_known_splice_3(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,watsonp,pairpool);
#else
length_distal = length1 - contlength;
@@ -735,6 +645,7 @@ solve_end3_aux (Univcoord_T **coordsptr, Univcoord_T *coords,
shortest_intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
*coordsptr = coords;
*(*coordsptr)++ = splicecoord;
+
} else if (miss_score == *threshold_miss_score + obsmax_penalty
#if 0
&& Genomicposlist_find(*coords,splicecoord) == false
@@ -774,6 +685,100 @@ solve_end3_aux (Univcoord_T **coordsptr, Univcoord_T *coords,
}
}
}
+ }
+ debug7(printf("\n"));
+
+ } else if (multiple_leaf_p(leaf)) {
+ nleaves = (int) (-leaf);
+ for (i = 1; i <= nleaves; i++) {
+ leaf = triestart[i];
+ splicecoord = splicesites[leaf];
+ debug7(printf("Checking leaf %d at %u: ",(int) leaf,splicecoord));
+ if (splicecoord >= knownsplice_limit_low && splicecoord <= knownsplice_limit_high &&
+ Dynprog_make_splicejunction_3(splicejunction,splicejunction_alt,splicecoord,splicelength,contlength,far_splicetype,watsonp) == true) {
+ debug7(printf("intron length %d, ",splicecoord - anchor_splicesite));
+ debug7(printf("length1 = %d, length2 = %d, chroffset = %u, splicecoord = %u\n",
+ length1,length2,chroffset,splicecoord));
+ if (watsonp) {
+ spliceoffset2_anchor = offset2;
+ spliceoffset2_far = spliceoffset2_anchor - anchor_splicesite + splicecoord;
+ } else {
+ spliceoffset2_anchor = offset2;
+ spliceoffset2_far = spliceoffset2_anchor + anchor_splicesite - splicecoord;
+ }
+ if ((pairs = Dynprog_end3_splicejunction(&(*dynprogindex),&score,&miss_score,&nmatches0,&nmismatches0,
+ &nopens0,&nindels0,dynprog,sequence1,sequenceuc1,
+ /*sequence2*/splicejunction,/*sequenceuc2*/splicejunction,
+ /*sequencealt2*/splicejunction_alt,
+ length1,length2,offset1,spliceoffset2_anchor,spliceoffset2_far,
+ chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
+ extraband_end,defect_rate,contlength)) != NULL) {
+
+ /* miss_score = perfect_score - score; */
+ assert(miss_score <= 0);
+ debug7(printf("score %d - perfect score %d = miss %d expected vs %d returned. ",
+ score,perfect_score,score-perfect_score,miss_score));
+ debug7(printf(" comparing against threshold_miss %d + obsmax_penalty %d\n",*threshold_miss_score,obsmax_penalty));
+ if (score > 0 && miss_score > *threshold_miss_score + obsmax_penalty) {
+ debug7(printf("miss %d > threshold %d + %d",miss_score,*threshold_miss_score,obsmax_penalty));
+#if 0
+ /* Just use results of Dynprog_end3_splicejunction */
+ pairs = Dynprog_add_known_splice_3(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,watsonp,pairpool);
+#else
+ length_distal = length1 - contlength;
+#endif
+ best_pairs = pairs;
+ *finalscore = score;
+ *nmatches = nmatches0;
+ *nmismatches = nmismatches0;
+ *nopens = nopens0;
+ *nindels = nindels0;
+ *knownsplicep = true;
+ *ambig_end_length = length_distal;
+ *threshold_miss_score = miss_score - obsmax_penalty;
+ shortest_intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
+ *coordsptr = coords;
+ *(*coordsptr)++ = splicecoord;
+ } else if (miss_score == *threshold_miss_score + obsmax_penalty
+#if 0
+ && Genomicposlist_find(*coords,splicecoord) == false
+#endif
+ ) {
+ if (amb_closest_p == false) {
+ debug7(printf("miss %d == threshold %d + %d, so ambiguous",miss_score,*threshold_miss_score,obsmax_penalty));
+ /* best_pairs = (List_T) NULL; */
+ *(*coordsptr)++ = splicecoord;
+ } else {
+ intron_length = (splicecoord > anchor_splicesite) ? splicecoord - anchor_splicesite : anchor_splicesite - splicecoord;
+ if (intron_length > shortest_intron_length) {
+ debug7(printf("miss %d == threshold %d + %d, but intron_length %d > shortest %d, so ignore",
+ miss_score,*threshold_miss_score,obsmax_penalty,intron_length,shortest_intron_length));
+ } else {
+ debug7(printf("miss %d == threshold %d + %d, but intron_length %d < shortest %d, so new best",
+ miss_score,*threshold_miss_score,obsmax_penalty,intron_length,shortest_intron_length));
+#if 0
+ /* Just use results of Dynprog_end3_splicejunction */
+ pairs = Dynprog_add_known_splice_3(&length_distal,pairs,anchor_splicesite,splicecoord,chroffset,watsonp,pairpool);
+#else
+ length_distal = length1 - contlength;
+#endif
+ best_pairs = pairs;
+ *finalscore = score;
+ *nmatches = nmatches0;
+ *nmismatches = nmismatches0;
+ *nopens = nopens0;
+ *nindels = nindels0;
+ *knownsplicep = true;
+ *ambig_end_length = length_distal;
+ *threshold_miss_score = miss_score - obsmax_penalty;
+ shortest_intron_length = intron_length;
+ *coordsptr = coords;
+ *(*coordsptr)++ = splicecoord;
+ }
+ }
+ }
+ }
+ }
debug7(printf("\n"));
}
diff --git a/src/stage1hr.c b/src/stage1hr.c
index e0d1210..9b1d09c 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 138745 2014-06-11 19:04:25Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1016,54 +1016,7 @@ read_oligos (bool *allvalidp, T this, char *queryuc_ptr, int querylength,
}
} else if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
- while ((last_state = Oligo_next(last_state,&querypos,&forward,&revcomp,
- reader,/*cdnaend*/FIVE)) != DONE) {
-#ifdef LARGE_GENOMES
- this->plus_positions_high[querypos] = (unsigned char *) NULL;
- this->plus_positions_low[querypos] = (UINT4 *) NULL;
- this->minus_positions_high[querypos] = (unsigned char *) NULL;
- this->minus_positions_low[querypos] = (UINT4 *) NULL;
-#else
- this->plus_positions[querypos] = (Univcoord_T *) NULL;
- this->minus_positions[querypos] = (Univcoord_T *) NULL;
-#endif
- this->plus_npositions[querypos] = 0;
- this->minus_npositions[querypos] = 0;
-
- if (last_state == VALID) {
-#ifdef USE_VALIDP
- this->validp[querypos] = true;
-#endif
- this->plus_retrievedp[querypos] = false;
- this->minus_retrievedp[querypos] = false;
-
- if (genestrand == +2) {
- if (first_read_p == false) {
- this->forward_oligos[querypos] = Cmet_reduce_ct(forward) & oligobase_mask;
- this->revcomp_oligos[querypos] = Cmet_reduce_ga(revcomp >> leftreadshift) & oligobase_mask;
- } else {
- this->forward_oligos[querypos] = Cmet_reduce_ga(forward) & oligobase_mask;
- this->revcomp_oligos[querypos] = Cmet_reduce_ct(revcomp >> leftreadshift) & oligobase_mask;
- }
- } else {
- if (first_read_p == true) {
- this->forward_oligos[querypos] = Cmet_reduce_ct(forward) & oligobase_mask;
- this->revcomp_oligos[querypos] = Cmet_reduce_ga(revcomp >> leftreadshift) & oligobase_mask;
- } else {
- this->forward_oligos[querypos] = Cmet_reduce_ga(forward) & oligobase_mask;
- this->revcomp_oligos[querypos] = Cmet_reduce_ct(revcomp >> leftreadshift) & oligobase_mask;
- }
- }
-
- debug(printf("At querypos %d, fwd oligo %06X => %06X and rev oligo %06X => %06X\n",
- querypos,forward & oligobase_mask,this->forward_oligos[querypos],
- (revcomp >> leftreadshift) & oligobase_mask,this->revcomp_oligos[querypos]));
- noligos++;
- }
- }
-
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand == +1) {
+ if (genestrand == +2) {
while ((last_state = Oligo_next(last_state,&querypos,&forward,&revcomp,
reader,/*cdnaend*/FIVE)) != DONE) {
#ifdef LARGE_GENOMES
@@ -1085,13 +1038,14 @@ read_oligos (bool *allvalidp, T this, char *queryuc_ptr, int querylength,
this->plus_retrievedp[querypos] = false;
this->minus_retrievedp[querypos] = false;
- this->forward_oligos[querypos] = Cmet_reduce_ct(forward) & oligobase_mask;
+ this->forward_oligos[querypos] = Cmet_reduce_ga(forward) & oligobase_mask;
this->revcomp_oligos[querypos] = Cmet_reduce_ct(revcomp >> leftreadshift) & oligobase_mask;
debug(printf("At querypos %d, read oligo = %06X\n",querypos,this->forward_oligos[querypos]));
noligos++;
}
}
+
} else {
while ((last_state = Oligo_next(last_state,&querypos,&forward,&revcomp,
reader,/*cdnaend*/FIVE)) != DONE) {
@@ -1114,7 +1068,7 @@ read_oligos (bool *allvalidp, T this, char *queryuc_ptr, int querylength,
this->plus_retrievedp[querypos] = false;
this->minus_retrievedp[querypos] = false;
- this->forward_oligos[querypos] = Cmet_reduce_ga(forward) & oligobase_mask;
+ this->forward_oligos[querypos] = Cmet_reduce_ct(forward) & oligobase_mask;
this->revcomp_oligos[querypos] = Cmet_reduce_ga(revcomp >> leftreadshift) & oligobase_mask;
debug(printf("At querypos %d, read oligo = %06X\n",querypos,this->forward_oligos[querypos]));
@@ -1123,43 +1077,8 @@ read_oligos (bool *allvalidp, T this, char *queryuc_ptr, int querylength,
}
}
- } else if (mode == ATOI_STRANDED) {
- while ((last_state = Oligo_next(last_state,&querypos,&forward,&revcomp,
- reader,/*cdnaend*/FIVE)) != DONE) {
-#ifdef LARGE_GENOMES
- this->plus_positions_high[querypos] = (unsigned char *) NULL;
- this->plus_positions_low[querypos] = (UINT4 *) NULL;
- this->minus_positions_high[querypos] = (unsigned char *) NULL;
- this->minus_positions_low[querypos] = (UINT4 *) NULL;
-#else
- this->plus_positions[querypos] = (Univcoord_T *) NULL;
- this->minus_positions[querypos] = (Univcoord_T *) NULL;
-#endif
- this->plus_npositions[querypos] = 0;
- this->minus_npositions[querypos] = 0;
-
- if (last_state == VALID) {
-#ifdef USE_VALIDP
- this->validp[querypos] = true;
-#endif
- this->plus_retrievedp[querypos] = false;
- this->minus_retrievedp[querypos] = false;
-
- if (first_read_p == true) {
- this->forward_oligos[querypos] = Atoi_reduce_ag(forward) & oligobase_mask;
- this->revcomp_oligos[querypos] = Atoi_reduce_tc(revcomp >> leftreadshift) & oligobase_mask;
- } else {
- this->forward_oligos[querypos] = Atoi_reduce_tc(forward) & oligobase_mask;
- this->revcomp_oligos[querypos] = Atoi_reduce_ag(revcomp >> leftreadshift) & oligobase_mask;
- }
-
- debug(printf("At querypos %d, read oligo = %06X\n",querypos,this->forward_oligos[querypos]));
- noligos++;
- }
- }
-
- } else if (mode == ATOI_NONSTRANDED) {
- if (genestrand == +1) {
+ } else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
+ if (genestrand == +2) {
while ((last_state = Oligo_next(last_state,&querypos,&forward,&revcomp,
reader,/*cdnaend*/FIVE)) != DONE) {
#ifdef LARGE_GENOMES
@@ -1181,7 +1100,7 @@ read_oligos (bool *allvalidp, T this, char *queryuc_ptr, int querylength,
this->plus_retrievedp[querypos] = false;
this->minus_retrievedp[querypos] = false;
- this->forward_oligos[querypos] = Atoi_reduce_ag(forward) & oligobase_mask;
+ this->forward_oligos[querypos] = Atoi_reduce_tc(forward) & oligobase_mask;
this->revcomp_oligos[querypos] = Atoi_reduce_ag(revcomp >> leftreadshift) & oligobase_mask;
debug(printf("At querypos %d, read oligo = %06X\n",querypos,this->forward_oligos[querypos]));
@@ -1210,7 +1129,7 @@ read_oligos (bool *allvalidp, T this, char *queryuc_ptr, int querylength,
this->plus_retrievedp[querypos] = false;
this->minus_retrievedp[querypos] = false;
- this->forward_oligos[querypos] = Atoi_reduce_tc(forward) & oligobase_mask;
+ this->forward_oligos[querypos] = Atoi_reduce_ag(forward) & oligobase_mask;
this->revcomp_oligos[querypos] = Atoi_reduce_tc(revcomp >> leftreadshift) & oligobase_mask;
debug(printf("At querypos %d, read oligo = %06X\n",querypos,this->forward_oligos[querypos]));
@@ -6621,20 +6540,20 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
int *floors_from_neg3, *floors_to_pos3;
int nhits_local /*= 0*/;
- List_T spliceends, p;
+ List_T spliceends_sense, spliceends_antisense, p;
Stage3end_T hit;
- int nspliceends, n_good_spliceends;
+ int n_good_spliceends;
int best_nmismatches, nmismatches, nmismatches_donor, nmismatches_acceptor;
double best_prob, prob;
Substring_T donor, acceptor;
int sensedir;
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords, ambcoords_left, ambcoords_right;
+ Uint8list_T ambcoords;
#else
- Uintlist_T ambcoords, ambcoords_left, ambcoords_right;
+ Uintlist_T ambcoords;
#endif
- Intlist_T amb_knowni, amb_nmismatches, amb_knowni_left, amb_knowni_right, amb_nmismatches_left, amb_nmismatches_right;
+ Intlist_T amb_knowni, amb_nmismatches;
debug4s(printf("*** Starting find_singlesplices_plus on %d spliceable segments ***\n",plus_nspliceable));
@@ -6690,7 +6609,7 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
segmentj_end++;
}
- spliceends = (List_T) NULL;
+ spliceends_sense = spliceends_antisense = (List_T) NULL;
if (segmentj_end - segmenti >= MAX_LOCALSPLICING_POTENTIAL) {
/* Too many to check */
@@ -6781,31 +6700,49 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
debug4s(printf(" => checking for single splice: Splice_solve_single_plus\n"));
- spliceends = Splice_solve_single(&(*found_score),&nhits_local,spliceends,&(*lowprob),
- &segmenti->usedp,&segmentj->usedp,
- /*segmenti_left*/segmenti->diagonal - querylength,
- /*segmentj_left*/segmentj->diagonal - querylength,
- segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,max_mismatches_allowed,
- /*plusp*/true,genestrand,first_read_p,subs_or_indels_p,
- /*sarrayp*/false);
+ spliceends_sense =
+ Splice_solve_single_sense(&(*found_score),&nhits_local,spliceends_sense,&(*lowprob),
+ &segmenti->usedp,&segmentj->usedp,
+ /*segmenti_left*/segmenti->diagonal - querylength,
+ /*segmentj_left*/segmentj->diagonal - querylength,
+ segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
+ segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ /*plusp*/true,genestrand,first_read_p,subs_or_indels_p,
+ /*sarrayp*/false);
+ spliceends_antisense =
+ Splice_solve_single_antisense(&(*found_score),&nhits_local,spliceends_antisense,&(*lowprob),
+ &segmenti->usedp,&segmentj->usedp,
+ /*segmenti_left*/segmenti->diagonal - querylength,
+ /*segmentj_left*/segmentj->diagonal - querylength,
+ segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
+ segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ /*plusp*/true,genestrand,first_read_p,subs_or_indels_p,
+ /*sarrayp*/false);
}
}
}
}
- /* Process results for segmenti. Modified from collect_elt_matches in sarray-read.c. */
- if (spliceends != NULL) {
+ /* Process results for segmenti, sense. Modified from collect_elt_matches in sarray-read.c. */
+ if (spliceends_sense != NULL) {
best_nmismatches = querylength;
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
debug7(printf("analyzing distance %d, nmismatches %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_nmismatches_whole(hit),
@@ -6820,7 +6757,7 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
}
n_good_spliceends = 0;
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_nmismatches_whole(hit) == best_nmismatches &&
(Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP)) {
@@ -6833,7 +6770,7 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
}
if (n_good_spliceends == 1) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_nmismatches_whole(hit) == best_nmismatches &&
(Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP)) {
@@ -6846,11 +6783,11 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
Stage3end_free(&hit);
}
}
- List_free(&spliceends);
+ List_free(&spliceends_sense);
} else {
- /* Create ambiguous */
- hit = (Stage3end_T) List_head(spliceends);
+ /* Create ambiguous, sense */
+ hit = (Stage3end_T) List_head(spliceends_sense);
donor = Stage3end_substring_donor(hit);
acceptor = Stage3end_substring_acceptor(hit);
sensedir = Stage3end_sensedir(hit);
@@ -6858,8 +6795,9 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
ambcoords = NULL;
amb_knowni = (Intlist_T) NULL;
amb_nmismatches = (Intlist_T) NULL;
+
if (Substring_left_genomicseg(donor) == /*segmenti_left*/ segmenti->diagonal - querylength) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
acceptor = Stage3end_substring_acceptor(hit);
#ifdef LARGE_GENOMES
@@ -6871,29 +6809,15 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
}
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- } else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- }
-
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(donor) - nmismatches_acceptor,
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
@@ -6905,7 +6829,7 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
Intlist_free(&amb_nmismatches);
} else if (Substring_left_genomicseg(acceptor) == /*segmenti_left*/ segmenti->diagonal - querylength) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
#ifdef LARGE_GENOMES
@@ -6917,29 +6841,150 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
}
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ sensedir,/*sarrayp*/false));
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+ Intlist_free(&amb_knowni);
+ Intlist_free(&amb_nmismatches);
+
+ } else {
+ fprintf(stderr,"Unexpected: Neither donor left %u nor acceptor left %u equals segmenti_left %u\n",
+ Substring_left_genomicseg(donor),Substring_left_genomicseg(acceptor),segmenti->diagonal - querylength);
+ abort();
+ }
+
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ Stage3end_free(&hit);
+ }
+ List_free(&spliceends_sense);
+ }
+ }
+
+ /* Process results for segmenti, antisense. Modified from collect_elt_matches in sarray-read.c. */
+ if (spliceends_antisense != NULL) {
+ best_nmismatches = querylength;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ debug7(printf("analyzing distance %d, nmismatches %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_nmismatches_whole(hit),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
+ best_nmismatches = nmismatches;
+ best_prob = Stage3end_chimera_prob(hit);
+ } else if (nmismatches == best_nmismatches && (prob = Stage3end_chimera_prob(hit)) > best_prob) {
+ best_prob = prob;
+ }
+ }
+
+ n_good_spliceends = 0;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmismatches_whole(hit) == best_nmismatches &&
+ (Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP)) {
+ debug7(printf("accepting distance %d, nmismatches %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ n_good_spliceends += 1;
+ }
+ }
+
+ if (n_good_spliceends == 1) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmismatches_whole(hit) == best_nmismatches &&
+ (Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP)) {
+ debug7(printf("pushing distance %d, nmismatches %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ hits = List_push(hits,(void *) hit);
} else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
+ Stage3end_free(&hit);
}
+ }
+ List_free(&spliceends_antisense);
+
+ } else {
+ /* Create ambiguous, antisense */
+ hit = (Stage3end_T) List_head(spliceends_antisense);
+ donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ sensedir = Stage3end_sensedir(hit);
+
+ ambcoords = NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ if (Substring_left_genomicseg(donor) == /*segmenti_left*/ segmenti->diagonal - querylength) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ acceptor = Stage3end_substring_acceptor(hit);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ }
+
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ sensedir,/*sarrayp*/false));
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+ Intlist_free(&amb_knowni);
+ Intlist_free(&amb_nmismatches);
+ } else if (Substring_left_genomicseg(acceptor) == /*segmenti_left*/ segmenti->diagonal - querylength) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ }
+
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(acceptor) - nmismatches_donor,
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
@@ -6956,13 +7001,12 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
abort();
}
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
Stage3end_free(&hit);
}
- List_free(&spliceends);
+ List_free(&spliceends_antisense);
}
-
}
}
@@ -7002,20 +7046,20 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
int *floors_from_neg3, *floors_to_pos3;
int nhits_local /*= 0*/;
- List_T spliceends, p;
+ List_T spliceends_sense, spliceends_antisense, p;
Stage3end_T hit;
- int nspliceends, n_good_spliceends;
+ int n_good_spliceends;
int best_nmismatches, nmismatches, nmismatches_donor, nmismatches_acceptor;
double best_prob, prob;
Substring_T donor, acceptor;
int sensedir;
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords, ambcoords_left, ambcoords_right;
+ Uint8list_T ambcoords;
#else
- Uintlist_T ambcoords, ambcoords_left, ambcoords_right;
+ Uintlist_T ambcoords;
#endif
- Intlist_T amb_knowni, amb_nmismatches, amb_knowni_left, amb_knowni_right, amb_nmismatches_left, amb_nmismatches_right;
+ Intlist_T amb_knowni, amb_nmismatches;
debug4s(printf("*** Starting find_singlesplices_minus on %d spliceable segments ***\n",minus_nspliceable));
@@ -7071,7 +7115,7 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
}
- spliceends = (List_T) NULL;
+ spliceends_sense = spliceends_antisense = (List_T) NULL;
if (segmentj_end - segmenti >= MAX_LOCALSPLICING_POTENTIAL) {
/* Too many to check */
@@ -7161,31 +7205,49 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = MAX_READLENGTH;
debug4s(printf(" => checking for single splice: Splice_solve_single_minus\n"));
- spliceends = Splice_solve_single(&(*found_score),&nhits_local,spliceends,&(*lowprob),
- &segmenti->usedp,&segmentj->usedp,
- /*segmenti_left*/segmenti->diagonal - querylength,
- /*segmentj_left*/segmentj->diagonal - querylength,
- segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,max_mismatches_allowed,
- /*plusp*/false,genestrand,first_read_p,subs_or_indels_p,
- /*sarrayp*/false);
+ spliceends_sense =
+ Splice_solve_single_sense(&(*found_score),&nhits_local,spliceends_sense,&(*lowprob),
+ &segmenti->usedp,&segmentj->usedp,
+ /*segmenti_left*/segmenti->diagonal - querylength,
+ /*segmentj_left*/segmentj->diagonal - querylength,
+ segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
+ segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ /*plusp*/false,genestrand,first_read_p,subs_or_indels_p,
+ /*sarrayp*/false);
+ spliceends_antisense =
+ Splice_solve_single_antisense(&(*found_score),&nhits_local,spliceends_antisense,&(*lowprob),
+ &segmenti->usedp,&segmentj->usedp,
+ /*segmenti_left*/segmenti->diagonal - querylength,
+ /*segmentj_left*/segmentj->diagonal - querylength,
+ segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
+ segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ /*plusp*/false,genestrand,first_read_p,subs_or_indels_p,
+ /*sarrayp*/false);
}
}
}
}
- /* Process results for segmenti. Modified from collect_elt_matches in sarray-read.c. */
- if (spliceends != NULL) {
+ /* Process results for segmenti, sense. Modified from collect_elt_matches in sarray-read.c. */
+ if (spliceends_sense != NULL) {
best_nmismatches = querylength;
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
debug7(printf("analyzing distance %d, nmismatches %d, probabilities %f and %f\n",
Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
@@ -7200,7 +7262,7 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
}
n_good_spliceends = 0;
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_nmismatches_whole(hit) == best_nmismatches &&
(Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP)) {
@@ -7213,7 +7275,7 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
}
if (n_good_spliceends == 1) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
if (Stage3end_nmismatches_whole(hit) == best_nmismatches &&
(Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP)) {
@@ -7226,11 +7288,11 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
Stage3end_free(&hit);
}
}
- List_free(&spliceends);
+ List_free(&spliceends_sense);
} else {
- /* Create ambiguous */
- hit = (Stage3end_T) List_head(spliceends);
+ /* Create ambiguous, sense */
+ hit = (Stage3end_T) List_head(spliceends_sense);
donor = Stage3end_substring_donor(hit);
acceptor = Stage3end_substring_acceptor(hit);
sensedir = Stage3end_sensedir(hit);
@@ -7238,8 +7300,9 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
ambcoords = NULL;
amb_knowni = (Intlist_T) NULL;
amb_nmismatches = (Intlist_T) NULL;
+
if (Substring_left_genomicseg(donor) == /*segmenti_left*/ segmenti->diagonal - querylength) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
acceptor = Stage3end_substring_acceptor(hit);
#ifdef LARGE_GENOMES
@@ -7251,29 +7314,15 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
}
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
- } else {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
- }
-
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(donor) - nmismatches_acceptor,
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
@@ -7285,7 +7334,7 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
Intlist_free(&amb_nmismatches);
} else if (Substring_left_genomicseg(acceptor) == /*segmenti_left*/ segmenti->diagonal - querylength) {
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
#ifdef LARGE_GENOMES
@@ -7297,18 +7346,139 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
}
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = (Intlist_T) NULL;
- ambcoords_right = ambcoords;
- amb_knowni_right = amb_knowni;
- amb_nmismatches_right = amb_nmismatches;
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ sensedir,/*sarrayp*/false));
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+ Intlist_free(&amb_knowni);
+ Intlist_free(&amb_nmismatches);
+
+ } else {
+ fprintf(stderr,"Unexpected: Neither donor left %u nor acceptor left %u equals segmenti_left %u\n",
+ Substring_left_genomicseg(donor),Substring_left_genomicseg(acceptor),segmenti->diagonal - querylength);
+ abort();
+ }
+
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ Stage3end_free(&hit);
+ }
+ List_free(&spliceends_sense);
+ }
+ }
+
+ /* Process results for segmenti, antisense. Modified from collect_elt_matches in sarray-read.c. */
+ if (spliceends_antisense != NULL) {
+ best_nmismatches = querylength;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ debug7(printf("analyzing distance %d, nmismatches %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
+ best_nmismatches = nmismatches;
+ best_prob = Stage3end_chimera_prob(hit);
+ } else if (nmismatches == best_nmismatches && (prob = Stage3end_chimera_prob(hit)) > best_prob) {
+ best_prob = prob;
+ }
+ }
+
+ n_good_spliceends = 0;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmismatches_whole(hit) == best_nmismatches &&
+ (Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP)) {
+ debug7(printf("accepting distance %d, nmismatches %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ n_good_spliceends += 1;
+ }
+ }
+
+ if (n_good_spliceends == 1) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmismatches_whole(hit) == best_nmismatches &&
+ (Stage3end_chimera_prob(hit) > best_prob - LOCALSPLICING_SLOP)) {
+ debug7(printf("pushing distance %d, nmismatches %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Stage3end_nmismatches_whole(hit),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ hits = List_push(hits,(void *) hit);
} else {
- ambcoords_left = ambcoords;
- amb_knowni_left = amb_knowni;
- amb_nmismatches_left = amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = (Intlist_T) NULL;
+ Stage3end_free(&hit);
+ }
+ }
+ List_free(&spliceends_antisense);
+
+ } else {
+ /* Create ambiguous, antisense */
+ hit = (Stage3end_T) List_head(spliceends_antisense);
+ donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ sensedir = Stage3end_sensedir(hit);
+
+ ambcoords = NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+
+ if (Substring_left_genomicseg(donor) == /*segmenti_left*/ segmenti->diagonal - querylength) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ acceptor = Stage3end_substring_acceptor(hit);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ }
+
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ sensedir,/*sarrayp*/false));
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+ Intlist_free(&amb_knowni);
+ Intlist_free(&amb_nmismatches);
+
+ } else if (Substring_left_genomicseg(acceptor) == /*segmenti_left*/ segmenti->diagonal - querylength) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+#else
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+#endif
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
@@ -7316,10 +7486,10 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_nmatches*/querylength - Substring_match_length_orig(acceptor) - nmismatches_donor,
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*shortdistancep*/false,/*penalty*/0,querylength,/*amb_nmatches*/Substring_nmatches_posttrim(donor),
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
@@ -7336,14 +7506,14 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
abort();
}
- for (p = spliceends; p != NULL; p = List_next(p)) {
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
Stage3end_free(&hit);
}
- List_free(&spliceends);
+ List_free(&spliceends_antisense);
}
-
}
+
}
}
@@ -7408,9 +7578,9 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
segmentm_antidonor_nknown, segmentm_antiacceptor_nknown;
#ifdef LARGE_GENOMES
- Uint8list_T donor_ambcoords, acceptor_ambcoords, ambcoords_left, ambcoords_right;
+ Uint8list_T donor_ambcoords, acceptor_ambcoords, ambcoords_donor, ambcoords_acceptor;
#else
- Uintlist_T donor_ambcoords, acceptor_ambcoords, ambcoords_left, ambcoords_right;
+ Uintlist_T donor_ambcoords, acceptor_ambcoords, ambcoords_donor, ambcoords_acceptor;
#endif
Intlist_T splicesites_i_left, splicesites_i_right;
Intlist_T nmismatches_list_left, nmismatches_list_right;
@@ -7431,11 +7601,9 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
Stage3end_T hit, *array;
int best_nmismatches, nmismatches;
int n_good_spliceends, n, i;
- int donor_distance, acceptor_distance;
double best_prob, prob;
Univcoord_T lastpos;
- Intlist_T donor_amb_knowni, acceptor_amb_knowni, donor_amb_nmismatches, acceptor_amb_nmismatches,
- amb_knowni_left, amb_knowni_right, amb_nmismatches_left, amb_nmismatches_right;
+ Intlist_T donor_amb_knowni, acceptor_amb_knowni, donor_amb_nmismatches, acceptor_amb_nmismatches;
debug(printf("*** Starting find_known_doublesplices on %d segments ***\n",nspliceable));
@@ -7791,89 +7959,38 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
FREE(array);
if (Intlist_length(donor_amb_nmismatches) == 1 && Intlist_length(acceptor_amb_nmismatches) == 1) {
- acceptor_distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
- if (acceptor_distance < 0) {
- acceptor_distance = -acceptor_distance;
- }
- donor_distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
- if (donor_distance < 0) {
- donor_distance = -donor_distance;
- }
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- acceptor_distance,donor_distance,
/*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
} else if (Intlist_length(donor_amb_nmismatches) > 1 && Intlist_length(acceptor_amb_nmismatches) == 1) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = donor_ambcoords; amb_knowni_left = donor_amb_knowni; amb_nmismatches_left = donor_amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = NULL;
- } else if (sensedir == SENSE_ANTI) {
- ambcoords_right = donor_ambcoords; amb_knowni_right = donor_amb_knowni; amb_nmismatches_right = donor_amb_nmismatches;
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = NULL;
- } else {
- abort();
- }
-
- donor_distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
- if (donor_distance < 0) {
- donor_distance = -donor_distance;
- }
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- /*acceptor_distance*/0U,donor_distance,
- /*amb_nmatches_donor*/Substring_nmismatches_whole(donor),/*amb_nmatches_acceptor*/0,
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*amb_nmatches_donor*/Substring_nmatches_posttrim(donor),/*amb_nmatches_acceptor*/0,
+ donor_ambcoords,/*acceptor_ambcoords*/NULL,
+ donor_amb_knowni,/*amb_knowni_acceptor*/NULL,
+ donor_amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
} else if (Intlist_length(donor_amb_nmismatches) == 1 && Intlist_length(acceptor_amb_nmismatches) > 1) {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_right = acceptor_ambcoords; amb_knowni_right = acceptor_amb_knowni; amb_nmismatches_right = acceptor_amb_nmismatches;
- ambcoords_left = NULL;
- amb_knowni_left = amb_nmismatches_left = NULL;
- } else if (sensedir == SENSE_ANTI) {
- ambcoords_left = acceptor_ambcoords; amb_knowni_left = acceptor_amb_knowni; amb_nmismatches_left = acceptor_amb_nmismatches;
- ambcoords_right = NULL;
- amb_knowni_right = amb_nmismatches_right = NULL;
- } else {
- abort();
- }
-
- acceptor_distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
- if (acceptor_distance < 0) {
- acceptor_distance = -acceptor_distance;
- }
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- acceptor_distance,/*donor_distance*/0U,
- /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/Substring_nmismatches_whole(acceptor),
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/Substring_nmatches_posttrim(acceptor),
+ /*ambcoords_donor*/NULL,acceptor_ambcoords,
+ /*amb_knowni_donor*/NULL,acceptor_amb_knowni,
+ /*amb_nmismatches_donor*/NULL,acceptor_amb_nmismatches,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
} else {
- if (sensedir == SENSE_FORWARD) {
- ambcoords_left = donor_ambcoords; amb_knowni_left = donor_amb_knowni; amb_nmismatches_left = donor_amb_nmismatches;
- ambcoords_right = acceptor_ambcoords; amb_knowni_right = acceptor_amb_knowni; amb_nmismatches_right = acceptor_amb_nmismatches;
- } else if (sensedir == SENSE_ANTI) {
- ambcoords_left = acceptor_ambcoords; amb_knowni_left = acceptor_amb_knowni; amb_nmismatches_left = acceptor_amb_nmismatches;
- ambcoords_right = donor_ambcoords; amb_knowni_right = donor_amb_knowni; amb_nmismatches_right = donor_amb_nmismatches;
- } else {
- abort();
- }
-
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
- /*acceptor_distance*/0U,/*donor_distance*/0U,
- /*amb_nmatches_donor*/Substring_nmismatches_whole(donor),
- /*amb_nmatches_acceptor*/Substring_nmismatches_whole(acceptor),
- ambcoords_left,ambcoords_right,amb_knowni_left,amb_knowni_right,
- amb_nmismatches_left,amb_nmismatches_right,
+ /*amb_nmatches_donor*/Substring_nmatches_posttrim(donor),
+ /*amb_nmatches_acceptor*/Substring_nmatches_posttrim(acceptor),
+ donor_ambcoords,acceptor_ambcoords,
+ donor_amb_knowni,acceptor_amb_knowni,
+ donor_amb_nmismatches,acceptor_amb_nmismatches,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
}
@@ -7949,25 +8066,24 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon != NULL) {
debug4k(printf("New one-third shortexon at left %lu\n",segmentm_left));
- ambcoords_left = lookup_splicesites(splicesites_i_left,splicesites);
- ambcoords_right = lookup_splicesites(splicesites_i_right,splicesites);
+ ambcoords_donor = lookup_splicesites(splicesites_i_left,splicesites);
+ ambcoords_acceptor = lookup_splicesites(splicesites_i_right,splicesites);
amb_nmatches_donor = leftpos - nmismatches_shortexon_left;
amb_nmatches_acceptor = querylength - rightpos - nmismatches_shortexon_right;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
- /*acceptor_distance*/0U,/*donor_distance*/0U,
amb_nmatches_donor,amb_nmatches_acceptor,
- ambcoords_left,ambcoords_right,
- /*amb_knowni_left*/splicesites_i_left,/*amb_knowni_right*/splicesites_i_right,
- nmismatches_list_left,nmismatches_list_right,
+ ambcoords_donor,ambcoords_acceptor,
+ /*amb_knowni_donor*/splicesites_i_left,/*amb_knowni_acceptor*/splicesites_i_right,
+ /*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/nmismatches_list_right,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_right);
- Uint8list_free(&ambcoords_left);
+ Uint8list_free(&ambcoords_donor);
+ Uint8list_free(&ambcoords_acceptor);
#else
- Uintlist_free(&ambcoords_right);
- Uintlist_free(&ambcoords_left);
+ Uintlist_free(&ambcoords_donor);
+ Uintlist_free(&ambcoords_acceptor);
#endif
}
@@ -7999,22 +8115,20 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (acceptor != NULL) Substring_free(&acceptor);
} else {
debug4k(printf("ambp_left true, ambp_right false: New two-thirds shortexon at left %lu\n",segmentm_left));
- ambcoords_left = lookup_splicesites(splicesites_i_left,splicesites);
+ ambcoords_donor = lookup_splicesites(splicesites_i_left,splicesites);
amb_nmatches_donor = leftpos - nmismatches_shortexon_left;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- /*acceptor_distance*/0U,
- /*donor_distance*/splicesites[best_right_j]-splicesites[j2],
amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
- ambcoords_left,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/splicesites_i_left,/*amb_knowni_right*/NULL,
- nmismatches_list_left,/*amb_nmismatches_right*/NULL,
+ ambcoords_donor,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i_left,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_left);
+ Uint8list_free(&ambcoords_donor);
#else
- Uintlist_free(&ambcoords_left);
+ Uintlist_free(&ambcoords_donor);
#endif
}
@@ -8045,22 +8159,20 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (donor != NULL) Substring_free(&donor);
if (shortexon != NULL) Substring_free(&shortexon);
} else {
- ambcoords_right = lookup_splicesites(splicesites_i_right,splicesites);
+ ambcoords_acceptor = lookup_splicesites(splicesites_i_right,splicesites);
amb_nmatches_acceptor = querylength - rightpos - nmismatches_shortexon_right;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- /*acceptor_distance*/splicesites[j1]-splicesites[best_left_j],
- /*donor_distance*/0U,
/*amb_nmatches_donor*/0,amb_nmatches_acceptor,
- /*ambcoords_left*/NULL,ambcoords_right,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/splicesites_i_right,
- /*amb_nmismatches_left*/NULL,nmismatches_list_right,
+ /*ambcoords_donor*/NULL,ambcoords_acceptor,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i_right,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_right,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_right);
+ Uint8list_free(&ambcoords_acceptor);
#else
- Uintlist_free(&ambcoords_right);
+ Uintlist_free(&ambcoords_acceptor);
#endif
}
@@ -8098,12 +8210,10 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("New shortexon at left %lu\n",segmentm_left));
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*acceptor_distance*/splicesites[j1]-splicesites[best_left_j],
- /*donor_distance*/splicesites[best_right_j]-splicesites[j2],
/*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
}
@@ -8170,25 +8280,24 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon != NULL) {
debug4k(printf("New one-third shortexon at left %lu\n",segmentm_left));
- ambcoords_left = lookup_splicesites(splicesites_i_left,splicesites);
- ambcoords_right = lookup_splicesites(splicesites_i_right,splicesites);
+ ambcoords_donor = lookup_splicesites(splicesites_i_right,splicesites);
+ ambcoords_acceptor = lookup_splicesites(splicesites_i_left,splicesites);
amb_nmatches_donor = querylength - rightpos - nmismatches_shortexon_right;
amb_nmatches_acceptor = leftpos - nmismatches_shortexon_left;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
- /*acceptor_distance*/0U,/*donor_distance*/0U,
amb_nmatches_donor,amb_nmatches_acceptor,
- ambcoords_left,ambcoords_right,
- /*amb_knowni_left*/splicesites_i_left,/*amb_knowni_right*/splicesites_i_right,
- nmismatches_list_left,nmismatches_list_right,
+ ambcoords_donor,ambcoords_acceptor,
+ /*amb_knowni_donor*/splicesites_i_right,/*amb_knowni_acceptor*/splicesites_i_left,
+ /*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/nmismatches_list_left,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_right);
- Uint8list_free(&ambcoords_left);
+ Uint8list_free(&ambcoords_donor);
+ Uint8list_free(&ambcoords_acceptor);
#else
- Uintlist_free(&ambcoords_right);
- Uintlist_free(&ambcoords_left);
+ Uintlist_free(&ambcoords_donor);
+ Uintlist_free(&ambcoords_acceptor);
#endif
}
@@ -8218,22 +8327,20 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (donor != NULL) Substring_free(&donor);
if (shortexon != NULL) Substring_free(&shortexon);
} else {
- ambcoords_left = lookup_splicesites(splicesites_i_left,splicesites);
+ ambcoords_acceptor = lookup_splicesites(splicesites_i_left,splicesites);
amb_nmatches_acceptor = leftpos - nmismatches_shortexon_left;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
- /*acceptor_distance*/splicesites[best_right_j]-splicesites[j2],
- /*donor_distance*/0U,
/*amb_nmatches_donor*/0,amb_nmatches_acceptor,
- ambcoords_left,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/splicesites_i_left,/*amb_knowni_right*/NULL,
- nmismatches_list_left,/*amb_nmismatches_right*/NULL,
+ /*ambcoords_donor*/NULL,ambcoords_acceptor,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i_left,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_left,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_left);
+ Uint8list_free(&ambcoords_acceptor);
#else
- Uintlist_free(&ambcoords_left);
+ Uintlist_free(&ambcoords_acceptor);
#endif
}
@@ -8264,22 +8371,20 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
if (acceptor != NULL) Substring_free(&acceptor);
} else {
debug4k(printf("ambp_left false, ambp_right true: New splice at left %lu\n",segmentm_left));
- ambcoords_right = lookup_splicesites(splicesites_i_right,splicesites);
+ ambcoords_donor = lookup_splicesites(splicesites_i_right,splicesites);
amb_nmatches_donor = querylength - rightpos - nmismatches_shortexon_right;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
- /*acceptor_distance*/0U,
- /*donor_distance*/splicesites[j1]-splicesites[best_left_j],
amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,ambcoords_right,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/splicesites_i_right,
- /*amb_nmismatches_left*/NULL,nmismatches_list_right,
+ ambcoords_donor,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i_right,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_right);
+ Uint8list_free(&ambcoords_donor);
#else
- Uintlist_free(&ambcoords_right);
+ Uintlist_free(&ambcoords_donor);
#endif
}
@@ -8315,12 +8420,10 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("New shortexon at left %lu\n",segmentm_left));
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
- /*acceptor_distance*/splicesites[best_right_j]-splicesites[j2],
- /*donor_distance*/splicesites[j1]-splicesites[best_left_j],
/*amb_nmatches_donor*/0,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
splicing_penalty,querylength,sensedir,/*sarrayp*/false));
}
@@ -9095,7 +9198,7 @@ find_terminals (struct Segment_T *plus_segments, int plus_nsegments,
if (plus_nsegments > 0) {
nterminals_left = nterminals_right = nterminals_middle = 0;
- for (segment = plus_segments; (nterminals_middle < MAX_NTERMINALS || nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS) &&
+ for (segment = plus_segments; (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS) &&
segment < &(plus_segments[plus_nsegments]); segment++) {
if (0 && segment->usedp == true) {
/* Previously skipped, but looks like a bad idea */
@@ -9312,7 +9415,7 @@ find_terminals (struct Segment_T *plus_segments, int plus_nsegments,
if (minus_nsegments > 0) {
nterminals_left = nterminals_right = nterminals_middle = 0;
- for (segment = minus_segments; (nterminals_middle < MAX_NTERMINALS || nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS) &&
+ for (segment = minus_segments; (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS) &&
segment < &(minus_segments[minus_nsegments]); segment++) {
if (0 && segment->usedp == true) {
/* Previously skipped, but looks like a bad idea */
@@ -9823,20 +9926,20 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
(*ndistantsplicepairs)++;
@@ -9909,20 +10012,20 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
(*ndistantsplicepairs)++;
@@ -9995,20 +10098,20 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
(*ndistantsplicepairs)++;
@@ -10081,20 +10184,20 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
(*ndistantsplicepairs)++;
@@ -10157,10 +10260,10 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
(*ndistantsplicepairs)++;
@@ -10215,10 +10318,10 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
(*ndistantsplicepairs)++;
@@ -10274,10 +10377,10 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
(*ndistantsplicepairs)++;
@@ -10332,10 +10435,10 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
donor,acceptor,distance,
- /*shortdistancep*/false,distantsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
(*ndistantsplicepairs)++;
@@ -10371,9 +10474,9 @@ find_splicepairs_shortend (int *found_score, List_T hits,
List_T p;
Substring_T donor, acceptor;
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords_left, ambcoords_right;
+ Uint8list_T ambcoords;
#else
- Uintlist_T ambcoords_left, ambcoords_right;
+ Uintlist_T ambcoords;
#endif
Intlist_T splicesites_i;
Intlist_T nmismatches_list;
@@ -10443,22 +10546,22 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/*collect_all_p*/pairedp == true && first_read_p == true)) != NULL) {
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
- ambcoords_right = lookup_splicesites(splicesites_i,splicesites);
+ ambcoords = lookup_splicesites(splicesites_i,splicesites);
amb_nmatches = endlength - nmismatches_shortend;
debug4h(printf("End 1: short-overlap donor_plus: Successful ambiguous from donor #%d with amb_nmatches %d\n",
Substring_splicesites_knowni(donor),amb_nmatches));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- amb_nmatches,/*ambcoords_left*/NULL,ambcoords_right,
- /*ambi_left*/NULL,/*ambi_right*/splicesites_i,
- /*amb_nmismatches_left*/NULL,nmismatches_list,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*ambi_donor*/NULL,/*ambi_acceptor*/splicesites_i,
+ /*amb_nmismatches_donor*/NULL,/*nmismatches_acceptor*/nmismatches_list,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_right);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords_right);
+ Uintlist_free(&ambcoords);
#endif
} else {
bestj = Intlist_head(splicesites_i);
@@ -10473,10 +10576,10 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,acceptor,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
}
@@ -10516,22 +10619,22 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/*collect_all_p*/pairedp == true && first_read_p == false)) != NULL) {
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
- ambcoords_left = lookup_splicesites(splicesites_i,splicesites);
+ ambcoords = lookup_splicesites(splicesites_i,splicesites);
amb_nmatches = endlength - nmismatches_shortend;
debug4h(printf("End 2: short-overlap acceptor_plus: Successful ambiguous from acceptor #%d with amb_nmatches %d\n",
Substring_splicesites_knowni(acceptor),amb_nmatches));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- amb_nmatches,ambcoords_left,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/splicesites_i,/*amb_knowni_right*/NULL,
- nmismatches_list,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_left);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords_left);
+ Uintlist_free(&ambcoords);
#endif
} else {
bestj = Intlist_head(splicesites_i);
@@ -10546,10 +10649,10 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
donor,acceptor,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
}
@@ -10589,22 +10692,22 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/*collect_all_p*/pairedp == true && first_read_p == true)) != NULL) {
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
- ambcoords_left = lookup_splicesites(splicesites_i,splicesites);
+ ambcoords = lookup_splicesites(splicesites_i,splicesites);
amb_nmatches = endlength - nmismatches_shortend;
debug4h(printf("End 3: short-overlap donor_minus: Successful ambiguous from donor #%d with amb_nmatches %d\n",
Substring_splicesites_knowni(donor),amb_nmatches));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- amb_nmatches,ambcoords_left,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/splicesites_i,/*amb_knowni_right*/NULL,
- nmismatches_list,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_left);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords_left);
+ Uintlist_free(&ambcoords);
#endif
} else {
bestj = Intlist_head(splicesites_i);
@@ -10619,10 +10722,10 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,acceptor,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
}
@@ -10663,22 +10766,22 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/*collect_all_p*/pairedp == true && first_read_p == false)) != NULL) {
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
- ambcoords_right = lookup_splicesites(splicesites_i,splicesites);
+ ambcoords = lookup_splicesites(splicesites_i,splicesites);
amb_nmatches = endlength - nmismatches_shortend;
debug4h(printf("End 4: short-overlap acceptor_minus: Successful ambiguous from acceptor #%d with amb_nmatches %d\n",
Substring_splicesites_knowni(acceptor),amb_nmatches));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- amb_nmatches,/*ambcoords_left*/NULL,ambcoords_right,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/splicesites_i,
- /*amb_nmismatches_left*/NULL,nmismatches_list,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_right);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords_right);
+ Uintlist_free(&ambcoords);
#endif
} else {
bestj = Intlist_head(splicesites_i);
@@ -10693,10 +10796,10 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
donor,acceptor,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false));
}
@@ -10736,22 +10839,22 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/*collect_all_p*/pairedp == true && first_read_p == false)) != NULL) {
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
- ambcoords_left = lookup_splicesites(splicesites_i,splicesites);
+ ambcoords = lookup_splicesites(splicesites_i,splicesites);
amb_nmatches = endlength - nmismatches_shortend;
debug4h(printf("End 5: short-overlap antidonor_plus: Successful ambiguous from antidonor #%d with amb_nmatches %d\n",
Substring_splicesites_knowni(donor),amb_nmatches));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- amb_nmatches,ambcoords_left,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/splicesites_i,/*amb_knowni_right*/NULL,
- nmismatches_list,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_left);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords_left);
+ Uintlist_free(&ambcoords);
#endif
} else {
bestj = Intlist_head(splicesites_i);
@@ -10766,10 +10869,10 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,acceptor,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
}
@@ -10810,22 +10913,22 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/*collect_all_p*/pairedp == true && first_read_p == true)) != NULL) {
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
- ambcoords_right = lookup_splicesites(splicesites_i,splicesites);
+ ambcoords = lookup_splicesites(splicesites_i,splicesites);
amb_nmatches = endlength - nmismatches_shortend;
debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful ambiguous from antiacceptor #%d with amb_nmatches %d\n",
Substring_splicesites_knowni(acceptor),amb_nmatches));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- amb_nmatches,/*ambcoords_left*/NULL,ambcoords_right,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/splicesites_i,
- /*amb_nmismatches_left*/NULL,nmismatches_list,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_right);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords_right);
+ Uintlist_free(&ambcoords);
#endif
} else {
bestj = Intlist_head(splicesites_i);
@@ -10840,10 +10943,10 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
donor,acceptor,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
}
@@ -10884,22 +10987,22 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/*collect_all_p*/pairedp == true && first_read_p == false)) != NULL) {
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
- ambcoords_right = lookup_splicesites(splicesites_i,splicesites);
+ ambcoords = lookup_splicesites(splicesites_i,splicesites);
amb_nmatches = endlength - nmismatches_shortend;
debug4h(printf("End 7: short-overlap antidonor_minus: Successful ambiguous from antidonor #%d with amb_nmatches %d\n",
Substring_splicesites_knowni(donor),amb_nmatches));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- amb_nmatches,/*ambcoords_left*/NULL,ambcoords_right,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/splicesites_i,
- /*amb_nmismatches_left*/NULL,nmismatches_list,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_right);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords_right);
+ Uintlist_free(&ambcoords);
#endif
} else {
bestj = Intlist_head(splicesites_i);
@@ -10914,10 +11017,10 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
donor,acceptor,/*distance*/bestleft-origleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
}
@@ -10957,22 +11060,22 @@ find_splicepairs_shortend (int *found_score, List_T hits,
/*collect_all_p*/pairedp == true && first_read_p == true)) != NULL) {
if (endlength < min_shortend || Intlist_length(splicesites_i) > 1) {
- ambcoords_left = lookup_splicesites(splicesites_i,splicesites);
+ ambcoords = lookup_splicesites(splicesites_i,splicesites);
amb_nmatches = endlength - nmismatches_shortend;
debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful ambiguous from antiacceptor #%d with amb_nmatches %d\n",
Substring_splicesites_knowni(acceptor),amb_nmatches));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
/*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- amb_nmatches,ambcoords_left,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/splicesites_i,/*amb_knowni_right*/NULL,
- nmismatches_list,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/false,/*penalty*/0,querylength,amb_nmatches,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/nmismatches_list,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords_left);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords_left);
+ Uintlist_free(&ambcoords);
#endif
} else {
bestj = Intlist_head(splicesites_i);
@@ -10987,10 +11090,10 @@ find_splicepairs_shortend (int *found_score, List_T hits,
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
donor,acceptor,/*distance*/origleft-bestleft,
- /*shortdistancep*/true,localsplicing_penalty,querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false));
}
@@ -12536,21 +12639,11 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
Indexdb_T plus_indexdb, minus_indexdb;
if (genestrand == +2) {
- if (first_read_p == false) {
- plus_indexdb = indexdb_fwd;
- minus_indexdb = indexdb_rev;
- } else {
- plus_indexdb = indexdb_rev;
- minus_indexdb = indexdb_fwd;
- }
+ plus_indexdb = indexdb_rev;
+ minus_indexdb = indexdb_fwd;
} else {
- if (first_read_p == true) {
- plus_indexdb = indexdb_fwd;
- minus_indexdb = indexdb_rev;
- } else {
- plus_indexdb = indexdb_rev;
- minus_indexdb = indexdb_fwd;
- }
+ plus_indexdb = indexdb_fwd;
+ minus_indexdb = indexdb_rev;
}
found_score = querylength;
@@ -14809,17 +14902,16 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
if (genestrand == +2) {
plus_indexdb_5 = indexdb_rev;
- plus_indexdb_3 = indexdb_fwd;
+ plus_indexdb_3 = indexdb_rev;
minus_indexdb_5 = indexdb_fwd;
- minus_indexdb_3 = indexdb_rev;
+ minus_indexdb_3 = indexdb_fwd;
} else {
plus_indexdb_5 = indexdb_fwd;
- plus_indexdb_3 = indexdb_rev;
+ plus_indexdb_3 = indexdb_fwd;
minus_indexdb_5 = indexdb_rev;
- minus_indexdb_3 = indexdb_fwd;
+ minus_indexdb_3 = indexdb_rev;
}
-
*samechr = (List_T) NULL;
*conc_transloc = (List_T) NULL;
*with_terminal = (List_T) NULL;
@@ -17813,7 +17905,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
hits_geneplus_3 = align_end(&cutoff_level_3,gmap_history_3,this_geneplus_3,
query3_compress_fwd,query3_compress_rev,
queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
+ indexdb_fwd,indexdb_fwd,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
user_maxlevel_3,subopt_levels,
diff --git a/src/stage3.c b/src/stage3.c
index a578c6d..dfae4ce 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 138109 2014-06-04 19:33:33Z twu $";
+static char rcsid[] = "$Id: stage3.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -4481,7 +4481,7 @@ Stage3_recompute_coverage (List_T stage3list, Sequence_T queryseq) {
static List_T
pick_cdna_direction (int *winning_cdna_direction, int *sensedir,
- List_T pairs_fwd, List_T pairs_rev,
+ List_T pairs_fwd, List_T pairs_rev, double defect_rate_fwd, double defect_rate_rev,
int ncanonical_fwd, int nsemicanonical_fwd,
int nnoncanonical_fwd, int nbadintrons_fwd,
int ncanonical_rev, int nsemicanonical_rev,
@@ -4549,17 +4549,21 @@ pick_cdna_direction (int *winning_cdna_direction, int *sensedir,
*winning_cdna_direction = -1;
#endif
- } else if (avg_donor_score_fwd > 0.9 && avg_acceptor_score_fwd > 0.9 &&
+ } else if (defect_rate_fwd > DEFECT_MEDQ && defect_rate_rev > DEFECT_MEDQ &&
+ avg_donor_score_fwd > 0.9 && avg_acceptor_score_fwd > 0.9 &&
(avg_donor_score_rev < 0.5 || avg_acceptor_score_rev < 0.5)) {
- debug11(printf("intronscores fwd %f,%f > intronscores rev %f,%f, so fwd wins\n",
- avg_donor_score_fwd,avg_acceptor_score_fwd,avg_donor_score_rev,avg_acceptor_score_rev));
+ debug11(printf("defect_rate %f, %f and intronscores fwd %f,%f > intronscores rev %f,%f, so fwd wins\n",
+ defect_rate_fwd,defect_rate_rev,avg_donor_score_fwd,avg_acceptor_score_fwd,
+ avg_donor_score_rev,avg_acceptor_score_rev));
/* intronscores reveal a clear sensedir */
*winning_cdna_direction = +1;
- } else if (avg_donor_score_rev > 0.9 && avg_acceptor_score_rev > 0.9 &&
+ } else if (defect_rate_fwd > DEFECT_MEDQ && defect_rate_rev > DEFECT_MEDQ &&
+ avg_donor_score_rev > 0.9 && avg_acceptor_score_rev > 0.9 &&
(avg_donor_score_fwd < 0.5 || avg_acceptor_score_fwd < 0.5)) {
- debug11(printf("intronscores rev %f,%f > intronscores fwd %f,%f, so fwd wins\n",
- avg_donor_score_rev,avg_acceptor_score_rev,avg_donor_score_fwd,avg_acceptor_score_fwd));
+ debug11(printf("defect_rate %f, %f and intronscores rev %f,%f > intronscores fwd %f,%f, so rev wins\n",
+ defect_rate_fwd,defect_rate_rev,avg_donor_score_rev,avg_acceptor_score_rev,
+ avg_donor_score_fwd,avg_acceptor_score_fwd));
/* intronscores reveal a clear sensedir */
*winning_cdna_direction = -1;
@@ -7322,6 +7326,7 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
debug(Pair_dump_list(micropairs,/*zerobasedp*/true));
debug(printf("\n"));
+#if 0
if (1 || (nindels == 0 && nmismatches < 4)) {
/* Have a higher standard */
if (prob2 >= 0.95 && prob3 >= 0.95) {
@@ -7343,6 +7348,14 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
pairs = Pairpool_transfer(pairs,gappairs);
}
}
+#else
+ /* Just transfer */
+ debug(printf("Transferring microexon pairs\n"));
+ pairs = Pairpool_transfer(pairs,micropairs);
+ introntype = microintrontype;
+ *shiftp = true;
+#endif
+
}
#endif
}
@@ -9312,13 +9325,13 @@ score_alignment (int *nmatches, int *nmismatches, int *nindels,
#ifdef COMPLEX_DIRECTION
int *indel_alignment_score,
#endif
- int *ncanonical, int *nsemicanonical, int *nnoncanonical,
- List_T pairs, int cdna_direction) {
+ int *nsemicanonical, int *nnoncanonical, List_T pairs, int cdna_direction) {
+ int ncanonical; /* Do not return this; use score_introns instead */
int nunknowns, qopens, qindels, topens, tindels;
double min_splice_prob;
Pair_fracidentity(&(*nmatches),&nunknowns,&(*nmismatches),&qopens,&qindels,&topens,&tindels,
- &(*ncanonical),&(*nsemicanonical),&(*nnoncanonical),&min_splice_prob,
+ &ncanonical,&(*nsemicanonical),&(*nnoncanonical),&min_splice_prob,
pairs,cdna_direction);
debug11(printf("%d matches, %d nmismatches, %d+%d qgaps, %d+%d tgaps => alignment_score is %d\n",
*nmatches,*nmismatches,qopens,qindels,topens,tindels,
@@ -9338,8 +9351,8 @@ score_alignment (int *nmatches, int *nmismatches, int *nindels,
static List_T
-score_introns (double *max_intron_score, double *avg_donor_score, double *avg_acceptor_score, int *nbadintrons,
- List_T path, int cdna_direction, bool watsonp,
+score_introns (double *max_intron_score, double *avg_donor_score, double *avg_acceptor_score,
+ int *ncanonical, int *nbadintrons, List_T path, int cdna_direction, bool watsonp,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
#ifdef WASTE
Pairpool_T pairpool,
@@ -9364,7 +9377,7 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
minintronlen = MININTRONLEN_FINAL;
*max_intron_score = *avg_donor_score = *avg_acceptor_score = 0.0;
- *nbadintrons = 0;
+ *ncanonical = *nbadintrons = 0;
total_matches = total_denominator = 0;
for (p = path; p != NULL; p = p->rest) {
@@ -9429,9 +9442,9 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
if (theta > 1.0) {
theta = 1.0;
}
- debug11(printf("intron_matches %d, intron_denominator %d, theta %f => pvalue %g\n",
+ debug11(printf("right neighborhood: intron_matches %d, intron_denominator %d, theta %f => pvalue %g\n",
intron_matches,intron_denominator,theta,Pbinom(intron_matches,intron_denominator,theta)));
- if (Pbinom(intron_matches,intron_denominator,theta) < 1e-3) {
+ if (Pbinom(intron_matches,intron_denominator,theta) < 0.9) { /* was 1e-3 */
/* Not a good intron */
/* *nbadintrons += 1; */
@@ -9450,9 +9463,9 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
if (theta > 1.0) {
theta = 1.0;
}
- debug11(printf("intron_matches %d, intron_denominator %d, theta %f => pvalue %g\n",
+ debug11(printf("left neighborhood: intron_matches %d, intron_denominator %d, theta %f => pvalue %g\n",
intron_matches,intron_denominator,theta,Pbinom(intron_matches,intron_denominator,theta)));
- if (Pbinom(intron_matches,intron_denominator,theta) < 1e-3) {
+ if (Pbinom(intron_matches,intron_denominator,theta) < 0.9) { /* was 1e-3 */
/* Not a good intron */
/* *nbadintrons += 1; */
@@ -9512,7 +9525,10 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
nintrons += 1;
if (pair->knowngapp == true) {
/* Skip */
- } else if (pair->comp == FWD_CANONICAL_INTRON_COMP && (donor_score < 0.9 && acceptor_score < 0.9)) {
+ *ncanonical += 1;
+ } else if (pair->comp == FWD_CANONICAL_INTRON_COMP) {
+ *ncanonical += 1;
+ } else if (donor_score < 0.9 && acceptor_score < 0.9) {
*nbadintrons = 1;
}
*avg_donor_score += donor_score;
@@ -9582,7 +9598,10 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
nintrons += 1;
if (pair->knowngapp == true) {
/* Skip */
- } else if (pair->comp == REV_CANONICAL_INTRON_COMP && (donor_score < 0.9 && acceptor_score < 0.9)) {
+ *ncanonical += 1;
+ } else if (pair->comp == REV_CANONICAL_INTRON_COMP) {
+ *ncanonical += 1;
+ } else if (donor_score < 0.9 && acceptor_score < 0.9) {
*nbadintrons += 1;
}
*avg_donor_score += donor_score;
@@ -9661,50 +9680,46 @@ end_compare (List_T x, List_T y, int cdna_direction, bool watsonp,
path1 = List_reverse(pairs1);
debug11(printf("Calling score_introns for end_compare on path1\n"));
- pairs1 = score_introns(&max_intron_score,&avg_donor_score_1,&avg_acceptor_score_1,&nbadintrons_1,
- path1,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
+ pairs1 = score_introns(&max_intron_score,&avg_donor_score_1,&avg_acceptor_score_1,&ncanonical_1,
+ &nbadintrons_1,path1,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
alignment_score_1 = score_alignment(&nmatches_1,&nmismatches_1,&nindels_1,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_1,
#endif
- &ncanonical_1,&nsemicanonical_1,&nnoncanonical_1,
- pairs1,cdna_direction);
+ &nsemicanonical_1,&nnoncanonical_1,pairs1,cdna_direction);
path2 = List_reverse(pairs2);
debug11(printf("Calling score_introns for end_compare on path2\n"));
- pairs2 = score_introns(&max_intron_score,&avg_donor_score_2,&avg_acceptor_score_2,&nbadintrons_2,
- path2,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
+ pairs2 = score_introns(&max_intron_score,&avg_donor_score_2,&avg_acceptor_score_2,&ncanonical_2,
+ &nbadintrons_2,path2,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
alignment_score_2 = score_alignment(&nmatches_2,&nmismatches_2,&nindels_2,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_2,
#endif
- &ncanonical_2,&nsemicanonical_2,&nnoncanonical_2,
- pairs2,cdna_direction);
+ &nsemicanonical_2,&nnoncanonical_2,pairs2,cdna_direction);
} else {
path1 = x;
path2 = y;
debug11(printf("Calling score_introns for end_compare on path1\n"));
- pairs1 = score_introns(&max_intron_score,&avg_donor_score_1,&avg_acceptor_score_1,&nbadintrons_1,
- path1,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
+ pairs1 = score_introns(&max_intron_score,&avg_donor_score_1,&avg_acceptor_score_1,&ncanonical_1,
+ &nbadintrons_1,path1,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
alignment_score_1 = score_alignment(&nmatches_1,&nmismatches_1,&nindels_1,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_1,
#endif
- &ncanonical_1,&nsemicanonical_1,&nnoncanonical_1,
- pairs1,cdna_direction);
+ &nsemicanonical_1,&nnoncanonical_1,pairs1,cdna_direction);
path1 = List_reverse(pairs1);
debug11(printf("Calling score_introns for end_compare on path2\n"));
- pairs2 = score_introns(&max_intron_score,&avg_donor_score_2,&avg_acceptor_score_2,&nbadintrons_2,
- path2,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
+ pairs2 = score_introns(&max_intron_score,&avg_donor_score_2,&avg_acceptor_score_2,&ncanonical_2,
+ &nbadintrons_2,path2,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
alignment_score_2 = score_alignment(&nmatches_2,&nmismatches_2,&nindels_2,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_2,
#endif
- &ncanonical_2,&nsemicanonical_2,&nnoncanonical_2,
- pairs2,cdna_direction);
+ &nsemicanonical_2,&nnoncanonical_2,pairs2,cdna_direction);
path2 = List_reverse(pairs2);
}
@@ -12015,8 +12030,8 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *cdna_direction, int *sense
chrnum,chroffset,chrhigh,pairpool);
path_fwd = List_reverse(pairs_fwd);
debug11(printf("Calling score_introns for path_fwd after path_compute_dir\n"));
- pairs_fwd = score_introns(&max_intron_score_fwd,&avg_donor_score_fwd,&avg_acceptor_score_fwd,&nbadintrons_fwd,
- path_fwd,/*cdna_direction*/+1,watsonp,
+ pairs_fwd = score_introns(&max_intron_score_fwd,&avg_donor_score_fwd,&avg_acceptor_score_fwd,
+ &ncanonical_fwd,&nbadintrons_fwd,path_fwd,/*cdna_direction*/+1,watsonp,
chrnum,chroffset,chrhigh,
#ifdef WASTE
pairpool,
@@ -12027,8 +12042,8 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *cdna_direction, int *sense
chrnum,chroffset,chrhigh,pairpool);
path_rev = List_reverse(pairs_rev);
debug11(printf("Calling score_introns for path_rev after path_compute_dir\n"));
- pairs_rev = score_introns(&max_intron_score_rev,&avg_donor_score_rev,&avg_acceptor_score_rev,&nbadintrons_rev,
- path_rev,/*cdna_direction*/-1,watsonp,
+ pairs_rev = score_introns(&max_intron_score_rev,&avg_donor_score_rev,&avg_acceptor_score_rev,
+ &ncanonical_rev,&nbadintrons_rev,path_rev,/*cdna_direction*/-1,watsonp,
chrnum,chroffset,chrhigh,
#ifdef WASTE
pairpool,
@@ -12467,8 +12482,8 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *cdna_direction, int *sense
} else {
path_fwd = List_reverse(pairs_fwd);
debug11(printf("Calling score_introns for path_fwd before path_trim\n"));
- pairs_fwd = score_introns(&max_intron_score_fwd,&avg_donor_score_fwd,&avg_acceptor_score_fwd,&nbadintrons_fwd,
- path_fwd,/*cdna_direction*/+1,watsonp,
+ pairs_fwd = score_introns(&max_intron_score_fwd,&avg_donor_score_fwd,&avg_acceptor_score_fwd,
+ &ncanonical_fwd,&nbadintrons_fwd,path_fwd,/*cdna_direction*/+1,watsonp,
chrnum,chroffset,chrhigh,
#ifdef WASTE
pairpool,
@@ -12478,13 +12493,13 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *cdna_direction, int *sense
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_fwd,
#endif
- &ncanonical_fwd,&nsemicanonical_fwd,&nnoncanonical_fwd,
+ &nsemicanonical_fwd,&nnoncanonical_fwd,
pairs_fwd,/*cdna_direction*/+1);
path_rev = List_reverse(pairs_rev);
debug11(printf("Calling score_introns for path_rev before path_trim\n"));
- pairs_rev = score_introns(&max_intron_score_rev,&avg_donor_score_rev,&avg_acceptor_score_rev,&nbadintrons_rev,
- path_rev,/*cdna_direction*/-1,watsonp,
+ pairs_rev = score_introns(&max_intron_score_rev,&avg_donor_score_rev,&avg_acceptor_score_rev,
+ &ncanonical_rev,&nbadintrons_rev,path_rev,/*cdna_direction*/-1,watsonp,
chrnum,chroffset,chrhigh,
#ifdef WASTE
pairpool,
@@ -12494,10 +12509,11 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *cdna_direction, int *sense
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_rev,
#endif
- &ncanonical_rev,&nsemicanonical_rev,&nnoncanonical_rev,
+ &nsemicanonical_rev,&nnoncanonical_rev,
pairs_rev,/*cdna_direction*/-1);
pairs_pretrim = pick_cdna_direction(&(*cdna_direction),&(*sensedir),pairs_fwd,pairs_rev,
+ defect_rate_fwd,defect_rate_rev,
ncanonical_fwd,nsemicanonical_fwd,nnoncanonical_fwd,nbadintrons_fwd,
ncanonical_rev,nsemicanonical_rev,nnoncanonical_rev,nbadintrons_rev,
max_intron_score_fwd,avg_donor_score_fwd,avg_acceptor_score_fwd,
@@ -12790,6 +12806,7 @@ Stage3_extend_right (T this, int goal, int querylength,
bool mismatchp, protectedp;
int n_peeled_indels;
+ debug10(printf("Entered Stage3_extend_right with goal %d\n",goal));
debug10(printf("LEFT BEFORE FILL\n"));
debug10(Pair_dump_list(this->pairs,true));
debug10(printf("END_LEFT BEFORE FILL\n"));
@@ -12979,6 +12996,7 @@ Stage3_extend_left (T this, int goal,
bool mismatchp, protectedp;
int n_peeled_indels;
+ debug10(printf("Entered Stage3_extend_left with goal %d\n",goal));
debug10(printf("RIGHT BEFORE FILL\n"));
debug10(Pair_dump_list(this->pairs,true));
debug10(printf("END_RIGHT BEFORE FILL\n"));
@@ -13372,12 +13390,12 @@ recompute_for_cdna_direction (int *cdna_direction, List_T pairs, int genestrand,
nmatches_rev, nmismatches_rev, nindels_rev, ncanonical_rev, nsemicanonical_rev, nnoncanonical_rev, nbadintrons_rev;
int sensedir;
- double defect_rate;
+ double defect_rate_fwd, defect_rate_rev;
copy = Pairpool_copy(pairs,pairpool);
/* Compute fwd */
- path_fwd = path_compute_dir(&defect_rate,/*pairs*/copy,/*cdna_direction*/+1,watsonp,
+ path_fwd = path_compute_dir(&defect_rate_fwd,/*pairs*/copy,/*cdna_direction*/+1,watsonp,
genestrand,/*jump_late_p*/watsonp ? false : true,
#ifdef PMAP
queryaaseq_ptr,
@@ -13388,8 +13406,8 @@ recompute_for_cdna_direction (int *cdna_direction, List_T pairs, int genestrand,
oligoindices_minor,diagpool,cellpool,
sufflookback,nsufflookback,maxintronlen_bound,/*close_indels_mode*/+1,
paired_favor_mode,zero_offset);
- pairs_fwd = score_introns(&max_intron_score_fwd,&avg_donor_score_fwd,&avg_acceptor_score_fwd,&nbadintrons_fwd,
- path_fwd,/*cdna_direction*/+1,watsonp,
+ pairs_fwd = score_introns(&max_intron_score_fwd,&avg_donor_score_fwd,&avg_acceptor_score_fwd,
+ &ncanonical_fwd,&nbadintrons_fwd,path_fwd,/*cdna_direction*/+1,watsonp, /* fwd count is read by pick_cdna_direction */
chrnum,chroffset,chrhigh,
#ifdef WASTE
pairpool,
@@ -13399,12 +13417,12 @@ recompute_for_cdna_direction (int *cdna_direction, List_T pairs, int genestrand,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_fwd,
#endif
- &ncanonical_fwd,&nsemicanonical_fwd,&nnoncanonical_fwd,
+ &nsemicanonical_fwd,&nnoncanonical_fwd,
pairs_fwd,/*cdna_direction*/+1);
/* Compute rev */
- path_rev = path_compute_dir(&defect_rate,/*pairs*/pairs,/*cdna_direction*/-1,watsonp,
+ path_rev = path_compute_dir(&defect_rate_rev,/*pairs*/pairs,/*cdna_direction*/-1,watsonp,
genestrand,/*jump_late_p*/watsonp ? false : true,
#ifdef PMAP
queryaaseq_ptr,
@@ -13415,8 +13433,8 @@ recompute_for_cdna_direction (int *cdna_direction, List_T pairs, int genestrand,
oligoindices_minor,diagpool,cellpool,
sufflookback,nsufflookback,maxintronlen_bound,/*close_indels_mode*/+1,
paired_favor_mode,zero_offset);
- pairs_rev = score_introns(&max_intron_score_rev,&avg_donor_score_rev,&avg_acceptor_score_rev,&nbadintrons_rev,
- path_rev,/*cdna_direction*/-1,watsonp,
+ pairs_rev = score_introns(&max_intron_score_rev,&avg_donor_score_rev,&avg_acceptor_score_rev,
+ &ncanonical_rev,&nbadintrons_rev,path_rev,/*cdna_direction*/-1,watsonp,
chrnum,chroffset,chrhigh,
#ifdef WASTE
pairpool,
@@ -13426,10 +13444,11 @@ recompute_for_cdna_direction (int *cdna_direction, List_T pairs, int genestrand,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_rev,
#endif
- &ncanonical_rev,&nsemicanonical_rev,&nnoncanonical_rev,
+ &nsemicanonical_rev,&nnoncanonical_rev,
pairs_rev,/*cdna_direction*/-1);
pairs = pick_cdna_direction(&(*cdna_direction),&sensedir,pairs_fwd,pairs_rev,
+ defect_rate_fwd,defect_rate_rev,
ncanonical_fwd,nsemicanonical_fwd,nnoncanonical_fwd,nbadintrons_fwd,
ncanonical_rev,nsemicanonical_rev,nnoncanonical_rev,nbadintrons_rev,
max_intron_score_fwd,avg_donor_score_fwd,avg_acceptor_score_fwd,
diff --git a/src/stage3hr.c b/src/stage3hr.c
index ead301e..4b2ef6f 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 138745 2014-06-11 19:04:25Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -401,7 +401,6 @@ Stage3hr_file_setup_all (FILE *fp_failedinput_1_in, FILE *fp_failedinput_2_in, F
-#ifdef DEBUG5
static char *
print_sense (int sense) {
if (sense == SENSE_NULL) {
@@ -414,7 +413,6 @@ print_sense (int sense) {
abort();
}
}
-#endif
@@ -463,7 +461,8 @@ struct T {
bool trim_right_splicep;
int penalties; /* Indel penalties */
- int score_eventrim; /* Temporary storage used by Stage3end_optimal_score */
+ int score_eventrim_start; /* Temporary storage used by Stage3end_optimal_score */
+ int score_eventrim_end; /* Temporary storage used by Stage3end_optimal_score */
Overlap_T gene_overlap;
long int tally;
@@ -478,8 +477,8 @@ struct T {
char *deletion; /* for deletions */
Chrpos_T distance; /* for splicing or shortexon (sum of two distances) */
- Chrpos_T acceptor_distance; /* for shortexon */
- Chrpos_T donor_distance; /* for shortexon */
+ Chrpos_T shortexonA_distance; /* for shortexon */
+ Chrpos_T shortexonD_distance; /* for shortexon */
int gmap_nindelbreaks;
int gmap_cdna_direction;
@@ -492,21 +491,33 @@ struct T {
int nchimera_known;
int nchimera_novel;
- int amb_nmatches_start; /* For splice, shortexon, and GMAP */
- int amb_nmatches_end; /* For splice, shortexon, and GMAP */
+ int start_amb_nmatches; /* For splice, shortexon, and GMAP */
+ int end_amb_nmatches; /* For splice, shortexon, and GMAP */
int amb_nmatches_donor; /* For shortexon only */
int amb_nmatches_acceptor; /* For shortexon only */
Endtype_T gmap_start_endtype; /* For GMAP, which has no substrings */
Endtype_T gmap_end_endtype; /* For GMAP, which has no substrings */
- Univcoord_T *start_ambcoords;
- Univcoord_T *end_ambcoords;
- int *start_amb_knowni;
- int *end_amb_knowni;
- int start_nambcoords;
- int end_nambcoords;
- int *start_amb_nmismatches;
- int *end_amb_nmismatches;
+ Univcoord_T *start_ambcoords; /* Pointer to either ambcoords_donor or ambcoords_acceptor */
+ Univcoord_T *end_ambcoords; /* Pointer to either ambcoords_donor or ambcoords_acceptor */
+ int start_nambcoords; /* Equal to either nambcoords_donor or nambcoords_acceptor */
+ int end_nambcoords; /* Equal to either nambcoords_donor or nambcoords_acceptor */
+ Univcoord_T *ambcoords_donor;
+ Univcoord_T *ambcoords_acceptor;
+ int nambcoords_donor;
+ int nambcoords_acceptor;
+
+
+ int *start_amb_knowni; /* Pointer to either amb_knowni_donor or amb_knowni_acceptor */
+ int *end_amb_knowni; /* Pointer to either amb_knowni_donor or amb_knowni_acceptor */
+ int *amb_knowni_donor;
+ int *amb_knowni_acceptor;
+
+ int *start_amb_nmismatches; /* Pointer to either amb_nmismatches_donor or amb_nmismatches_acceptor */
+ int *end_amb_nmismatches; /* Pointer to either amb_nmismatches_donor or amb_nmismatches_acceptor */
+ int *amb_nmismatches_donor;
+ int *amb_nmismatches_acceptor;
+
/* Single: substring1 */
/* Indel: substring1 + substring2 */
@@ -887,12 +898,12 @@ Stage3end_trim_right (T this) {
int
Stage3end_trim_left_raw (T this) {
- return this->trim_left + this->amb_nmatches_start;
+ return this->trim_left + this->start_amb_nmatches;
}
int
Stage3end_trim_right_raw (T this) {
- return this->trim_right + this->amb_nmatches_end;
+ return this->trim_right + this->end_amb_nmatches;
}
int
@@ -993,13 +1004,13 @@ Stage3end_distance (T this) {
}
Chrpos_T
-Stage3end_shortexon_acceptor_distance (T this) {
- return this->acceptor_distance;
+Stage3end_shortexonA_distance (T this) {
+ return this->shortexonA_distance;
}
Chrpos_T
-Stage3end_shortexon_donor_distance (T this) {
- return this->donor_distance;
+Stage3end_shortexonD_distance (T this) {
+ return this->shortexonD_distance;
}
double
@@ -1201,12 +1212,12 @@ Stage3end_end_ambiguous_p (T this) {
int
Stage3end_amb_nmatches_start (T this) {
- return this->amb_nmatches_start;
+ return this->start_amb_nmatches;
}
int
Stage3end_amb_nmatches_end (T this) {
- return this->amb_nmatches_end;
+ return this->end_amb_nmatches;
}
@@ -1553,12 +1564,12 @@ void
Stage3end_free (T *old) {
debug0(printf("Freeing Stage3end %p of type %s\n",*old,hittype_string((*old)->hittype)));
- FREE_OUT((*old)->end_ambcoords);
- FREE_OUT((*old)->start_ambcoords);
- FREE_OUT((*old)->end_amb_knowni);
- FREE_OUT((*old)->start_amb_knowni);
- FREE_OUT((*old)->end_amb_nmismatches);
- FREE_OUT((*old)->start_amb_nmismatches);
+ FREE_OUT((*old)->ambcoords_donor);
+ FREE_OUT((*old)->ambcoords_acceptor);
+ FREE_OUT((*old)->amb_knowni_donor);
+ FREE_OUT((*old)->amb_knowni_acceptor);
+ FREE_OUT((*old)->amb_nmismatches_donor);
+ FREE_OUT((*old)->amb_nmismatches_acceptor);
if ((*old)->deletion != NULL) {
FREE_OUT((*old)->deletion);
@@ -1696,11 +1707,11 @@ gmap5_substring3_overlap (Stage3end_T hit5, Stage3end_T hit3, Substring_T substr
if (hit5->pairarray[i].genomepos == Substring_alignstart_trim(substring) - chroffset) {
debug13(printf("case 1\n"));
return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos + 1) +
- hit3->querylength - Substring_querystart(substring) - hit3->trim_right - hit3->amb_nmatches_end;
+ hit3->querylength - Substring_querystart(substring) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->pairarray[i].genomepos == Substring_alignend_trim(substring) - chroffset) {
debug13(printf("case 2\n"));
return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos + 1) +
- hit3->querylength - Substring_queryend(substring) - hit3->trim_right - hit3->amb_nmatches_end;
+ hit3->querylength - Substring_queryend(substring) - hit3->trim_right - hit3->end_amb_nmatches;
} else {
i++;
}
@@ -1715,13 +1726,13 @@ gmap5_substring3_overlap (Stage3end_T hit5, Stage3end_T hit3, Substring_T substr
hit5->pairarray[i].genomepos,hit5->pairarray[i].querypos,
hit3->trim_left,Substring_querystart(substring),Substring_queryend(substring),hit3->trim_right));
return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos + 1) +
- (hit3->querylength - Substring_querystart(substring) - hit3->trim_right - hit3->amb_nmatches_end); /* trim2_1&trim2_2 */
+ (hit3->querylength - Substring_querystart(substring) - hit3->trim_right - hit3->end_amb_nmatches); /* trim2_1&trim2_2 */
} else if (hit5->pairarray[i].genomepos == Substring_alignend_trim(substring) - chroffset) {
debug13(printf("case 4: genomepos %u, at GMAP %d. substring trim %d, %d..%d** trim %d\n",
hit5->pairarray[i].genomepos,hit5->pairarray[i].querypos,
hit3->trim_left,Substring_querystart(substring),Substring_queryend(substring),hit3->trim_right));
return (hit5->pairarray[i].querypos - hit5->pairarray[0].querypos + 1) +
- hit3->querylength - Substring_queryend(substring) - hit3->trim_right - hit3->amb_nmatches_end;
+ hit3->querylength - Substring_queryend(substring) - hit3->trim_right - hit3->end_amb_nmatches;
} else {
i++;
}
@@ -1745,13 +1756,13 @@ substring5_gmap3_overlap (Stage3end_T hit5, Stage3end_T hit3, Substring_T substr
hit3->pairarray[j].genomepos,hit3->pairarray[j].querypos,
hit5->trim_left,Substring_querystart(substring),Substring_queryend(substring),hit5->trim_right));
return (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos + 1) +
- Substring_querystart(substring) - hit5->trim_left - hit5->amb_nmatches_start; /* okay */
+ Substring_querystart(substring) - hit5->trim_left - hit5->start_amb_nmatches; /* okay */
} else if (hit3->pairarray[j].genomepos == Substring_alignend_trim(substring) - chroffset) {
debug13(printf("case 6: genomepos %u, at GMAP %d. substring trim %d, %d..%d** trim %d\n",
hit3->pairarray[j].genomepos,hit3->pairarray[j].querypos,
hit5->trim_left,Substring_querystart(substring),Substring_queryend(substring),hit5->trim_right));
return (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos + 1) +
- Substring_queryend(substring) - hit5->trim_left - hit5->amb_nmatches_start; /* okay: trim2_2&trim2_1 */
+ Substring_queryend(substring) - hit5->trim_left - hit5->start_amb_nmatches; /* okay: trim2_2&trim2_1 */
} else {
j++;
}
@@ -1764,11 +1775,11 @@ substring5_gmap3_overlap (Stage3end_T hit5, Stage3end_T hit3, Substring_T substr
if (hit3->pairarray[j].genomepos == Substring_alignstart_trim(substring) - chroffset) {
debug13(printf("case 7\n"));
return (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos + 1) +
- Substring_querystart(substring) - hit5->trim_left - hit5->amb_nmatches_start;
+ Substring_querystart(substring) - hit5->trim_left - hit5->start_amb_nmatches;
} else if (hit3->pairarray[j].genomepos == Substring_alignend_trim(substring) - chroffset) {
debug13(printf("case 8\n"));
return (hit3->pairarray[hit3->npairs-1].querypos - hit3->pairarray[j].querypos + 1) +
- Substring_queryend(substring) - hit5->trim_left - hit5->amb_nmatches_start; /* okay: trim2_2&trim2_1 */
+ Substring_queryend(substring) - hit5->trim_left - hit5->start_amb_nmatches; /* okay: trim2_2&trim2_1 */
} else {
j++;
}
@@ -1842,10 +1853,10 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else if (hit5->plusp == true && hit3->plusp == true) {
/* plus/plus */
- start5 = hit5->genomicstart + hit5->trim_left + hit5->amb_nmatches_start;
- end5 = hit5->genomicend - hit5->trim_right - hit5->amb_nmatches_end;
- start3 = hit3->genomicstart + hit3->trim_left + hit3->amb_nmatches_start;
- end3 = hit3->genomicend - hit3->trim_right - hit3->amb_nmatches_end;
+ start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_nmatches;
+ end5 = hit5->genomicend - hit5->trim_right - hit5->end_amb_nmatches;
+ start3 = hit3->genomicstart + hit3->trim_left + hit3->start_amb_nmatches;
+ end3 = hit3->genomicend - hit3->trim_right - hit3->end_amb_nmatches;
if (end3 < start5) {
/* Case 1 */
@@ -1859,25 +1870,25 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
debug13(printf("plus case 2a: start5 %u\n",start5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
return (Substring_alignend_trim(hit3->substring0) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
return (Substring_alignend_trim(hit3->substring1) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
return (Substring_alignend_trim(hit3->substring2) - start5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
}
/* Case 2: Tails overlap. Go from start5 to end3 */
debug13(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start +
+ return Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches +
(end3 - Substring_alignstart_trim(hit5->substring2));
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start +
+ return Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches +
(end3 - Substring_alignstart_trim(hit5->substring1));
} else if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start + */
+ return /* Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches + */
(end3 - Substring_alignstart_trim(hit5->substring0));
}
/* Fall through to general algorithm */
@@ -1887,13 +1898,13 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
debug13(printf("plus case 3\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring2) - end5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring1) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring0) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
}
/* Fall through to general algorithm */
}
@@ -1903,13 +1914,13 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 4: hit5 subsumes hit3 */
debug13(printf("plus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(start3 - Substring_alignstart_trim(hit5->substring0)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(start3 - Substring_alignstart_trim(hit5->substring1)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(start3 - Substring_alignstart_trim(hit5->substring2)) + hit3_trimmed_length;
}
/* Fall through to general algorithm */
@@ -1918,26 +1929,26 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 5: Based on hit3_trimmed_length */
debug13(printf("plus case 5a\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(start3 - Substring_alignstart_trim(hit5->substring0)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(start3 - Substring_alignstart_trim(hit5->substring1)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(start3 - Substring_alignstart_trim(hit5->substring2)) + hit3_trimmed_length;
}
/* Case 5: Based on hit5_trimmed_length */
debug13(printf("plus case 5b\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return /* hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end + */
+ return /* hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches + */
(Substring_alignend_trim(hit3->substring2) - end5) + hit5_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end +
+ return hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches +
(Substring_alignend_trim(hit3->substring1) - end5) + hit5_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end +
+ return hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches +
(Substring_alignend_trim(hit3->substring0) - end5) + hit5_trimmed_length;
}
/* Fall through to general algorithm */
@@ -1947,50 +1958,50 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* General algorithm */
debug13(printf("plus general: hit3->substring1\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring1)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring1)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring1)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignend_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
}
if (hit3->substring2 != NULL) {
debug13(printf("plus general: hit3->substring2\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring2)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring1))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring2)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring2)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignend_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring0))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
}
}
if (hit3->substring0 != NULL) {
debug13(printf("plus general: hit3->substring0\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring0)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring0)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring0)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignend_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
}
}
@@ -1998,10 +2009,10 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else if (hit5->plusp == true && hit3->plusp == false) {
/* plus/minus */
- start5 = hit5->genomicstart + hit5->trim_left + hit5->amb_nmatches_start;
- end5 = hit5->genomicend - hit5->trim_right - hit5->amb_nmatches_end;
- start3 = hit3->genomicstart - hit3->trim_left - hit3->amb_nmatches_start;
- end3 = hit3->genomicend + hit3->trim_right + hit3->amb_nmatches_end;
+ start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_nmatches;
+ end5 = hit5->genomicend - hit5->trim_right - hit5->end_amb_nmatches;
+ start3 = hit3->genomicstart - hit3->trim_left - hit3->start_amb_nmatches;
+ end3 = hit3->genomicend + hit3->trim_right + hit3->end_amb_nmatches;
if (start3 < start5) {
/* Case 1 */
@@ -2015,25 +2026,25 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
debug13(printf("plus case 2a: start5 %u\n",start5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
return (Substring_alignstart_trim(hit3->substring0) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
return (Substring_alignstart_trim(hit3->substring1) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
return (Substring_alignstart_trim(hit3->substring2) - start5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
}
/* Case 2: Tails overlap. Go from start5 to start3 */
debug13(printf("plus case 2b: start3 %u\n",start3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start +
+ return Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches +
(start3 - Substring_alignstart_trim(hit5->substring2));
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start +
+ return Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches +
(start3 - Substring_alignstart_trim(hit5->substring1));
} else if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start + */
+ return /* Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches + */
(start3 - Substring_alignstart_trim(hit5->substring0));
}
/* Fall through to general algorithm */
@@ -2043,13 +2054,13 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
debug13(printf("plus case 3\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
return hit5_trimmed_length + (Substring_alignstart_trim(hit3->substring2) - end5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
return hit5_trimmed_length + (Substring_alignstart_trim(hit3->substring1) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
return hit5_trimmed_length + (Substring_alignstart_trim(hit3->substring0) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
}
/* Fall through to general algorithm */
}
@@ -2059,13 +2070,13 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 4: hit5 subsumes hit3 */
debug13(printf("plus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(end3 - Substring_alignstart_trim(hit5->substring0)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(end3 - Substring_alignstart_trim(hit5->substring1)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(end3 - Substring_alignstart_trim(hit5->substring2)) + hit3_trimmed_length;
}
/* Fall through to general algorithm */
@@ -2074,26 +2085,26 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 5: Based on hit3_trimmed_length */
debug13(printf("plus case 5a\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(end3 - Substring_alignstart_trim(hit5->substring0)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(end3 - Substring_alignstart_trim(hit5->substring1)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(end3 - Substring_alignstart_trim(hit5->substring2)) + hit3_trimmed_length;
}
/* Case 5: Based on hit5_trimmed_length */
debug13(printf("plus case 5b\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return /* hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->amb_nmatches_end + */
+ return /* hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches + */
(Substring_alignstart_trim(hit3->substring2) - end5) + hit5_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->amb_nmatches_end +
+ return hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches +
(Substring_alignstart_trim(hit3->substring1) - end5) + hit5_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->amb_nmatches_end +
+ return hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches +
(Substring_alignstart_trim(hit3->substring0) - end5) + hit5_trimmed_length;
}
/* Fall through to general algorithm */
@@ -2103,50 +2114,50 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* General algorithm */
debug13(printf("plus general: hit3->substring1\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring1)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring1)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring1)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignstart_trim(hit3->substring1) - Substring_alignstart_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) + hit3->trim_right + hit3->end_amb_nmatches;
}
if (hit3->substring2 != NULL) {
debug13(printf("plus general: hit3->substring2\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring2)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring1))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring2)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring2)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignstart_trim(hit3->substring2) - Substring_alignstart_trim(hit5->substring0))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) + hit3->trim_right + hit3->end_amb_nmatches */;
}
}
if (hit3->substring0 != NULL) {
debug13(printf("plus general: hit3->substring0\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring0)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring0)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring0)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignstart_trim(hit3->substring0) - Substring_alignstart_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) + hit3->trim_right + hit3->end_amb_nmatches;
}
}
@@ -2154,10 +2165,10 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else if (hit5->plusp == false && hit3->plusp == true) {
/* minus/plus */
- start5 = hit5->genomicstart - hit5->trim_left - hit5->amb_nmatches_start;
- end5 = hit5->genomicend + hit5->trim_right + hit5->amb_nmatches_end;
- start3 = hit3->genomicstart + hit3->trim_left + hit3->amb_nmatches_start;
- end3 = hit3->genomicend - hit3->trim_right - hit3->amb_nmatches_end;
+ start5 = hit5->genomicstart - hit5->trim_left - hit5->start_amb_nmatches;
+ end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_nmatches;
+ start3 = hit3->genomicstart + hit3->trim_left + hit3->start_amb_nmatches;
+ end3 = hit3->genomicend - hit3->trim_right - hit3->end_amb_nmatches;
if (end3 < end5) {
/* Case 1 */
@@ -2171,25 +2182,25 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
debug13(printf("plus case 2a: end5 %u\n",end5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
return (Substring_alignend_trim(hit3->substring0) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
return (Substring_alignend_trim(hit3->substring1) - end5)
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
return (Substring_alignend_trim(hit3->substring2) - end5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
}
/* Case 2: Tails overlap. Go from end5 to end3 */
debug13(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return Substring_queryend(hit5->substring2) + hit5->trim_left + hit5->amb_nmatches_start +
+ return Substring_queryend(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches +
(end3 - Substring_alignend_trim(hit5->substring2));
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return Substring_queryend(hit5->substring1) + hit5->trim_left + hit5->amb_nmatches_start +
+ return Substring_queryend(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches +
(end3 - Substring_alignend_trim(hit5->substring1));
} else if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->amb_nmatches_start + */
+ return /* Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches + */
(end3 - Substring_alignend_trim(hit5->substring0));
}
/* Fall through to general algorithm */
@@ -2199,13 +2210,13 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
debug13(printf("plus case 3\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring2) - start5)
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring1) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
return hit5_trimmed_length + (Substring_alignend_trim(hit3->substring0) - start5)
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
}
/* Fall through to general algorithm */
}
@@ -2215,13 +2226,13 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 4: hit5 subsumes hit3 */
debug13(printf("plus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
(start3 - Substring_alignend_trim(hit5->substring0)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
(start3 - Substring_alignend_trim(hit5->substring1)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
(start3 - Substring_alignend_trim(hit5->substring2)) + hit3_trimmed_length;
}
/* Fall through to general algorithm */
@@ -2230,26 +2241,26 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 5: Based on hit3_trimmed_length */
debug13(printf("plus case 5a\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
(start3 - Substring_alignend_trim(hit5->substring0)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
(start3 - Substring_alignend_trim(hit5->substring1)) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
(start3 - Substring_alignend_trim(hit5->substring2)) + hit3_trimmed_length;
}
/* Case 5: Based on hit5_trimmed_length */
debug13(printf("plus case 5b\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
- return /* hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end + */
+ return /* hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches + */
(Substring_alignend_trim(hit3->substring2) - start5) + hit5_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
- return hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end +
+ return hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches +
(Substring_alignend_trim(hit3->substring1) - start5) + hit5_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
- return hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end +
+ return hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches +
(Substring_alignend_trim(hit3->substring0) - start5) + hit5_trimmed_length;
}
/* Fall through to general algorithm */
@@ -2259,50 +2270,50 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* General algorithm */
debug13(printf("plus general: hit3->substring1\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring1)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring1) - Substring_alignend_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring1)) {
- return (Substring_queryend(hit5->substring2) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_queryend(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring1) - Substring_alignend_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring1)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
(Substring_alignend_trim(hit3->substring1) - Substring_alignend_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
}
if (hit3->substring2 != NULL) {
debug13(printf("plus general: hit3->substring2\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring2)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring2) - Substring_alignend_trim(hit5->substring1))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring2)) {
- return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring2) - Substring_alignend_trim(hit5->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring2)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
(Substring_alignend_trim(hit3->substring2) - Substring_alignend_trim(hit5->substring0))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
}
}
if (hit3->substring0 != NULL) {
debug13(printf("plus general: hit3->substring0\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring0)) {
- return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) + hit5->trim_left + hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring0) - Substring_alignend_trim(hit5->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring0)) {
- return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) + hit5->trim_left + hit5->start_amb_nmatches) +
(Substring_alignend_trim(hit3->substring0) - Substring_alignend_trim(hit5->substring2))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring0)) {
- return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) + hit5->trim_left + hit5->start_amb_nmatches) + */
(Substring_alignend_trim(hit3->substring0) - Substring_alignend_trim(hit5->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
}
}
@@ -2310,10 +2321,10 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
} else if (hit5->plusp == false && hit3->plusp == false) {
/* minus/minus */
- start5 = hit5->genomicstart - hit5->trim_left - hit5->amb_nmatches_start;
- end5 = hit5->genomicend + hit5->trim_right + hit5->amb_nmatches_end;
- start3 = hit3->genomicstart - hit3->trim_left - hit3->amb_nmatches_start;
- end3 = hit3->genomicend + hit3->trim_right + hit3->amb_nmatches_end;
+ start5 = hit5->genomicstart - hit5->trim_left - hit5->start_amb_nmatches;
+ end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_nmatches;
+ start3 = hit3->genomicstart - hit3->trim_left - hit3->start_amb_nmatches;
+ end3 = hit3->genomicend + hit3->trim_right + hit3->end_amb_nmatches;
if (end3 > start5) {
/* Case 1 */
@@ -2327,25 +2338,25 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
debug13(printf("plus case 2a: start5 %u\n",start5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
return (start5 - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
return (start5 - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
return (start5 - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
}
/* Case 2: Tails overlap. Go from start5 to end3 */
debug13(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start +
+ return Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches +
(Substring_alignstart_trim(hit5->substring2) - end3);
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start +
+ return Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches +
(Substring_alignstart_trim(hit5->substring1) - end3);
} else if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return /* Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start + */
+ return /* Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches + */
(Substring_alignstart_trim(hit5->substring0) - end3);
}
/* Fall through to general algorithm */
@@ -2355,13 +2366,13 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
debug13(printf("minus case 3: end5 %u\n",end5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
return hit5_trimmed_length + (end5 - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
return hit5_trimmed_length + (end5 - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
return hit5_trimmed_length + (end5 - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
}
/* Fall through to general algorithm */
}
@@ -2371,13 +2382,13 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 4: hit5 subsumes hit3 */
debug13(printf("minus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignstart_trim(hit5->substring0) - start3) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring1) - start3) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring2) - start3) + hit3_trimmed_length;
}
/* Fall through to general algorithm */
@@ -2386,26 +2397,26 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* Case 5: Based on hit3_trimmed_length */
debug13(printf("minus case 5a: start3 %u\n",start3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignstart_trim(hit5->substring0) - start3) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring1) - start3) + hit3_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring2) - start3) + hit3_trimmed_length;
}
/* Case 5: Based on hit5_trimmed_length */
debug13(printf("minus case 5b: end5 %u\n",end5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return /* hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end + */
+ return /* hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches + */
(end5 - Substring_alignend_trim(hit3->substring2)) + hit5_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end +
+ return hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches +
(end5 - Substring_alignend_trim(hit3->substring1)) + hit5_trimmed_length;
} else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end +
+ return hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches +
(end5 - Substring_alignend_trim(hit3->substring0)) + hit5_trimmed_length;
}
/* Fall through to general algorithm */
@@ -2415,50 +2426,50 @@ pair_insert_length_trimmed (Stage3end_T hit5, Stage3end_T hit3, int hit5_trimmed
/* General algorithm */
debug13(printf("minus general: hit3->substring1\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring1)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring1) - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring1)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring2) - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring1)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignstart_trim(hit5->substring0) - Substring_alignend_trim(hit3->substring1))
- + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring1) - hit3->trim_right - hit3->end_amb_nmatches;
}
if (hit3->substring2 != NULL) {
debug13(printf("minus general: hit3->substring2\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring2)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring1) - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring2)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring2) - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring2)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignstart_trim(hit5->substring0) - Substring_alignend_trim(hit3->substring2))
- /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->amb_nmatches_end */;
+ /* + hit3->querylength - Substring_queryend(hit3->substring2) - hit3->trim_right - hit3->end_amb_nmatches */;
}
}
if (hit3->substring0 != NULL) {
debug13(printf("minus general: hit3->substring0\n"));
if (Substring_overlap_segment_trimmed_p(hit5->substring1,hit3->substring0)) {
- return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring1) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring1) - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring2 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring2,hit3->substring0)) {
- return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->amb_nmatches_start) +
+ return (Substring_querystart(hit5->substring2) - hit5->trim_left - hit5->start_amb_nmatches) +
(Substring_alignstart_trim(hit5->substring2) - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
} else if (hit5->substring0 != NULL && Substring_overlap_segment_trimmed_p(hit5->substring0,hit3->substring0)) {
- return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->amb_nmatches_start) + */
+ return /* (Substring_querystart(hit5->substring0) - hit5->trim_left - hit5->start_amb_nmatches) + */
(Substring_alignstart_trim(hit5->substring0) - Substring_alignend_trim(hit3->substring0))
- + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->amb_nmatches_end;
+ + hit3->querylength - Substring_queryend(hit3->substring0) - hit3->trim_right - hit3->end_amb_nmatches;
}
}
@@ -2491,24 +2502,24 @@ Stage3pair_overlap (int *hardclip5, int *hardclip3, Stage3pair_T this) {
return 0;
} else {
debug13(printf("hit5 trim_left %d + amb_start %d, trim_right %d + amb_end %d, hit3 trim_left %d + amb_start %d, trim_right %d + amb_end %d\n",
- hit5->trim_left,hit5->amb_nmatches_start,hit5->trim_right,hit5->amb_nmatches_end,
- hit3->trim_left,hit3->amb_nmatches_start,hit3->trim_right,hit3->amb_nmatches_end));
+ hit5->trim_left,hit5->start_amb_nmatches,hit5->trim_right,hit5->end_amb_nmatches,
+ hit3->trim_left,hit3->start_amb_nmatches,hit3->trim_right,hit3->end_amb_nmatches));
if (hit5->plusp == true) {
- hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->amb_nmatches_start - hit5->amb_nmatches_end;
- hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->amb_nmatches_start - hit3->amb_nmatches_end;
+ hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->start_amb_nmatches - hit5->end_amb_nmatches;
+ hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->start_amb_nmatches - hit3->end_amb_nmatches;
totallength = hit5_trimmed_length + hit3_trimmed_length;
debug13(printf("totallength = %d, hit5 trimmed length = %d, hit3 trimmed length = %d\n",
totallength,hit5_trimmed_length,hit3_trimmed_length));
#if 0
- hit5_inside = (hit5->genomicend - 1) - hit5->trim_right - hit5->amb_nmatches_end;
- hit3_inside = hit3->genomicstart + hit3->trim_left + hit3->amb_nmatches_start; /* because hit3 is inverted */
+ hit5_inside = (hit5->genomicend - 1) - hit5->trim_right - hit5->end_amb_nmatches;
+ hit3_inside = hit3->genomicstart + hit3->trim_left + hit3->start_amb_nmatches; /* because hit3 is inverted */
insertlength = (hit3_inside + hit3_trimmed_length - 1) - (hit5_inside - hit5_trimmed_length + 1) + 1;
#endif
debug13(printf("original insertlength: %d, trim+amb5: %d..%d, trim+amb3: %d..%d\n",
- this->insertlength,hit5->trim_left + hit5->amb_nmatches_start,
- hit5->trim_right + hit5->amb_nmatches_end,hit3->trim_left + hit3->amb_nmatches_start,
- hit3->trim_right + hit3->amb_nmatches_end));
+ this->insertlength,hit5->trim_left + hit5->start_amb_nmatches,
+ hit5->trim_right + hit5->end_amb_nmatches,hit3->trim_left + hit3->start_amb_nmatches,
+ hit3->trim_right + hit3->end_amb_nmatches));
if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
insertlength = this->insertlength = pair_insert_length_trimmed(hit5,hit3,hit5_trimmed_length,hit3_trimmed_length);
} else {
@@ -2533,24 +2544,24 @@ Stage3pair_overlap (int *hardclip5, int *hardclip3, Stage3pair_T this) {
/* Clip tails equally */
*hardclip5 = insertlength/2;
*hardclip3 = insertlength - (*hardclip5);
- *hardclip3 += hit3->trim_right + hit3->amb_nmatches_end;
- *hardclip5 += hit5->trim_left + hit5->amb_nmatches_start;
+ *hardclip3 += hit3->trim_right + hit3->end_amb_nmatches;
+ *hardclip5 += hit5->trim_left + hit5->start_amb_nmatches;
debug13(printf("Clip tails with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = -1;
} else if (overlap > minlength) {
/* Clip heads, but limit clipping on hit5 */
*hardclip5 = hit5_trimmed_length/2;
*hardclip3 = overlap - (*hardclip5);
- *hardclip3 += hit3->trim_left + hit3->amb_nmatches_start;
- *hardclip5 += hit5->trim_right + hit5->amb_nmatches_end;
+ *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
+ *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
debug13(printf("Clip heads limited with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = +1;
} else {
/* Clip heads equally */
*hardclip5 = overlap/2;
*hardclip3 = overlap - (*hardclip5);
- *hardclip3 += hit3->trim_left + hit3->amb_nmatches_start;
- *hardclip5 += hit5->trim_right + hit5->amb_nmatches_end;
+ *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
+ *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
debug13(printf("Clip heads with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = +1;
}
@@ -2562,24 +2573,24 @@ Stage3pair_overlap (int *hardclip5, int *hardclip3, Stage3pair_T this) {
/* Clip tails equally */
*hardclip3 = insertlength/2;
*hardclip5 = insertlength - (*hardclip3);
- *hardclip3 += hit3->trim_right + hit3->amb_nmatches_end;
- *hardclip5 += hit5->trim_left + hit5->amb_nmatches_start;
+ *hardclip3 += hit3->trim_right + hit3->end_amb_nmatches;
+ *hardclip5 += hit5->trim_left + hit5->start_amb_nmatches;
debug13(printf("Clip tails with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = -1;
} else if (overlap > minlength) {
/* Clip heads, but limit clipping on hit3 */
*hardclip3 = hit3_trimmed_length/2;
*hardclip5 = overlap - (*hardclip3);
- *hardclip3 += hit3->trim_left + hit3->amb_nmatches_start;
- *hardclip5 += hit5->trim_right + hit5->amb_nmatches_end;
+ *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
+ *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
debug13(printf("Clip heads limited with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = +1;
} else {
/* Clip heads equally */
*hardclip3 = overlap/2;
*hardclip5 = overlap - (*hardclip3);
- *hardclip3 += hit3->trim_left + hit3->amb_nmatches_start;
- *hardclip5 += hit5->trim_right + hit5->amb_nmatches_end;
+ *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
+ *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
debug13(printf("Clip heads with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = +1;
}
@@ -2589,21 +2600,21 @@ Stage3pair_overlap (int *hardclip5, int *hardclip3, Stage3pair_T this) {
return clipdir;
} else {
- hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->amb_nmatches_start - hit5->amb_nmatches_end;
- hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->amb_nmatches_start - hit3->amb_nmatches_end;
+ hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->start_amb_nmatches - hit5->end_amb_nmatches;
+ hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->start_amb_nmatches - hit3->end_amb_nmatches;
totallength = hit5_trimmed_length + hit3_trimmed_length;
debug13(printf("totallength = %d, hit5 trimmed length = %d, hit3 trimmed length = %d\n",
totallength,hit5_trimmed_length,hit3_trimmed_length));
#if 0
- hit5_inside = hit5->genomicend + hit5->trim_right + hit5->amb_nmatches_end;
- hit3_inside = (hit3->genomicstart - 1) - hit3->trim_left - hit3->amb_nmatches_start; /* because hit3 is inverted */
+ hit5_inside = hit5->genomicend + hit5->trim_right + hit5->end_amb_nmatches;
+ hit3_inside = (hit3->genomicstart - 1) - hit3->trim_left - hit3->start_amb_nmatches; /* because hit3 is inverted */
insertlength = (hit5_inside + hit5_trimmed_length - 1) - (hit3_inside - hit3_trimmed_length + 1) + 1;
#endif
debug13(printf("original insertlength: %d, trim+amb5: %d..%d, trim+amb3: %d..%d\n",
- this->insertlength,hit5->trim_left + hit5->amb_nmatches_start,
- hit5->trim_right + hit5->amb_nmatches_end,hit3->trim_left + hit3->amb_nmatches_start,
- hit3->trim_right + hit3->amb_nmatches_end));
+ this->insertlength,hit5->trim_left + hit5->start_amb_nmatches,
+ hit5->trim_right + hit5->end_amb_nmatches,hit3->trim_left + hit3->start_amb_nmatches,
+ hit3->trim_right + hit3->end_amb_nmatches));
if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
insertlength = this->insertlength = pair_insert_length_trimmed(hit5,hit3,hit5_trimmed_length,hit3_trimmed_length);
} else {
@@ -2628,24 +2639,24 @@ Stage3pair_overlap (int *hardclip5, int *hardclip3, Stage3pair_T this) {
/* Clip tails equally */
*hardclip5 = insertlength/2;
*hardclip3 = insertlength - (*hardclip5);
- *hardclip3 += hit3->trim_right + hit3->amb_nmatches_end;
- *hardclip5 += hit5->trim_left + hit5->amb_nmatches_start;
+ *hardclip3 += hit3->trim_right + hit3->end_amb_nmatches;
+ *hardclip5 += hit5->trim_left + hit5->start_amb_nmatches;
debug13(printf("Clip tails with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = -1;
} else if (overlap > minlength) {
/* Clip heads, but limit clipping on hit5 */
*hardclip5 = hit5_trimmed_length/2;
*hardclip3 = overlap - (*hardclip5);
- *hardclip3 += hit3->trim_left + hit3->amb_nmatches_start;
- *hardclip5 += hit5->trim_right + hit5->amb_nmatches_end;
+ *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
+ *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
debug13(printf("Clip heads limited with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = +1;
} else {
/* Clip heads equally */
*hardclip5 = overlap/2;
*hardclip3 = overlap - (*hardclip5);
- *hardclip3 += hit3->trim_left + hit3->amb_nmatches_start;
- *hardclip5 += hit5->trim_right + hit5->amb_nmatches_end;
+ *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
+ *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
debug13(printf("Clip heads with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = +1;
}
@@ -2657,24 +2668,24 @@ Stage3pair_overlap (int *hardclip5, int *hardclip3, Stage3pair_T this) {
/* Clip tails equally */
*hardclip3 = insertlength/2;
*hardclip5 = insertlength - (*hardclip3);
- *hardclip3 += hit3->trim_right + hit3->amb_nmatches_end;
- *hardclip5 += hit5->trim_left + hit5->amb_nmatches_start;
+ *hardclip3 += hit3->trim_right + hit3->end_amb_nmatches;
+ *hardclip5 += hit5->trim_left + hit5->start_amb_nmatches;
debug13(printf("Clip tails with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = -1;
} else if (overlap > minlength) {
/* Clip heads, but limit clipping on hit3 */
*hardclip3 = hit3_trimmed_length/2;
*hardclip5 = overlap - (*hardclip3);
- *hardclip3 += hit3->trim_left + hit3->amb_nmatches_start;
- *hardclip5 += hit5->trim_right + hit5->amb_nmatches_end;
+ *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
+ *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
debug13(printf("Clip heads limited with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = +1;
} else {
/* Clip heads equally */
*hardclip3 = overlap/2;
*hardclip5 = overlap - (*hardclip3);
- *hardclip3 += hit3->trim_left + hit3->amb_nmatches_start;
- *hardclip5 += hit5->trim_right + hit5->amb_nmatches_end;
+ *hardclip3 += hit3->trim_left + hit3->start_amb_nmatches;
+ *hardclip5 += hit5->trim_right + hit5->end_amb_nmatches;
debug13(printf("Clip heads with hardclip5 %d and hardclip3 %d\n",*hardclip5,*hardclip3));
clipdir = +1;
}
@@ -2848,7 +2859,8 @@ Stage3end_copy (T old) {
new->trim_right_splicep = old->trim_right_splicep;
new->penalties = old->penalties;
- new->score_eventrim = old->score_eventrim;
+ new->score_eventrim_start = old->score_eventrim_start;
+ new->score_eventrim_end = old->score_eventrim_end;
new->gene_overlap = old->gene_overlap;
new->tally = old->tally;
@@ -2868,8 +2880,8 @@ Stage3end_copy (T old) {
}
new->distance = old->distance;
- new->acceptor_distance = old->acceptor_distance;
- new->donor_distance = old->donor_distance;
+ new->shortexonA_distance = old->shortexonA_distance;
+ new->shortexonD_distance = old->shortexonD_distance;
new->gmap_nindelbreaks = old->gmap_nindelbreaks;
new->gmap_cdna_direction = old->gmap_cdna_direction;
@@ -2877,45 +2889,70 @@ Stage3end_copy (T old) {
new->sensedir = old->sensedir;
new->sensedir_nonamb = old->sensedir_nonamb;
+ new->gmap_start_endtype = old->gmap_start_endtype;
+ new->gmap_end_endtype = old->gmap_end_endtype;
+
+
new->start_ambiguous_p = old->start_ambiguous_p;
new->end_ambiguous_p = old->end_ambiguous_p;
- new->nchimera_known = old->nchimera_known;
- new->nchimera_novel = old->nchimera_novel;
- new->amb_nmatches_start = old->amb_nmatches_start;
- new->amb_nmatches_end = old->amb_nmatches_end;
+ new->start_amb_nmatches = old->start_amb_nmatches;
+ new->end_amb_nmatches = old->end_amb_nmatches;
new->amb_nmatches_donor = old->amb_nmatches_donor;
new->amb_nmatches_acceptor = old->amb_nmatches_acceptor;
+ if ((new->nambcoords_donor = old->nambcoords_donor) == 0) {
+ new->ambcoords_donor = (Univcoord_T *) NULL;
+ new->amb_knowni_donor = (int *) NULL;
+ new->amb_nmismatches_donor = (int *) NULL;
+ } else {
+ new->ambcoords_donor = (Univcoord_T *) CALLOC_OUT(old->nambcoords_donor,sizeof(Univcoord_T));
+ memcpy(new->ambcoords_donor,old->ambcoords_donor,old->nambcoords_donor*sizeof(Univcoord_T));
+ new->amb_knowni_donor = (int *) CALLOC_OUT(old->nambcoords_donor,sizeof(int));
+ memcpy(new->amb_knowni_donor,old->amb_knowni_donor,old->nambcoords_donor*sizeof(int));
+ new->amb_nmismatches_donor = (int *) CALLOC_OUT(old->nambcoords_donor,sizeof(int));
+ memcpy(new->amb_nmismatches_donor,old->amb_nmismatches_donor,old->nambcoords_donor*sizeof(int));
+ }
- new->gmap_start_endtype = old->gmap_start_endtype;
- new->gmap_end_endtype = old->gmap_end_endtype;
-
- if ((new->start_nambcoords = old->start_nambcoords) == 0) {
- new->start_ambcoords = (Univcoord_T *) NULL;
- new->start_amb_knowni = (int *) NULL;
- new->start_amb_nmismatches = (int *) NULL;
+ if ((new->nambcoords_acceptor = old->nambcoords_acceptor) == 0) {
+ new->ambcoords_acceptor = (Univcoord_T *) NULL;
+ new->amb_knowni_acceptor = (int *) NULL;
+ new->amb_nmismatches_acceptor = (int *) NULL;
} else {
- new->start_ambcoords = (Univcoord_T *) CALLOC_OUT(old->start_nambcoords,sizeof(Univcoord_T));
- memcpy(new->start_ambcoords,old->start_ambcoords,old->start_nambcoords*sizeof(Univcoord_T));
- new->start_amb_knowni = (int *) CALLOC_OUT(old->start_nambcoords,sizeof(int));
- memcpy(new->start_amb_knowni,old->start_amb_knowni,old->start_nambcoords*sizeof(int));
- new->start_amb_nmismatches = (int *) CALLOC_OUT(old->start_nambcoords,sizeof(int));
- memcpy(new->start_amb_nmismatches,old->start_amb_nmismatches,old->start_nambcoords*sizeof(int));
+ new->ambcoords_acceptor = (Univcoord_T *) CALLOC_OUT(old->nambcoords_acceptor,sizeof(Univcoord_T));
+ memcpy(new->ambcoords_acceptor,old->ambcoords_acceptor,old->nambcoords_acceptor*sizeof(Univcoord_T));
+ new->amb_knowni_acceptor = (int *) CALLOC_OUT(old->nambcoords_acceptor,sizeof(int));
+ memcpy(new->amb_knowni_acceptor,old->amb_knowni_acceptor,old->nambcoords_acceptor*sizeof(int));
+ new->amb_nmismatches_acceptor = (int *) CALLOC_OUT(old->nambcoords_acceptor,sizeof(int));
+ memcpy(new->amb_nmismatches_acceptor,old->amb_nmismatches_acceptor,old->nambcoords_acceptor*sizeof(int));
}
- if ((new->end_nambcoords = old->end_nambcoords) == 0) {
- new->end_ambcoords = (Univcoord_T *) NULL;
- new->end_amb_knowni = (int *) NULL;
- new->end_amb_nmismatches = (int *) NULL;
+ if (old->sensedir == SENSE_FORWARD) {
+ new->start_ambcoords = new->ambcoords_donor;
+ new->start_nambcoords = new->nambcoords_donor;
+ new->start_amb_knowni = new->amb_knowni_donor;
+ new->start_amb_nmismatches = new->amb_nmismatches_donor;
+
+ new->end_ambcoords = new->ambcoords_acceptor;
+ new->end_nambcoords = new->nambcoords_acceptor;
+ new->end_amb_knowni = new->amb_knowni_acceptor;
+ new->end_amb_nmismatches = new->amb_nmismatches_acceptor;
+
} else {
- new->end_ambcoords = (Univcoord_T *) CALLOC_OUT(old->end_nambcoords,sizeof(Univcoord_T));
- memcpy(new->end_ambcoords,old->end_ambcoords,old->end_nambcoords*sizeof(Univcoord_T));
- new->end_amb_knowni = (int *) CALLOC_OUT(old->end_nambcoords,sizeof(int));
- memcpy(new->end_amb_knowni,old->end_amb_knowni,old->end_nambcoords*sizeof(int));
- new->end_amb_nmismatches = (int *) CALLOC_OUT(old->end_nambcoords,sizeof(int));
- memcpy(new->end_amb_nmismatches,old->end_amb_nmismatches,old->end_nambcoords*sizeof(int));
+ new->start_ambcoords = new->ambcoords_acceptor;
+ new->start_nambcoords = new->nambcoords_acceptor;
+ new->start_amb_knowni = new->amb_knowni_acceptor;
+ new->start_amb_nmismatches = new->amb_nmismatches_acceptor;
+
+ new->end_ambcoords = new->ambcoords_donor;
+ new->end_nambcoords = new->nambcoords_donor;
+ new->end_amb_knowni = new->amb_knowni_donor;
+ new->end_amb_nmismatches = new->amb_nmismatches_donor;
}
+
+
+ new->nchimera_known = old->nchimera_known;
+ new->nchimera_novel = old->nchimera_novel;
new->substring1 = Substring_copy(old->substring1);
new->substring2 = Substring_copy(old->substring2);
@@ -3216,18 +3253,23 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
new->tally = -1L;
*found_score = 0;
- new->amb_nmatches_start = new->amb_nmatches_end = 0;
+ new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
+ new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
+ new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
+ new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
new->start_nambcoords = new->end_nambcoords = 0;
+ new->nambcoords_donor = new->nambcoords_acceptor = 0;
new->nchimera_known = 0;
new->nchimera_novel = 0;
new->distance = 0U;
- new->acceptor_distance = new->donor_distance = 0U;
+ new->shortexonA_distance = new->shortexonD_distance = 0U;
new->paired_usedp = false;
new->paired_seenp = false;
@@ -3360,18 +3402,23 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
*found_score = new->score;
}
- new->amb_nmatches_start = new->amb_nmatches_end = 0;
+ new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
+ new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
+ new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
+ new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
new->start_nambcoords = new->end_nambcoords = 0;
+ new->nambcoords_donor = new->nambcoords_acceptor = 0;
new->nchimera_known = 0;
new->nchimera_novel = 0;
new->distance = 0U;
- new->acceptor_distance = new->donor_distance = 0U;
+ new->shortexonA_distance = new->shortexonD_distance = 0U;
new->paired_usedp = false;
new->paired_seenp = false;
@@ -3546,18 +3593,23 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
*found_score = new->score;
}
- new->amb_nmatches_start = new->amb_nmatches_end = 0;
+ new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
+ new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
+ new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
+ new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
new->start_nambcoords = new->end_nambcoords = 0;
+ new->nambcoords_donor = new->nambcoords_acceptor = 0;
new->nchimera_known = 0;
new->nchimera_novel = 0;
new->distance = 0U;
- new->acceptor_distance = new->donor_distance = 0U;
+ new->shortexonA_distance = new->shortexonD_distance = 0U;
new->paired_usedp = false;
new->paired_seenp = false;
@@ -3760,18 +3812,23 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
*found_score = new->score;
}
- new->amb_nmatches_start = new->amb_nmatches_end = 0;
+ new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
+ new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
+ new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
+ new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
new->start_nambcoords = new->end_nambcoords = 0;
+ new->nambcoords_donor = new->nambcoords_acceptor = 0;
new->nchimera_known = 0;
new->nchimera_novel = 0;
new->distance = 0U;
- new->acceptor_distance = new->donor_distance = 0U;
+ new->shortexonA_distance = new->shortexonD_distance = 0U;
new->paired_usedp = false;
new->paired_seenp = false;
@@ -3790,12 +3847,12 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
Substring_T donor, Substring_T acceptor, Chrpos_T distance,
bool shortdistancep, int splicing_penalty, int querylength, int amb_nmatches,
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords_left, Uint8list_T ambcoords_right,
+ Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
#else
- Uintlist_T ambcoords_left, Uintlist_T ambcoords_right,
+ Uintlist_T ambcoords_donor, Uintlist_T ambcoords_acceptor,
#endif
- Intlist_T amb_knowni_left, Intlist_T amb_knowni_right,
- Intlist_T amb_nmismatches_left, Intlist_T amb_nmismatches_right,
+ Intlist_T amb_knowni_donor, Intlist_T amb_knowni_acceptor,
+ Intlist_T amb_nmismatches_donor, Intlist_T amb_nmismatches_acceptor,
bool copy_donor_p, bool copy_acceptor_p, bool first_read_p, int sensedir,
bool sarrayp) {
T new;
@@ -3803,8 +3860,8 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
Substring_T substring_for_concordance; /* always the inner substring */
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_splice %p with sensedir %d, donor substring %p and acceptor substring %p\n",
- new,sensedir,donor,acceptor));
+ debug0(printf("Stage3end_new_splice %p with sensedir %d, donor substring %p and acceptor substring %p, and amb_nmatches %d\n",
+ new,sensedir,donor,acceptor,amb_nmatches));
new->deletion = (char *) NULL;
new->querylength_adj = new->querylength = querylength;
@@ -3966,147 +4023,147 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
/* printf("Making splice with shortdistancep = %d, donor chrnum %d, and acceptor chrnum %d => chrnum %d\n",
shortdistancep,Substring_chrnum(donor),Substring_chrnum(acceptor),new->chrnum); */
- if (donor == NULL) {
- new->genomicstart = Substring_genomicstart(acceptor);
- new->genomicend = Substring_genomicend(acceptor);
-
- } else if (acceptor == NULL) {
- new->genomicstart = Substring_genomicstart(donor);
- new->genomicend = Substring_genomicend(donor);
-
- } else if (sensedir == SENSE_FORWARD) {
- new->genomicstart = Substring_genomicstart(donor);
- new->genomicend = Substring_genomicend(acceptor);
+#ifdef LARGE_GENOMES
+ new->ambcoords_donor = Uint8list_to_array_out(&new->nambcoords_donor,ambcoords_donor);
+ new->ambcoords_acceptor = Uint8list_to_array_out(&new->nambcoords_acceptor,ambcoords_acceptor);
+#else
+ new->ambcoords_donor = Uintlist_to_array_out(&new->nambcoords_donor,ambcoords_donor);
+ new->ambcoords_acceptor = Uintlist_to_array_out(&new->nambcoords_acceptor,ambcoords_acceptor);
+#endif
- } else if (sensedir == SENSE_ANTI) {
- new->genomicstart = Substring_genomicstart(acceptor);
- new->genomicend = Substring_genomicend(donor);
+ new->amb_knowni_donor = Intlist_to_array_out(&ignore,amb_knowni_donor);
+ new->amb_knowni_acceptor = Intlist_to_array_out(&ignore,amb_knowni_acceptor);
+ new->amb_nmismatches_donor = Intlist_to_array_out(&ignore,amb_nmismatches_donor);
+ new->amb_nmismatches_acceptor = Intlist_to_array_out(&ignore,amb_nmismatches_acceptor);
- } else {
- abort();
- }
- debug0(printf(" hittype is %s, genomicpos %u..%u\n",
- hittype_string(new->hittype),new->genomicstart,new->genomicend));
+ if (sensedir == SENSE_FORWARD) {
+ if (donor == NULL) {
+ new->genomicstart = Substring_genomicstart(acceptor);
+ new->genomicend = Substring_genomicend(acceptor);
- new->amb_nmatches_start = new->amb_nmatches_end = 0;
- new->start_ambiguous_p = new->end_ambiguous_p = false;
+ new->start_ambiguous_p = true;
+ new->start_amb_nmatches = amb_nmatches;
+ new->start_ambcoords = new->ambcoords_donor;
+ new->start_nambcoords = new->nambcoords_donor;
+ new->start_amb_knowni = new->amb_knowni_donor;
+ new->start_amb_nmismatches = new->amb_nmismatches_donor;
+
+ new->end_ambiguous_p = false;
+ new->end_amb_nmatches = 0;
+ new->end_ambcoords = NULL;
+ new->end_nambcoords = 0;
+ new->end_amb_knowni = NULL;
+ new->end_amb_nmismatches = NULL;
- if (new->genomicstart < new->genomicend) {
- new->low = new->genomicstart;
- new->high = new->genomicend;
+ } else if (acceptor == NULL) {
+ new->genomicstart = Substring_genomicstart(donor);
+ new->genomicend = Substring_genomicend(donor);
- if (ambcoords_left != NULL) {
- new->amb_nmatches_start = amb_nmatches;
- new->start_ambiguous_p = true;
- new->amb_nmatches_end = 0;
- } else if (ambcoords_right != NULL) {
- new->amb_nmatches_end = amb_nmatches;
new->end_ambiguous_p = true;
- new->amb_nmatches_start = 0;
- } else if (donor == NULL) {
- if (sensedir == SENSE_FORWARD) {
- if ((new->amb_nmatches_start = amb_nmatches) > 0) {
- new->start_ambiguous_p = true;
- }
- } else if (sensedir == SENSE_ANTI) {
- if ((new->amb_nmatches_end = amb_nmatches) > 0) {
- new->end_ambiguous_p = true;
- }
- } else {
- abort();
- }
+ new->end_amb_nmatches = amb_nmatches;
+ new->end_ambcoords = new->ambcoords_acceptor;
+ new->end_nambcoords = new->nambcoords_acceptor;
+ new->end_amb_knowni = new->amb_knowni_acceptor;
+ new->end_amb_nmismatches = new->amb_nmismatches_acceptor;
- } else if (acceptor == NULL) {
- if (sensedir == SENSE_FORWARD) {
- if ((new->amb_nmatches_end = amb_nmatches) > 0) {
- new->end_ambiguous_p = true;
- }
- } else if (sensedir == SENSE_ANTI) {
- if ((new->amb_nmatches_start = amb_nmatches) > 0) {
- new->start_ambiguous_p = true;
- }
- } else {
- abort();
- }
+ new->start_ambiguous_p = false;
+ new->start_amb_nmatches = 0;
+ new->start_ambcoords = NULL;
+ new->start_nambcoords = 0;
+ new->start_amb_knowni = NULL;
+ new->start_amb_nmismatches = NULL;
} else {
- /* new->amb_nmatches_start = new->amb_nmatches_end = 0; */
- }
+ new->genomicstart = Substring_genomicstart(donor);
+ new->genomicend = Substring_genomicend(acceptor);
-#ifdef LARGE_GENOMES
- new->start_ambcoords = Uint8list_to_array_out(&new->start_nambcoords,ambcoords_left);
- new->end_ambcoords = Uint8list_to_array_out(&new->end_nambcoords,ambcoords_right);
-#else
- new->start_ambcoords = Uintlist_to_array_out(&new->start_nambcoords,ambcoords_left);
- new->end_ambcoords = Uintlist_to_array_out(&new->end_nambcoords,ambcoords_right);
-#endif
- new->start_amb_knowni = Intlist_to_array_out(&new->start_nambcoords,amb_knowni_left);
- new->end_amb_knowni = Intlist_to_array_out(&new->end_nambcoords,amb_knowni_right);
- new->start_amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches_left);
- new->end_amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches_right);
+ new->start_ambiguous_p = new->end_ambiguous_p = false;
+ new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->start_ambcoords = new->end_ambcoords = NULL;
+ new->start_nambcoords = new->end_nambcoords = 0;
+ new->start_amb_knowni = new->end_amb_knowni = NULL;
+ new->start_amb_nmismatches = new->end_amb_nmismatches = NULL;
+ }
} else {
- new->low = new->genomicend;
- new->high = new->genomicstart;
+ if (donor == NULL) {
+ new->genomicstart = Substring_genomicstart(acceptor);
+ new->genomicend = Substring_genomicend(acceptor);
- if (ambcoords_right != NULL) {
- new->amb_nmatches_start = amb_nmatches;
- new->start_ambiguous_p = true;
- new->amb_nmatches_end = 0;
- } else if (ambcoords_left != NULL) {
- new->amb_nmatches_end = amb_nmatches;
new->end_ambiguous_p = true;
- new->amb_nmatches_start = 0;
- } else if (donor == NULL) {
- /* Note: Same as for plus */
- if (sensedir == SENSE_FORWARD) {
- if ((new->amb_nmatches_start = amb_nmatches) > 0) {
- new->start_ambiguous_p = true;
- }
- } else if (sensedir == SENSE_ANTI) {
- if ((new->amb_nmatches_end = amb_nmatches) > 0) {
- new->end_ambiguous_p = true;
- }
- } else {
- abort();
- }
+ new->end_amb_nmatches = amb_nmatches;
+ new->end_ambcoords = new->ambcoords_donor;
+ new->end_nambcoords = new->nambcoords_donor;
+ new->end_amb_knowni = new->amb_knowni_donor;
+ new->end_amb_nmismatches = new->amb_nmismatches_donor;
+
+ new->start_ambiguous_p = false;
+ new->start_amb_nmatches = 0;
+ new->start_ambcoords = NULL;
+ new->start_nambcoords = 0;
+ new->start_amb_knowni = NULL;
+ new->start_amb_nmismatches = NULL;
} else if (acceptor == NULL) {
- /* Note: Same as for plus */
- if (sensedir == SENSE_FORWARD) {
- if ((new->amb_nmatches_end = amb_nmatches) > 0) {
- new->end_ambiguous_p = true;
- }
- } else if (sensedir == SENSE_ANTI) {
- if ((new->amb_nmatches_start = amb_nmatches) > 0) {
- new->start_ambiguous_p = true;
- }
- } else {
- abort();
- }
+ new->genomicstart = Substring_genomicstart(donor);
+ new->genomicend = Substring_genomicend(donor);
+
+ new->start_ambiguous_p = true;
+ new->start_amb_nmatches = amb_nmatches;
+ new->start_ambcoords = new->ambcoords_acceptor;
+ new->start_nambcoords = new->nambcoords_acceptor;
+ new->start_amb_knowni = new->amb_knowni_acceptor;
+ new->start_amb_nmismatches = new->amb_nmismatches_acceptor;
+
+ new->end_ambiguous_p = false;
+ new->end_amb_nmatches = 0;
+ new->end_ambcoords = NULL;
+ new->end_nambcoords = 0;
+ new->end_amb_knowni = NULL;
+ new->end_amb_nmismatches = NULL;
} else {
- /* new->amb_nmatches_start = new->amb_nmatches_end = 0; */
+ new->genomicstart = Substring_genomicstart(acceptor);
+ new->genomicend = Substring_genomicend(donor);
+
+ new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->start_ambiguous_p = new->end_ambiguous_p = false;
+ new->start_ambcoords = new->end_ambcoords = NULL;
+ new->start_nambcoords = new->end_nambcoords = 0;
+ new->start_amb_knowni = new->end_amb_knowni = NULL;
+ new->start_amb_nmismatches = new->end_amb_nmismatches = NULL;
}
+ }
-#ifdef LARGE_GENOMES
- new->start_ambcoords = Uint8list_to_array_out(&new->start_nambcoords,ambcoords_right);
- new->end_ambcoords = Uint8list_to_array_out(&new->end_nambcoords,ambcoords_left);
-#else
- new->start_ambcoords = Uintlist_to_array_out(&new->start_nambcoords,ambcoords_right);
- new->end_ambcoords = Uintlist_to_array_out(&new->end_nambcoords,ambcoords_left);
-#endif
- new->start_amb_knowni = Intlist_to_array_out(&new->start_nambcoords,amb_knowni_right);
- new->end_amb_knowni = Intlist_to_array_out(&new->end_nambcoords,amb_knowni_left);
- new->start_amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches_right);
- new->end_amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches_left);
+
+ if (new->genomicstart < new->genomicend) {
+ new->low = new->genomicstart;
+ new->high = new->genomicend;
+
+ } else {
+ new->low = new->genomicend;
+ new->high = new->genomicstart;
+ }
+
+ debug0(printf(" hittype is %s, genomicpos %u..%u\n",
+ hittype_string(new->hittype),new->genomicstart,new->genomicend));
+ debug0(printf("start_ambiguous_p %d (%d starts), end_ambiguous_p %d (%d ends)\n",
+ new->start_ambiguous_p,new->start_nambcoords,new->end_ambiguous_p,new->end_nambcoords));
+ debug0(printf("start_amb_nmatches %d, end_amb_nmatches %d\n",new->start_amb_nmatches,new->end_amb_nmatches));
+
+#ifdef CHECK_ASSERTIONS
+ if (new->start_ambiguous_p == true && new->start_nambcoords == 0) {
+ abort();
+ }
+ if (new->end_ambiguous_p == true && new->end_nambcoords == 0) {
+ abort();
}
- debug0(printf("start nambcoords %d, end nambcoords %d\n",new->start_nambcoords,new->end_nambcoords));
+#endif
+
new->genomiclength = new->high - new->low;
new->guided_insertlength = 0U;
-
new->nchimera_known = Substring_nchimera_known(donor) + Substring_nchimera_known(acceptor);
new->nchimera_novel = Substring_nchimera_novel(donor) + Substring_nchimera_novel(acceptor);
if (new->start_ambiguous_p == true && favor_ambiguous_p == true) {
@@ -4232,7 +4289,6 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
abort();
}
}
- debug0(printf("plusp is %d\n",new->plusp));
new->nmismatches_whole = nmismatches_donor + nmismatches_acceptor;
new->score = new->ntscore = splicing_penalty + new->nmismatches_whole;
@@ -4308,7 +4364,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
}
new->distance = distance;
- new->acceptor_distance = new->donor_distance = 0U;
+ new->shortexonA_distance = new->shortexonD_distance = 0U;
new->paired_usedp = false;
new->paired_seenp = false;
@@ -4330,24 +4386,25 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
/* Never returns NULL. Never copies substrings. Always shortdistance. */
+/* Donor ----(A distance)---- [A Shortexon D] ----(D distance)---- Acceptor */
T
Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T acceptor, Substring_T shortexon,
- Chrpos_T acceptor_distance, Chrpos_T donor_distance,
int amb_nmatches_donor, int amb_nmatches_acceptor,
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords_left, Uint8list_T ambcoords_right,
+ Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
#else
- Uintlist_T ambcoords_left, Uintlist_T ambcoords_right,
+ Uintlist_T ambcoords_donor, Uintlist_T ambcoords_acceptor,
#endif
- Intlist_T amb_knowni_left, Intlist_T amb_knowni_right,
- Intlist_T amb_nmismatches_left, Intlist_T amb_nmismatches_right,
+ Intlist_T amb_knowni_donor, Intlist_T amb_knowni_acceptor,
+ Intlist_T amb_nmismatches_donor, Intlist_T amb_nmismatches_acceptor,
bool copy_donor_p, bool copy_acceptor_p, bool copy_shortexon_p,
int splicing_penalty, int querylength, int sensedir, bool sarrayp) {
T new;
int ignore;
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_shortexon %p\n",new));
+ debug0(printf("Stage3end_new_shortexon %p, amb_donor %d, amb_acceptor %d\n",
+ new,amb_nmatches_donor,amb_nmatches_acceptor));
new->deletion = (char *) NULL;
new->querylength_adj = new->querylength = querylength;
@@ -4364,6 +4421,9 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->substringD = new->substringA = (Substring_T) NULL;
new->sensedir_nonamb = SENSE_NULL; /* Ignore sensedir based on double ambiguous ends */
+ new->shortexonA_distance = 0;
+ new->shortexonD_distance = 0;
+
} else {
if (donor == NULL) {
new->hittype = TWO_THIRDS_SHORTEXON;
@@ -4372,6 +4432,25 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
} else {
new->hittype = SHORTEXON;
}
+
+ /* Compute distances */
+ if (donor == NULL) {
+ new->shortexonA_distance = 0;
+ } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) {
+ new->shortexonA_distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
+ } else {
+ new->shortexonA_distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon);
+ }
+
+ if (acceptor == NULL) {
+ new->shortexonD_distance = 0;
+ } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) {
+ new->shortexonD_distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
+ } else {
+ new->shortexonD_distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon);
+ }
+ new->distance = new->shortexonA_distance + new->shortexonD_distance;
+
new->substring1 = copy_shortexon_p ? Substring_copy(shortexon) : shortexon;
if (sensedir == SENSE_FORWARD) {
new->substringD = new->substring0 = copy_donor_p ? Substring_copy(donor) : donor;
@@ -4412,80 +4491,82 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
/* printf("Making splice with shortdistancep = %d, donor chrnum %d, and acceptor chrnum %d => chrnum %d\n",
shortdistancep,Substring_chrnum(donor),Substring_chrnum(acceptor),new->chrnum); */
+
+ new->amb_nmatches_donor = amb_nmatches_donor;
+ new->amb_nmatches_acceptor = amb_nmatches_acceptor;
+
+#ifdef LARGE_GENOMES
+ new->ambcoords_donor = Uint8list_to_array_out(&new->nambcoords_donor,ambcoords_donor);
+ new->ambcoords_acceptor = Uint8list_to_array_out(&new->nambcoords_acceptor,ambcoords_acceptor);
+#else
+ new->ambcoords_donor = Uintlist_to_array_out(&new->nambcoords_donor,ambcoords_donor);
+ new->ambcoords_acceptor = Uintlist_to_array_out(&new->nambcoords_acceptor,ambcoords_acceptor);
+#endif
+
+ new->amb_knowni_donor = Intlist_to_array_out(&ignore,amb_knowni_donor);
+ new->amb_knowni_acceptor = Intlist_to_array_out(&ignore,amb_knowni_acceptor);
+ new->amb_nmismatches_donor = Intlist_to_array_out(&ignore,amb_nmismatches_donor);
+ new->amb_nmismatches_acceptor = Intlist_to_array_out(&ignore,amb_nmismatches_acceptor);
+
+
if (sensedir == SENSE_FORWARD) {
new->genomicstart = (donor != NULL ? Substring_genomicstart(donor) : Substring_genomicstart(shortexon));
new->genomicend = (acceptor != NULL ? Substring_genomicend(acceptor) : Substring_genomicend(shortexon));
- } else if (sensedir == SENSE_ANTI) {
+
+ new->start_amb_nmatches = new->amb_nmatches_donor;
+ new->start_ambcoords = new->ambcoords_donor;
+ new->start_nambcoords = new->nambcoords_donor;
+ new->start_amb_knowni = new->amb_knowni_donor;
+ new->start_amb_nmismatches = new->amb_nmismatches_donor;
+
+ new->end_amb_nmatches = new->amb_nmatches_acceptor;
+ new->end_ambcoords = new->ambcoords_acceptor;
+ new->end_nambcoords = new->nambcoords_acceptor;
+ new->end_amb_knowni = new->amb_knowni_acceptor;
+ new->end_amb_nmismatches = new->amb_nmismatches_acceptor;
+
+ new->start_ambiguous_p = (ambcoords_donor != NULL) ? true : false;
+ new->end_ambiguous_p = (ambcoords_acceptor != NULL) ? true : false;
+
+ } else {
new->genomicstart = (acceptor != NULL ? Substring_genomicstart(acceptor) : Substring_genomicstart(shortexon));
new->genomicend = (donor != NULL ? Substring_genomicend(donor) : Substring_genomicend(shortexon));
- } else {
- abort();
+
+ new->start_amb_nmatches = new->amb_nmatches_acceptor;
+ new->start_ambcoords = new->ambcoords_acceptor;
+ new->start_nambcoords = new->nambcoords_acceptor;
+ new->start_amb_knowni = new->amb_knowni_acceptor;
+ new->start_amb_nmismatches = new->amb_nmismatches_acceptor;
+
+ new->end_amb_nmatches = new->amb_nmatches_donor;
+ new->end_ambcoords = new->ambcoords_donor;
+ new->end_nambcoords = new->nambcoords_donor;
+ new->end_amb_knowni = new->amb_knowni_donor;
+ new->end_amb_nmismatches = new->amb_nmismatches_donor;
+
+ new->start_ambiguous_p = (ambcoords_acceptor != NULL) ? true : false;
+ new->end_ambiguous_p = (ambcoords_donor != NULL) ? true : false;
}
if (new->genomicstart < new->genomicend) {
+ debug0(printf("plus %s\n",print_sense(sensedir)));
new->low = new->genomicstart;
new->high = new->genomicend;
- if (ambcoords_left != NULL) {
- new->amb_nmatches_start = (sensedir == SENSE_FORWARD) ? amb_nmatches_donor : amb_nmatches_acceptor;
- } else {
- new->amb_nmatches_start = 0;
- }
-
- if (ambcoords_right != NULL) {
- new->amb_nmatches_end = (sensedir == SENSE_FORWARD) ? amb_nmatches_acceptor : amb_nmatches_donor;
- } else {
- new->amb_nmatches_end = 0;
- }
-
- new->start_ambiguous_p = (ambcoords_left != NULL) ? true : false;
- new->end_ambiguous_p = (ambcoords_right != NULL) ? true : false;
-#ifdef LARGE_GENOMES
- new->start_ambcoords = Uint8list_to_array_out(&new->start_nambcoords,ambcoords_left);
- new->end_ambcoords = Uint8list_to_array_out(&new->end_nambcoords,ambcoords_right);
-#else
- new->start_ambcoords = Uintlist_to_array_out(&new->start_nambcoords,ambcoords_left);
- new->end_ambcoords = Uintlist_to_array_out(&new->end_nambcoords,ambcoords_right);
-#endif
- new->start_amb_knowni = Intlist_to_array_out(&new->start_nambcoords,amb_knowni_left);
- new->end_amb_knowni = Intlist_to_array_out(&new->end_nambcoords,amb_knowni_right);
- new->start_amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches_left);
- new->end_amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches_right);
-
} else {
+ debug0(printf("minus %s\n",print_sense(sensedir)));
new->low = new->genomicend;
new->high = new->genomicstart;
+ }
- if (ambcoords_right != NULL) {
- new->amb_nmatches_start = (sensedir == SENSE_FORWARD) ? amb_nmatches_donor : amb_nmatches_acceptor;
- } else {
- new->amb_nmatches_start = 0;
- }
-
- if (ambcoords_left != NULL) {
- new->amb_nmatches_end = (sensedir == SENSE_FORWARD) ? amb_nmatches_acceptor : amb_nmatches_donor;
- } else {
- new->amb_nmatches_end = 0;
- }
+ debug0(printf(" hittype is %s, genomicpos %u..%u\n",
+ hittype_string(new->hittype),new->genomicstart,new->genomicend));
+ debug0(printf("start_ambiguous_p %d, end_ambiguous_p %d\n",new->start_ambiguous_p,new->end_ambiguous_p));
+ debug0(printf("start_amb_nmatches %d, end_amb_nmatches %d\n",new->start_amb_nmatches,new->end_amb_nmatches));
- new->start_ambiguous_p = (ambcoords_right != NULL) ? true : false;
- new->end_ambiguous_p = (ambcoords_left != NULL) ? true : false;
-#ifdef LARGE_GENOMES
- new->start_ambcoords = Uint8list_to_array_out(&new->start_nambcoords,ambcoords_right);
- new->end_ambcoords = Uint8list_to_array_out(&new->end_nambcoords,ambcoords_left);
-#else
- new->start_ambcoords = Uintlist_to_array_out(&new->start_nambcoords,ambcoords_right);
- new->end_ambcoords = Uintlist_to_array_out(&new->end_nambcoords,ambcoords_left);
-#endif
- new->start_amb_knowni = Intlist_to_array_out(&new->start_nambcoords,amb_knowni_right);
- new->end_amb_knowni = Intlist_to_array_out(&new->end_nambcoords,amb_knowni_left);
- new->start_amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches_right);
- new->end_amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches_left);
- }
new->genomiclength = new->high - new->low;
new->guided_insertlength = 0U;
-
new->nchimera_known = Substring_nchimera_known(shortexon) + Substring_nchimera_known(donor) + Substring_nchimera_known(acceptor);
new->nchimera_novel = Substring_nchimera_novel(shortexon) + Substring_nchimera_novel(donor) + Substring_nchimera_novel(acceptor);
if (new->start_ambiguous_p == true && favor_ambiguous_p == true) {
@@ -4566,9 +4647,6 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
}
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(donor) + Substring_nmismatches_refdiff(acceptor) + Substring_nmismatches_refdiff(shortexon); */
- new->amb_nmatches_donor = amb_nmatches_donor;
- new->amb_nmatches_acceptor = amb_nmatches_acceptor;
-
new->nmatches = Substring_nmatches(shortexon);
new->nmatches_posttrim = Substring_nmatches_posttrim(shortexon);
if (donor == NULL) {
@@ -4576,7 +4654,7 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->nmatches += amb_nmatches_donor;
}
} else {
- assert(amb_nmatches_donor == 0);
+ /* assert(amb_nmatches_donor == 0); */
new->nmatches += Substring_nmatches(donor);
}
if (acceptor == NULL) {
@@ -4584,7 +4662,7 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->nmatches += amb_nmatches_acceptor;
}
} else {
- assert(amb_nmatches_acceptor == 0);
+ /* assert(amb_nmatches_acceptor == 0); */
new->nmatches += Substring_nmatches(acceptor);
}
@@ -4613,10 +4691,6 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
*found_score = new->score;
}
- new->distance = acceptor_distance + donor_distance;
- new->acceptor_distance = acceptor_distance;
- new->donor_distance = donor_distance;
-
new->paired_usedp = false;
new->paired_seenp = false;
new->concordantp = false;
@@ -4822,18 +4896,23 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
- new->amb_nmatches_start = new->amb_nmatches_end = 0;
+ new->start_amb_nmatches = new->end_amb_nmatches = 0;
+ new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
+ new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
+ new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
+ new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
new->start_nambcoords = new->end_nambcoords = 0;
+ new->nambcoords_donor = new->nambcoords_acceptor = 0;
new->nchimera_known = 0;
new->nchimera_novel = 0;
new->distance = 0U;
- new->acceptor_distance = new->donor_distance = 0U;
+ new->shortexonA_distance = new->shortexonD_distance = 0U;
new->sensedir = new->sensedir_nonamb = SENSE_NULL;
new->paired_usedp = false;
@@ -5060,7 +5139,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
- if ((new->amb_nmatches_start = ambig_end_length_5) == 0) {
+ if ((new->start_amb_nmatches = ambig_end_length_5) == 0) {
new->gmap_start_endtype = END;
} else if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) {
new->gmap_start_endtype = AMB_DON;
@@ -5072,7 +5151,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
abort();
}
- if ((new->amb_nmatches_end = ambig_end_length_3) == 0) {
+ if ((new->end_amb_nmatches = ambig_end_length_3) == 0) {
new->gmap_end_endtype = END;
} else if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) {
new->gmap_end_endtype = AMB_DON;
@@ -5084,16 +5163,22 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
abort();
}
+ new->amb_nmatches_donor = new->amb_nmatches_acceptor = 0;
+
new->start_ambiguous_p = new->end_ambiguous_p = false;
new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
+ new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
+ new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
+ new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
new->start_nambcoords = new->end_nambcoords = 0;
+ new->nambcoords_donor = new->nambcoords_acceptor = 0;
new->nchimera_known = 0;
new->nchimera_novel = 0; /* nintrons? */
new->distance = 0U;
- new->acceptor_distance = new->donor_distance = 0U;
+ new->shortexonA_distance = new->shortexonD_distance = 0;
new->paired_usedp = false;
new->paired_seenp = false;
@@ -5787,12 +5872,13 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
List_T optimal = NULL, p;
T hit;
int n;
- int minscore = MAX_READLENGTH;
+ int minscore_start = MAX_READLENGTH, minscore_end = MAX_READLENGTH;
int max_nmatches = 0, max_nmatches_posttrim = 0;
int trim_left, trim_right;
int min_trim_left = MAX_READLENGTH, min_trim_right = MAX_READLENGTH;
int max_trim_left_terminal = 0, max_trim_right_terminal = 0;
int nindelbreaks;
+ int cutoff_level_start, cutoff_level_end;
#ifdef TRANSLOC_SPECIAL
bool non_translocation_p = false;
@@ -5890,9 +5976,9 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
if (hit->hittype == TERMINAL && finalp == false) {
/* Ignore */
- hit->score_eventrim = 0;
+ hit->score_eventrim_start = hit->score_eventrim_end = 0;
} else if (hit->hittype == GMAP) {
- hit->score_eventrim = 0; /* was hit->penalties */
+ hit->score_eventrim_start = hit->score_eventrim_end = 0; /* was hit->penalties */
debug4(printf("score GMAP:"));
#if 0
if (Stage3end_bad_stretch_p(hit,query_compress_fwd,query_compress_rev) == true) {
@@ -5901,6 +5987,7 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
}
#endif
+#if 0
if (0 && hit->trim_left <= 8) {
/* Ignore small trims */
} else if (hit->trim_left > trim_left) {
@@ -5913,43 +6000,69 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
hit->score_eventrim += hit->trim_right - trim_right;
debug4(printf(" add trim right (%d - %d).",hit->trim_right,trim_right));
}
+#endif
+
+ hit->score_eventrim_start += Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
+ trim_left,/*trim_end*/hit->trim_right,hit->querylength_adj);
+ hit->score_eventrim_end += Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
+ /*trim_start*/hit->trim_left,trim_right,hit->querylength_adj);
+ debug4(printf(" add nmismatches %d or %d.",
+ Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
+ trim_left,/*trim_end*/hit->trim_right,hit->querylength_adj),
+ Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
+ /*trim_start*/hit->trim_left,trim_right,hit->querylength_adj)));
+ hit->score_eventrim_start += indel_penalty_middle * nindelbreaks;
+ hit->score_eventrim_end += indel_penalty_middle * nindelbreaks;
- hit->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
- trim_left,trim_right,hit->querylength_adj);
- debug4(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
- trim_left,trim_right,hit->querylength_adj)));
- hit->score_eventrim += indel_penalty_middle * nindelbreaks;
- hit->score_eventrim += hit->amb_nmatches_start / ambig_end_interval;
- debug4(printf(" add amb start %d/%d.",hit->amb_nmatches_start,ambig_end_interval));
- hit->score_eventrim += hit->amb_nmatches_end / ambig_end_interval;
- debug4(printf(" add amb end %d/%d.",hit->amb_nmatches_end,ambig_end_interval));
- debug4(printf(" RESULT: %d\n",hit->score_eventrim));
+ hit->score_eventrim_start += hit->start_amb_nmatches / ambig_end_interval;
+ debug4(printf(" add amb start %d/%d.",hit->start_amb_nmatches,ambig_end_interval));
+ hit->score_eventrim_end += hit->end_amb_nmatches / ambig_end_interval;
+ debug4(printf(" add amb end %d/%d.",hit->end_amb_nmatches,ambig_end_interval));
+ debug4(printf(" RESULT: %d or %d\n",hit->score_eventrim_start,hit->score_eventrim_end));
} else {
debug4(printf("score OTHER:"));
- hit->score_eventrim = hit->penalties;
+ hit->score_eventrim_start = hit->penalties;
+ hit->score_eventrim_end = hit->penalties;
debug4(printf(" penalties %d.",hit->penalties));
- hit->score_eventrim += Substring_count_mismatches_region(hit->substring0,trim_left,trim_right,
- query_compress_fwd,query_compress_rev);
- debug4(printf(" substring 0 %d.",Substring_count_mismatches_region(hit->substring0,trim_left,trim_right,
+ hit->score_eventrim_start += Substring_count_mismatches_region(hit->substring0,trim_left,/*trim_end*/hit->trim_right,
+ query_compress_fwd,query_compress_rev);
+ debug4(printf(" substring 0 %d.",Substring_count_mismatches_region(hit->substring0,trim_left,/*trim_end*/hit->trim_right,
+ query_compress_fwd,query_compress_rev)));
+
+ hit->score_eventrim_start += Substring_count_mismatches_region(hit->substring1,trim_left,/*trim_end*/hit->trim_right,
+ query_compress_fwd,query_compress_rev);
+ debug4(printf(" substring 1 %d.",Substring_count_mismatches_region(hit->substring1,trim_left,/*trim_end*/hit->trim_right,
+ query_compress_fwd,query_compress_rev)));
+
+ hit->score_eventrim_start += Substring_count_mismatches_region(hit->substring2,trim_left,/*trim_end*/hit->trim_right,
+ query_compress_fwd,query_compress_rev);
+ debug4(printf(" substring 2 %d.",Substring_count_mismatches_region(hit->substring2,trim_left,/*trim_end*/hit->trim_right,
query_compress_fwd,query_compress_rev)));
- hit->score_eventrim += Substring_count_mismatches_region(hit->substring1,trim_left,trim_right,
- query_compress_fwd,query_compress_rev);
- debug4(printf(" substring 1 %d.",Substring_count_mismatches_region(hit->substring1,trim_left,trim_right,
+
+ hit->score_eventrim_end += Substring_count_mismatches_region(hit->substring0,/*trim_start*/hit->trim_left,trim_right,
+ query_compress_fwd,query_compress_rev);
+ debug4(printf(" substring 0 %d.",Substring_count_mismatches_region(hit->substring0,/*trim_start*/hit->trim_left,trim_right,
+ query_compress_fwd,query_compress_rev)));
+
+ hit->score_eventrim_end += Substring_count_mismatches_region(hit->substring1,/*trim_start*/hit->trim_left,trim_right,
+ query_compress_fwd,query_compress_rev);
+ debug4(printf(" substring 1 %d.",Substring_count_mismatches_region(hit->substring1,/*trim_start*/hit->trim_left,trim_right,
query_compress_fwd,query_compress_rev)));
- hit->score_eventrim += Substring_count_mismatches_region(hit->substring2,trim_left,trim_right,
- query_compress_fwd,query_compress_rev);
- debug4(printf(" substring 2 %d.",Substring_count_mismatches_region(hit->substring2,trim_left,trim_right,
+ hit->score_eventrim_end += Substring_count_mismatches_region(hit->substring2,/*trim_start*/hit->trim_left,trim_right,
+ query_compress_fwd,query_compress_rev);
+ debug4(printf(" substring 2 %d.",Substring_count_mismatches_region(hit->substring2,/*trim_start*/hit->trim_left,trim_right,
query_compress_fwd,query_compress_rev)));
if (hit->hittype == INSERTION || hit->hittype == DELETION) {
- hit->score_eventrim += indel_penalty_middle;
+ hit->score_eventrim_start += indel_penalty_middle;
+ hit->score_eventrim_end += indel_penalty_middle;
debug4(printf(" add indel %d.",indel_penalty_middle));
}
- debug4(printf(" RESULT: %d\n",hit->score_eventrim));
+ debug4(printf(" RESULT: %d or %d\n",hit->score_eventrim_start,hit->score_eventrim_end));
}
}
@@ -5972,15 +6085,19 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
/* Skip from setting minscore */
}
#endif
- if (hit->score_eventrim < minscore) {
- minscore = hit->score_eventrim;
+ if (hit->score_eventrim_start < minscore_start) {
+ minscore_start = hit->score_eventrim_start;
+ }
+ if (hit->score_eventrim_end < minscore_end) {
+ minscore_end = hit->score_eventrim_end;
}
}
}
- debug4(printf("Stage3end_optimal_score over %d hits: minscore = %d + subopt:%d\n",
- n,minscore,suboptimal_mismatches));
- minscore += suboptimal_mismatches;
+ debug4(printf("Stage3end_optimal_score over %d hits: minscore = (%d or %d) + subopt:%d\n",
+ n,minscore_start,minscore_end,suboptimal_mismatches));
+ minscore_start += suboptimal_mismatches;
+ minscore_end += suboptimal_mismatches;
max_nmatches -= suboptimal_mismatches;
max_nmatches_posttrim -= suboptimal_mismatches;
@@ -5990,7 +6107,8 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
cutoff_level = minscore;
}
#else
- cutoff_level = minscore;
+ cutoff_level_start = minscore_start;
+ cutoff_level_end = minscore_end;
#endif
for (p = hitlist; p != NULL; p = p->rest) {
@@ -6025,22 +6143,23 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
}
#endif
- } else if (hit->score_eventrim > cutoff_level) {
+ } else if (hit->score_eventrim_start > cutoff_level_start && hit->score_eventrim_end > cutoff_level_end) {
/* For dibasep were previously using hit->ntscore, but gives false positives */
- debug4(printf("Eliminating a hit of type %s with score_eventrim %d > cutoff_level %d\n",
- hittype_string(hit->hittype),hit->score_eventrim,cutoff_level));
+ debug4(printf("Eliminating a hit of type %s with score_eventrim %d or %d > cutoff_level %d\n",
+ hittype_string(hit->hittype),hit->score_eventrim_start,hit->score_eventrim_end,cutoff_level));
*eliminatedp = true;
Stage3end_free(&hit);
- } else if (hit->score_eventrim > minscore /* && hit->nmatches_posttrim < max_nmatches_posttrim */) {
- debug4(printf("Eliminating a hit with score_eventrim %d and type %s\n",
- hit->score_eventrim,hittype_string(hit->hittype)));
+ } else if (hit->score_eventrim_start > minscore_start && hit->score_eventrim_end > minscore_end
+ /* && hit->nmatches_posttrim < max_nmatches_posttrim */) {
+ debug4(printf("Eliminating a hit with score_eventrim %d or %d and type %s\n",
+ hit->score_eventrim_start,hit->score_eventrim_end,hittype_string(hit->hittype)));
*eliminatedp = true;
Stage3end_free(&hit);
} else {
- debug4(printf("Keeping a hit with score_eventrim %d and type %s\n",
- hit->score_eventrim,hittype_string(hit->hittype)));
+ debug4(printf("Keeping a hit with score_eventrim %d or %d and type %s\n",
+ hit->score_eventrim_start,hit->score_eventrim_end,hittype_string(hit->hittype)));
optimal = List_push(optimal,hit);
}
}
@@ -7898,8 +8017,8 @@ print_shortexon (FILE *fp, T chimera, int score,
if (chimera->sensedir == SENSE_FORWARD && invertp == false) {
- distance1 = chimera->acceptor_distance;
- distance2 = chimera->donor_distance;
+ distance1 = chimera->shortexonA_distance;
+ distance2 = chimera->shortexonD_distance;
if (donor != NULL) {
fprintf(fp," ");
@@ -7932,8 +8051,8 @@ print_shortexon (FILE *fp, T chimera, int score,
}
} else if (chimera->sensedir == SENSE_FORWARD && invertp == true) {
- distance1 = chimera->donor_distance;
- distance2 = chimera->acceptor_distance;
+ distance1 = chimera->shortexonD_distance;
+ distance2 = chimera->shortexonA_distance;
if (acceptor != NULL) {
fprintf(fp," ");
@@ -7966,8 +8085,8 @@ print_shortexon (FILE *fp, T chimera, int score,
}
} else if (chimera->sensedir == SENSE_ANTI && invertp == false) {
- distance1 = chimera->donor_distance;
- distance2 = chimera->acceptor_distance;
+ distance1 = chimera->shortexonD_distance;
+ distance2 = chimera->shortexonA_distance;
if (acceptor != NULL) {
fprintf(fp," ");
@@ -8000,8 +8119,8 @@ print_shortexon (FILE *fp, T chimera, int score,
}
} else if (chimera->sensedir == SENSE_ANTI && invertp == true) {
- distance2 = chimera->donor_distance;
- distance1 = chimera->acceptor_distance;
+ distance2 = chimera->shortexonD_distance;
+ distance1 = chimera->shortexonA_distance;
if (donor != NULL) {
fprintf(fp," ");
@@ -8658,6 +8777,14 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
int splice_pos;
int ignore_found_score = 0;
+#ifdef LARGE_GENOMES
+ Uint8list_T ambcoords;
+#else
+ Uintlist_T ambcoords;
+#endif
+ Intlist_T amb_knowni, amb_nmismatches;
+
+
*unresolved_amb_nmatches = 0;
debug9(printf("resolve plus: hit5 %s ambiguous %d,%d and hit3 %s ambiguous %d,%d\n",
@@ -8712,8 +8839,8 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->amb_nmatches_end > 0);
- assert((*hit3)->amb_nmatches_start > 0);
+ assert((*hit5)->end_amb_nmatches > 0);
+ assert((*hit3)->start_amb_nmatches > 0);
#endif
#ifdef USE_BINGO
@@ -8726,8 +8853,8 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->amb_nmatches_end,(*hit3)->amb_nmatches_start));
- *unresolved_amb_nmatches = (*hit5)->amb_nmatches_end + (*hit3)->amb_nmatches_start;
+ (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
+ *unresolved_amb_nmatches = (*hit5)->end_amb_nmatches + (*hit3)->start_amb_nmatches;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new5p = true; new3p = true; bingoi5 = besti5; bingoi3 = besti3;
@@ -8775,8 +8902,8 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->amb_nmatches_end > 0);
- assert((*hit3)->amb_nmatches_start == 0);
+ assert((*hit5)->end_amb_nmatches > 0);
+ assert((*hit3)->start_amb_nmatches == 0);
#endif
#ifdef USE_BINGO
@@ -8789,8 +8916,8 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->amb_nmatches_end,(*hit3)->amb_nmatches_start));
- *unresolved_amb_nmatches = (*hit5)->amb_nmatches_end;
+ (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
+ *unresolved_amb_nmatches = (*hit5)->end_amb_nmatches;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new5p = true; bingoi5 = besti5;
@@ -8838,8 +8965,8 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->amb_nmatches_end == 0);
- assert((*hit3)->amb_nmatches_start > 0);
+ assert((*hit5)->end_amb_nmatches == 0);
+ assert((*hit3)->start_amb_nmatches > 0);
#endif
#ifdef USE_BINGO
@@ -8852,8 +8979,8 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->amb_nmatches_end,(*hit3)->amb_nmatches_start));
- *unresolved_amb_nmatches = (*hit3)->amb_nmatches_start;
+ (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
+ *unresolved_amb_nmatches = (*hit3)->start_amb_nmatches;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new3p = true; bingoi3 = besti3;
@@ -8872,9 +8999,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
donor_splicecoord = Substring_splicecoord_D(shortexon);
/* donor_knowni = Substring_splicesites_knowni_D(shortexon); */
splice_pos = Substring_chimera_pos_D(shortexon);
- acceptor_splicecoord = (*hit5)->end_ambcoords[bingoi5];
- acceptor_knowni = (*hit5)->end_amb_knowni[bingoi5];
- nmismatches_shortend = (*hit5)->end_amb_nmismatches[bingoi5];
+ acceptor_splicecoord = (*hit5)->ambcoords_acceptor[bingoi5];
+ acceptor_knowni = (*hit5)->amb_knowni_acceptor[bingoi5];
+ nmismatches_shortend = (*hit5)->amb_nmismatches_acceptor[bingoi5];
segment_left = acceptor_splicecoord - splice_pos;
if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,splice_pos,nmismatches_shortend,
@@ -8885,16 +9012,30 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
debug9(printf("Resolved shortexon, End 1: Splice from donor %u to acceptor %u, with nmismatches %d\n",
donor_splicecoord,acceptor_splicecoord,nmismatches_shortend));
old = *hit5;
- *hit5 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/(*hit5)->substringD,acceptor,shortexon,
- /*acceptor_distance*/(*hit5)->acceptor_distance,
- /*donor_distance*/acceptor_splicecoord - donor_splicecoord,
- (*hit5)->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_from_array(old->ambcoords_donor,old->nambcoords_donor);
+#else
+ ambcoords = Uintlist_from_array(old->ambcoords_donor,old->nambcoords_donor);
+#endif
+ amb_knowni = Intlist_from_array(old->amb_knowni_donor,old->nambcoords_donor);
+ amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
+
+ *hit5 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
+ old->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
localsplicing_penalty,querylength5,/*sensedir*/SENSE_FORWARD,
/*sarrayp*/false);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+
if (*private5p == true) {
Stage3end_free(&old);
}
@@ -8908,9 +9049,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
acceptor_splicecoord = Substring_splicecoord_A(shortexon);
/* acceptor_knowni = Substring_splicesites_knowni_A(shortexon); */
splice_pos = Substring_chimera_pos_A(shortexon);
- donor_splicecoord = (*hit5)->end_ambcoords[bingoi5];
- donor_knowni = (*hit5)->end_amb_knowni[bingoi5];
- nmismatches_shortend = (*hit5)->end_amb_nmismatches[bingoi5];
+ donor_splicecoord = (*hit5)->ambcoords_donor[bingoi5];
+ donor_knowni = (*hit5)->amb_knowni_donor[bingoi5];
+ nmismatches_shortend = (*hit5)->amb_nmismatches_donor[bingoi5];
segment_left = donor_splicecoord - splice_pos;
if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,splice_pos,nmismatches_shortend,
@@ -8921,16 +9062,30 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
debug9(printf("Resolved shortexon, End 6: Splice from antiacceptor %u to antidonor %u, with nmismatches %d\n",
acceptor_splicecoord,donor_splicecoord,nmismatches_shortend));
old = *hit5;
- *hit5 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/(*hit5)->substringA,shortexon,
- /*acceptor_distance*/donor_splicecoord - acceptor_splicecoord,
- /*donor_distance*/(*hit5)->donor_distance,
- /*amb_nmatches_donor*/0,(*hit5)->amb_nmatches_acceptor,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
+#else
+ ambcoords = Uintlist_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
+#endif
+ amb_knowni = Intlist_from_array(old->amb_knowni_acceptor,old->nambcoords_acceptor);
+ amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
+
+ *hit5 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
+ /*amb_nmatches_donor*/0,old->amb_nmatches_acceptor,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
localsplicing_penalty,querylength5,/*sensedir*/SENSE_ANTI,
/*sarrayp*/false);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+
if (*private5p == true) {
Stage3end_free(&old);
}
@@ -8951,9 +9106,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
donor_splicecoord = Substring_splicecoord(donor);
/* donor_knowni = Substring_splicesites_knowni(donor); */
splice_pos = Substring_chimera_pos(donor);
- acceptor_splicecoord = (*hit5)->end_ambcoords[bingoi5];
- acceptor_knowni = (*hit5)->end_amb_knowni[bingoi5];
- nmismatches_shortend = (*hit5)->end_amb_nmismatches[bingoi5];
+ acceptor_splicecoord = (*hit5)->ambcoords_acceptor[bingoi5];
+ acceptor_knowni = (*hit5)->amb_knowni_acceptor[bingoi5];
+ nmismatches_shortend = (*hit5)->amb_nmismatches_acceptor[bingoi5];
segment_left = acceptor_splicecoord - splice_pos;
if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,splice_pos,nmismatches_shortend,
@@ -8966,10 +9121,10 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
old = *hit5;
*hit5 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/true,/*sensedir*/SENSE_FORWARD,
/*sarrayp*/false);
if (*private5p == true) {
@@ -8986,9 +9141,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
acceptor_splicecoord = Substring_splicecoord(acceptor);
/* acceptor_knowni = Substring_splicesites_knowni(acceptor); */
splice_pos = Substring_chimera_pos(acceptor);
- donor_splicecoord = (*hit5)->end_ambcoords[bingoi5];
- donor_knowni = (*hit5)->end_amb_knowni[bingoi5];
- nmismatches_shortend = (*hit5)->end_amb_nmismatches[bingoi5];
+ donor_splicecoord = (*hit5)->ambcoords_donor[bingoi5];
+ donor_knowni = (*hit5)->amb_knowni_donor[bingoi5];
+ nmismatches_shortend = (*hit5)->amb_nmismatches_donor[bingoi5];
segment_left = donor_splicecoord - splice_pos;
if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,splice_pos,nmismatches_shortend,
@@ -9001,10 +9156,10 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
old = *hit5;
*hit5 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/true,/*sensedir*/SENSE_ANTI,
/*sarrayp*/false);
if (*private5p == true) {
@@ -9030,9 +9185,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
donor_splicecoord = Substring_splicecoord_D(shortexon);
/* donor_knowni = Substring_splicesites_knowni_D(shortexon); */
splice_pos = Substring_chimera_pos_D(shortexon);
- acceptor_splicecoord = (*hit3)->start_ambcoords[bingoi3];
- acceptor_knowni = (*hit3)->start_amb_knowni[bingoi3];
- nmismatches_shortend = (*hit3)->start_amb_nmismatches[bingoi3];
+ acceptor_splicecoord = (*hit3)->ambcoords_acceptor[bingoi3];
+ acceptor_knowni = (*hit3)->amb_knowni_acceptor[bingoi3];
+ nmismatches_shortend = (*hit3)->amb_nmismatches_acceptor[bingoi3];
segment_left = acceptor_splicecoord - splice_pos;
if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,splice_pos,nmismatches_shortend,
@@ -9043,16 +9198,30 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
debug9(printf("Resolved shortexonr, End 5: Splice from antidonor #%d to antiacceptor #%d, with nmismatches %d\n",
donor_splicecoord,acceptor_splicecoord,nmismatches_shortend));
old = *hit3;
- *hit3 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/(*hit3)->substringD,acceptor,shortexon,
- /*acceptor_distance*/(*hit3)->acceptor_distance,
- /*donor_distance*/donor_splicecoord - acceptor_splicecoord,
- (*hit3)->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_from_array(old->ambcoords_donor,old->nambcoords_donor);
+#else
+ ambcoords = Uintlist_from_array(old->ambcoords_donor,old->nambcoords_donor);
+#endif
+ amb_knowni = Intlist_from_array(old->amb_knowni_donor,old->nambcoords_donor);
+ amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
+
+ *hit3 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
+ old->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
localsplicing_penalty,querylength3,/*sensedir*/SENSE_ANTI,
/*sarrayp*/false);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+
if (*private3p == true) {
Stage3end_free(&old);
}
@@ -9066,9 +9235,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
acceptor_splicecoord = Substring_splicecoord_A(shortexon);
/* acceptor_knowni = Substring_splicesites_knowni_A(shortexon); */
splice_pos = Substring_chimera_pos_A(shortexon);
- donor_splicecoord = (*hit3)->start_ambcoords[bingoi3];
- donor_knowni = (*hit3)->start_amb_knowni[bingoi3];
- nmismatches_shortend = (*hit3)->start_amb_nmismatches[bingoi3];
+ donor_splicecoord = (*hit3)->ambcoords_donor[bingoi3];
+ donor_knowni = (*hit3)->amb_knowni_donor[bingoi3];
+ nmismatches_shortend = (*hit3)->amb_nmismatches_donor[bingoi3];
segment_left = donor_splicecoord - splice_pos;
if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,splice_pos,nmismatches_shortend,
@@ -9079,16 +9248,30 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
debug9(printf("Resolved shortexon, End 2: Splice from acceptor #%d to donor #%d, with nmismatches %d\n",
acceptor_splicecoord,donor_splicecoord,nmismatches_shortend));
old = *hit3;
- *hit3 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/(*hit3)->substringA,shortexon,
- /*acceptor_distance*/acceptor_splicecoord - donor_splicecoord,
- /*donor_distance*/(*hit3)->donor_distance,
- /*amb_nmatches_donor*/0,(*hit3)->amb_nmatches_acceptor,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
+#else
+ ambcoords = Uintlist_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
+#endif
+ amb_knowni = Intlist_from_array(old->amb_knowni_acceptor,old->nambcoords_acceptor);
+ amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
+
+ *hit3 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
+ /*amb_nmatches_donor*/0,old->amb_nmatches_acceptor,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
localsplicing_penalty,querylength3,/*sensedir*/SENSE_FORWARD,
/*sarrayp*/false);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+
if (*private3p == true) {
Stage3end_free(&old);
}
@@ -9109,9 +9292,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
donor_splicecoord = Substring_splicecoord(donor);
/* donor_knowni = Substring_splicesites_knowni(donor); */
splice_pos = Substring_chimera_pos(donor);
- acceptor_splicecoord = (*hit3)->start_ambcoords[bingoi3];
- acceptor_knowni = (*hit3)->start_amb_knowni[bingoi3];
- nmismatches_shortend = (*hit3)->start_amb_nmismatches[bingoi3];
+ acceptor_splicecoord = (*hit3)->ambcoords_acceptor[bingoi3];
+ acceptor_knowni = (*hit3)->amb_knowni_acceptor[bingoi3];
+ nmismatches_shortend = (*hit3)->amb_nmismatches_acceptor[bingoi3];
segment_left = acceptor_splicecoord - splice_pos;
if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,splice_pos,nmismatches_shortend,
@@ -9124,10 +9307,10 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
old = *hit3;
*hit3 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/false,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false);
if (*private3p == true) {
@@ -9144,9 +9327,9 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
acceptor_splicecoord = Substring_splicecoord(acceptor);
/* acceptor_knowni = Substring_splicesites_knowni(acceptor); */
splice_pos = Substring_chimera_pos(acceptor);
- donor_splicecoord = (*hit3)->start_ambcoords[bingoi3];
- donor_knowni = (*hit3)->start_amb_knowni[bingoi3];
- nmismatches_shortend = (*hit3)->start_amb_nmismatches[bingoi3];
+ donor_splicecoord = (*hit3)->ambcoords_donor[bingoi3];
+ donor_knowni = (*hit3)->amb_knowni_donor[bingoi3];
+ nmismatches_shortend = (*hit3)->amb_nmismatches_donor[bingoi3];
segment_left = donor_splicecoord - splice_pos;
if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,splice_pos,nmismatches_shortend,
@@ -9160,10 +9343,10 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_nmatches, T *hit5, T *
old = *hit3;
*hit3 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/false,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false);
if (*private3p == true) {
@@ -9204,6 +9387,14 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
int splice_pos;
int ignore_found_score = 0;
+#ifdef LARGE_GENOMES
+ Uint8list_T ambcoords;
+#else
+ Uintlist_T ambcoords;
+#endif
+ Intlist_T amb_knowni, amb_nmismatches;
+
+
*unresolved_amb_nmatches = 0;
debug9(printf("resolve minus: hit5 %s ambiguous %d,%d and hit3 %s ambiguous %d,%d\n",
@@ -9258,8 +9449,8 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->amb_nmatches_end > 0);
- assert((*hit3)->amb_nmatches_start > 0);
+ assert((*hit5)->end_amb_nmatches > 0);
+ assert((*hit3)->start_amb_nmatches > 0);
#endif
#ifdef USE_BINGO
@@ -9272,8 +9463,8 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->amb_nmatches_end,(*hit3)->amb_nmatches_start));
- *unresolved_amb_nmatches = (*hit5)->amb_nmatches_end + (*hit3)->amb_nmatches_start;
+ (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
+ *unresolved_amb_nmatches = (*hit5)->end_amb_nmatches + (*hit3)->start_amb_nmatches;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new5p = true; new3p = true; bingoi5 = besti5; bingoi3 = besti3;
@@ -9322,8 +9513,8 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->amb_nmatches_end > 0);
- assert((*hit3)->amb_nmatches_start == 0);
+ assert((*hit5)->end_amb_nmatches > 0);
+ assert((*hit3)->start_amb_nmatches == 0);
#endif
#ifdef USE_BINGO
@@ -9336,8 +9527,8 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->amb_nmatches_end,(*hit3)->amb_nmatches_start));
- *unresolved_amb_nmatches = (*hit5)->amb_nmatches_end;
+ (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
+ *unresolved_amb_nmatches = (*hit5)->end_amb_nmatches;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new5p = true; bingoi5 = besti5;
@@ -9385,8 +9576,8 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
#if 0
/* No longer holds for GMAP */
- assert((*hit5)->amb_nmatches_end == 0);
- assert((*hit3)->amb_nmatches_start > 0);
+ assert((*hit5)->end_amb_nmatches == 0);
+ assert((*hit3)->start_amb_nmatches > 0);
#endif
#ifdef USE_BINGO
@@ -9399,8 +9590,8 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_nmatches = %d...%d",
- (*hit5)->amb_nmatches_end,(*hit3)->amb_nmatches_start));
- *unresolved_amb_nmatches = (*hit3)->amb_nmatches_start;
+ (*hit5)->end_amb_nmatches,(*hit3)->start_amb_nmatches));
+ *unresolved_amb_nmatches = (*hit3)->start_amb_nmatches;
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
new3p = true; bingoi3 = besti3;
@@ -9419,9 +9610,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
donor_splicecoord = Substring_splicecoord_D(shortexon);
/* donor_knowni = Substring_splicesites_knowni_D(shortexon); */
splice_pos = Substring_chimera_pos_D(shortexon);
- acceptor_splicecoord = (*hit5)->end_ambcoords[bingoi5];
- acceptor_knowni = (*hit5)->end_amb_knowni[bingoi5];
- nmismatches_shortend = (*hit5)->end_amb_nmismatches[bingoi5];
+ acceptor_splicecoord = (*hit5)->ambcoords_acceptor[bingoi5];
+ acceptor_knowni = (*hit5)->amb_knowni_acceptor[bingoi5];
+ nmismatches_shortend = (*hit5)->amb_nmismatches_acceptor[bingoi5];
segment_left = acceptor_splicecoord - (querylength5 - splice_pos);
if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,
@@ -9433,16 +9624,30 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
debug9(printf("Resolved shortexon, End 3: Splice from donor #%d to acceptor #%d, with nmismatches %d\n",
donor_splicecoord,acceptor_splicecoord,nmismatches_shortend));
old = *hit5;
- *hit5 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/(*hit5)->substringD,acceptor,shortexon,
- /*acceptor_distance*/(*hit5)->acceptor_distance,
- /*donor_distance*/donor_splicecoord - acceptor_splicecoord,
- (*hit5)->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_from_array(old->ambcoords_donor,old->nambcoords_donor);
+#else
+ ambcoords = Uintlist_from_array(old->ambcoords_donor,old->nambcoords_donor);
+#endif
+ amb_knowni = Intlist_from_array(old->amb_knowni_donor,old->nambcoords_donor);
+ amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
+
+ *hit5 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
+ old->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
localsplicing_penalty,querylength5,/*sensedir*/SENSE_FORWARD,
/*sarrayp*/false);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+
if (*private5p == true) {
Stage3end_free(&old);
}
@@ -9456,9 +9661,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
acceptor_splicecoord = Substring_splicecoord_A(shortexon);
/* acceptor_knowni = Substring_splicesites_knowni_A(shortexon); */
splice_pos = Substring_chimera_pos_A(shortexon);
- donor_splicecoord = (*hit5)->end_ambcoords[bingoi5];
- donor_knowni = (*hit5)->end_amb_knowni[bingoi5];
- nmismatches_shortend = (*hit5)->end_amb_nmismatches[bingoi5];
+ donor_splicecoord = (*hit5)->ambcoords_donor[bingoi5];
+ donor_knowni = (*hit5)->amb_knowni_donor[bingoi5];
+ nmismatches_shortend = (*hit5)->amb_nmismatches_donor[bingoi5];
segment_left = donor_splicecoord - (querylength5 - splice_pos);
if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,querylength5 - splice_pos,nmismatches_shortend,
@@ -9469,16 +9674,30 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
debug9(printf("Resolved shortexon, End 8: Splice from antiacceptor #%d to antidonor #%d, with nmismatches_shortend %d\n",
acceptor_splicecoord,donor_splicecoord,nmismatches_shortend));
old = *hit5;
- *hit5 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/(*hit5)->substringA,shortexon,
- /*acceptor_distance*/acceptor_splicecoord - donor_splicecoord,
- /*donor_distance*/(*hit5)->donor_distance,
- /*amb_nmatches_donor*/0,(*hit5)->amb_nmatches_acceptor,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
+#else
+ ambcoords = Uintlist_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
+#endif
+ amb_knowni = Intlist_from_array(old->amb_knowni_acceptor,old->nambcoords_acceptor);
+ amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
+
+ *hit5 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
+ /*amb_nmatches_donor*/0,old->amb_nmatches_acceptor,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
localsplicing_penalty,querylength5,/*sensedir*/SENSE_ANTI,
/*sarrayp*/false);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+
if (*private5p == true) {
Stage3end_free(&old);
}
@@ -9498,9 +9717,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
donor_splicecoord = Substring_splicecoord(donor);
/* donor_knowni = Substring_splicesites_knowni(donor); */
splice_pos = Substring_chimera_pos(donor);
- acceptor_splicecoord = (*hit5)->end_ambcoords[bingoi5];
- acceptor_knowni = (*hit5)->end_amb_knowni[bingoi5];
- nmismatches_shortend = (*hit5)->end_amb_nmismatches[bingoi5];
+ acceptor_splicecoord = (*hit5)->ambcoords_acceptor[bingoi5];
+ acceptor_knowni = (*hit5)->amb_knowni_acceptor[bingoi5];
+ nmismatches_shortend = (*hit5)->amb_nmismatches_acceptor[bingoi5];
segment_left = acceptor_splicecoord - (querylength5 - splice_pos);
if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,querylength5 - splice_pos,nmismatches_shortend,
@@ -9513,10 +9732,10 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
old = *hit5;
*hit5 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/true,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false);
if (*private5p == true) {
@@ -9533,9 +9752,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
acceptor_splicecoord = Substring_splicecoord(acceptor);
/* acceptor_knowni = Substring_splicesites_knowni(acceptor); */
splice_pos = Substring_chimera_pos(acceptor);
- donor_splicecoord = (*hit5)->end_ambcoords[bingoi5];
- donor_knowni = (*hit5)->end_amb_knowni[bingoi5];
- nmismatches_shortend = (*hit5)->end_amb_nmismatches[bingoi5];
+ donor_splicecoord = (*hit5)->ambcoords_donor[bingoi5];
+ donor_knowni = (*hit5)->amb_knowni_donor[bingoi5];
+ nmismatches_shortend = (*hit5)->amb_nmismatches_donor[bingoi5];
segment_left = donor_splicecoord - (querylength5 - splice_pos);
if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,querylength5 - splice_pos,nmismatches_shortend,
@@ -9548,10 +9767,10 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
old = *hit5;
*hit5 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/true,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false);
if (*private5p == true) {
@@ -9577,9 +9796,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
donor_splicecoord = Substring_splicecoord_D(shortexon);
/* donor_knowni = Substring_splicesites_knowni_D(shortexon); */
splice_pos = Substring_chimera_pos_D(shortexon);
- acceptor_splicecoord = (*hit3)->start_ambcoords[bingoi3];
- acceptor_knowni = (*hit3)->start_amb_knowni[bingoi3];
- nmismatches_shortend = (*hit3)->start_amb_nmismatches[bingoi3];
+ acceptor_splicecoord = (*hit3)->ambcoords_acceptor[bingoi3];
+ acceptor_knowni = (*hit3)->amb_knowni_acceptor[bingoi3];
+ nmismatches_shortend = (*hit3)->amb_nmismatches_acceptor[bingoi3];
segment_left = acceptor_splicecoord - (querylength3 - splice_pos);
if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,querylength3 - splice_pos,nmismatches_shortend,
@@ -9590,16 +9809,30 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
debug9(printf("Resolved shortexon, End 7: Splice from antidonor #%d to antiacceptor #%d, with nmismatches %d\n",
donor_splicecoord,acceptor_splicecoord,nmismatches_shortend));
old = *hit3;
- *hit3 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/(*hit3)->substringD,acceptor,shortexon,
- /*acceptor_distance*/(*hit3)->acceptor_distance,
- /*donor_distance*/acceptor_splicecoord - donor_splicecoord,
- (*hit3)->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_from_array(old->ambcoords_donor,old->nambcoords_donor);
+#else
+ ambcoords = Uintlist_from_array(old->ambcoords_donor,old->nambcoords_donor);
+#endif
+ amb_knowni = Intlist_from_array(old->amb_knowni_donor,old->nambcoords_donor);
+ amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
+
+ *hit3 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
+ old->amb_nmatches_donor,/*amb_nmatches_acceptor*/0,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
localsplicing_penalty,querylength3,/*sensedir*/SENSE_ANTI,
/*sarrayp*/false);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+
if (*private3p == true) {
Stage3end_free(&old);
}
@@ -9613,9 +9846,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
acceptor_splicecoord = Substring_splicecoord_A(shortexon);
/* acceptor_knowni = Substring_splicesites_knowni_A(shortexon); */
splice_pos = Substring_chimera_pos_A(shortexon);
- donor_splicecoord = (*hit3)->start_ambcoords[bingoi3];
- donor_knowni = (*hit3)->start_amb_knowni[bingoi3];
- nmismatches_shortend = (*hit3)->start_amb_nmismatches[bingoi3];
+ donor_splicecoord = (*hit3)->ambcoords_donor[bingoi3];
+ donor_knowni = (*hit3)->amb_knowni_donor[bingoi3];
+ nmismatches_shortend = (*hit3)->amb_nmismatches_donor[bingoi3];
segment_left = donor_splicecoord - (querylength3 - splice_pos);
if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,querylength3 - splice_pos,nmismatches_shortend,
@@ -9626,16 +9859,30 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
debug9(printf("Resolved halfsplice_acceptor, End 4: Splice from acceptor #%d to #%d, with nmismatches %d\n",
acceptor_splicecoord,donor_splicecoord,nmismatches_shortend));
old = *hit3;
- *hit3 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/(*hit3)->substringA,shortexon,
- /*acceptor_distance*/donor_splicecoord - acceptor_splicecoord,
- /*donor_distance*/(*hit3)->donor_distance,
- /*amb_nmatches_donor*/0,(*hit3)->amb_nmatches_acceptor,
- /*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+#ifdef LARGE_GENOMES
+ ambcoords = Uint8list_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
+#else
+ ambcoords = Uintlist_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
+#endif
+ amb_knowni = Intlist_from_array(old->amb_knowni_acceptor,old->nambcoords_acceptor);
+ amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
+
+ *hit3 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
+ /*amb_nmatches_donor*/0,old->amb_nmatches_acceptor,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
localsplicing_penalty,querylength3,/*sensedir*/SENSE_FORWARD,
/*sarrayp*/false);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
+#ifdef LARGE_GENOMES
+ Uint8list_free(&ambcoords);
+#else
+ Uintlist_free(&ambcoords);
+#endif
+
if (*private3p == true) {
Stage3end_free(&old);
}
@@ -9655,9 +9902,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
donor_splicecoord = Substring_splicecoord(donor);
/* donor_knowni = Substring_splicesites_knowni(donor); */
splice_pos = Substring_chimera_pos(donor);
- acceptor_splicecoord = (*hit3)->start_ambcoords[bingoi3];
- acceptor_knowni = (*hit3)->start_amb_knowni[bingoi3];
- nmismatches_shortend = (*hit3)->start_amb_nmismatches[bingoi3];
+ acceptor_splicecoord = (*hit3)->ambcoords_acceptor[bingoi3];
+ acceptor_knowni = (*hit3)->amb_knowni_acceptor[bingoi3];
+ nmismatches_shortend = (*hit3)->amb_nmismatches_acceptor[bingoi3];
segment_left = acceptor_splicecoord - (querylength3 - splice_pos);
if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,querylength3 - splice_pos,nmismatches_shortend,
@@ -9670,10 +9917,10 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
old = *hit3;
*hit3 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/false,
/*sensedir*/SENSE_ANTI,/*sarrayp*/false);
if (*private3p == true) {
@@ -9690,9 +9937,9 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
acceptor_splicecoord = Substring_splicecoord(acceptor);
/* acceptor_knowni = Substring_splicesites_knowni(acceptor); */
splice_pos = Substring_chimera_pos(acceptor);
- donor_splicecoord = (*hit3)->start_ambcoords[bingoi3];
- donor_knowni = (*hit3)->start_amb_knowni[bingoi3];
- nmismatches_shortend = (*hit3)->start_amb_nmismatches[bingoi3];
+ donor_splicecoord = (*hit3)->ambcoords_donor[bingoi3];
+ donor_knowni = (*hit3)->amb_knowni_donor[bingoi3];
+ nmismatches_shortend = (*hit3)->amb_nmismatches_donor[bingoi3];
segment_left = donor_splicecoord - (querylength3 - splice_pos);
if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,querylength3 - splice_pos,nmismatches_shortend,
@@ -9705,10 +9952,10 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_nmatches, T *hit5, T
old = *hit3;
*hit3 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_knowni_left*/NULL,/*amb_knowni_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/false,
/*sensedir*/SENSE_FORWARD,/*sarrayp*/false);
if (*private3p == true) {
@@ -10049,18 +10296,18 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (hit5->sensedir == SENSE_FORWARD) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/Substring_nmismatches_whole(hit5->substring1),
/*nmismatches_acceptor*/0,/*donor*/hit5->substring1,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/true,
/*sensedir*/hit5->sensedir,hit5->sarrayp);
} else if (hit5->sensedir == SENSE_ANTI) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/0,
/*nmismatches_acceptor*/Substring_nmismatches_whole(hit5->substring1),/*donor*/NULL,
/*acceptor*/hit5->substring1,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/true,
/*sensedir*/hit5->sensedir,hit5->sarrayp);
} else {
@@ -10080,17 +10327,17 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/0,
/*nmismatches_acceptor*/Substring_nmismatches_whole(hit3->substring2),/*donor*/NULL,
/*acceptor*/hit3->substring2,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/true,
/*sensedir*/hit3->sensedir,hit3->sarrayp);
} else if (hit3->sensedir == SENSE_ANTI) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/Substring_nmismatches_whole(hit3->substring2),
/*nmismatches_acceptor*/0,/*donor*/hit3->substring2,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/true,
/*sensedir*/hit3->sensedir,hit3->sarrayp);
} else {
@@ -10172,18 +10419,18 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (hit5->sensedir == SENSE_FORWARD) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/Substring_nmismatches_whole(hit5->substring1),
/*nmismatches_acceptor*/0,/*donor*/hit5->substring1,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/false,
/*sensedir*/hit5->sensedir,hit5->sarrayp);
} else if (hit5->sensedir == SENSE_ANTI) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/0,
/*nmismatches_acceptor*/Substring_nmismatches_whole(hit5->substring1),/*donor*/NULL,
/*acceptor*/hit5->substring1,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,hit5->querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/false,
/*sensedir*/hit5->sensedir,hit5->sarrayp);
} else {
@@ -10203,17 +10450,17 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/0,
/*nmismatches_acceptor*/Substring_nmismatches_whole(hit3->substring2),/*donor*/NULL,
/*acceptor*/hit3->substring2,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/false,
/*sensedir*/hit3->sensedir,hit3->sarrayp);
} else if (hit3->sensedir == SENSE_ANTI) {
copy = Stage3end_new_splice(&found_score,/*nmismatches_donor*/Substring_nmismatches_whole(hit3->substring2),
/*nmismatches_acceptor*/0,/*donor*/hit3->substring2,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,
- /*amb_nmatches*/0,/*ambcoords_left*/NULL,/*ambcoords_right*/NULL,
- /*amb_nmismatches_left*/NULL,/*amb_nmismatches_right*/NULL,
+ /*shortdistancep*/true,localsplicing_penalty,hit3->querylength,/*amb_nmatches*/0,
+ /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+ /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/false,
/*sensedir*/hit3->sensedir,hit3->sarrayp);
} else {
@@ -12410,9 +12657,10 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
List_T optimal = NULL, p;
Stage3pair_T hitpair;
T hit5, hit3;
- int cutoff_level_5, cutoff_level_3, score;
+ int cutoff_level_5_start, cutoff_level_5_end, cutoff_level_3_start, cutoff_level_3_end, score;
int n;
- int minscore5 = MAX_READLENGTH, minscore3 = MAX_READLENGTH, minscore = MAX_READLENGTH + MAX_READLENGTH;
+ int minscore5_start = MAX_READLENGTH, minscore5_end = MAX_READLENGTH,
+ minscore3_start = MAX_READLENGTH, minscore3_end = MAX_READLENGTH, minscore = MAX_READLENGTH + MAX_READLENGTH;
/* int max_nmatches = 0, max_nmatches_posttrim, minscore = MAX_READLENGTH + MAX_READLENGTH; */
#ifdef USE_OPTIMAL_SCORE_BINGO
int minscore_bingo = MAX_READLENGTH + MAX_READLENGTH;
@@ -12594,9 +12842,9 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
if (hit5->hittype == TERMINAL && non_double_terminal_p == true && finalp == false) {
/* Ignore */
- hit5->score_eventrim = 0;
+ hit5->score_eventrim_start = hit5->score_eventrim_end = 0;
} else if (hit5->hittype == GMAP) {
- hit5->score_eventrim = 0; /* was hit5->penalties */
+ hit5->score_eventrim_start = hit5->score_eventrim_end = 0; /* was hit5->penalties */
debug6(printf("score 5' GMAP:"));
#if 0
if (Stage3end_bad_stretch_p(hit5,query5_compress_fwd,query5_compress_rev) == true) {
@@ -12605,6 +12853,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
}
#endif
+#if 0
if (0 && hit5->trim_left <= 8) {
/* Ignore small trims */
} else if (hit5->trim_left > trim_left_5) {
@@ -12617,50 +12866,75 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hit5->score_eventrim += hit5->trim_right - trim_right_5;
debug6(printf(" add trim right (%d - %d).",hit5->trim_right,trim_right_5));
}
+#endif
- hit5->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
- trim_left_5,trim_right_5,hit5->querylength_adj);
- debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
- trim_left_5,trim_right_5,hit5->querylength_adj)));
- hit5->score_eventrim += indel_penalty_middle * nindelbreaks;
- hit5->score_eventrim += hit5->amb_nmatches_start / ambig_end_interval;
- debug6(printf(" add amb start %d/%d.",hit5->amb_nmatches_start,ambig_end_interval));
- hit5->score_eventrim += hit5->amb_nmatches_end / ambig_end_interval;
- debug6(printf(" add amb end %d/%d.",hit5->amb_nmatches_end,ambig_end_interval));
- debug6(printf(" RESULT: %d\n",hit5->score_eventrim));
+ hit5->score_eventrim_start += Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
+ trim_left_5,/*trim_end*/hit5->trim_right,hit5->querylength_adj);
+ hit5->score_eventrim_end += Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
+ /*trim_start*/hit5->trim_left,trim_right_5,hit5->querylength_adj);
+ debug6(printf(" add nmismatches %d or %d.",
+ Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
+ trim_left_5,/*trim_end*/hit5->trim_right,hit5->querylength_adj),
+ Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
+ /*trim_start*/hit5->trim_left,trim_right_5,hit5->querylength_adj)));
+ hit5->score_eventrim_start += indel_penalty_middle * nindelbreaks;
+ hit5->score_eventrim_end += indel_penalty_middle * nindelbreaks;
+ hit5->score_eventrim_start += hit5->start_amb_nmatches / ambig_end_interval;
+ debug6(printf(" add amb start %d/%d.",hit5->start_amb_nmatches,ambig_end_interval));
+ hit5->score_eventrim_end += hit5->end_amb_nmatches / ambig_end_interval;
+ debug6(printf(" add amb end %d/%d.",hit5->end_amb_nmatches,ambig_end_interval));
+ debug6(printf(" RESULT: %d or %d\n",hit5->score_eventrim_start,hit5->score_eventrim_end));
} else {
debug6(printf("score 5' OTHER:"));
- hit5->score_eventrim = hit5->penalties;
+ hit5->score_eventrim_start = hit5->penalties;
+ hit5->score_eventrim_end = hit5->penalties;
debug6(printf(" penalties %d.",hit5->penalties));
- hit5->score_eventrim += Substring_count_mismatches_region(hit5->substring0,trim_left_5,trim_right_5,
- query5_compress_fwd,query5_compress_rev);
- debug6(printf(" substring 0 %d.",Substring_count_mismatches_region(hit5->substring0,trim_left_5,trim_right_5,
+ hit5->score_eventrim_start += Substring_count_mismatches_region(hit5->substring0,trim_left_5,/*trim_end*/hit5->trim_right,
+ query5_compress_fwd,query5_compress_rev);
+ debug6(printf(" substring 0 %d.",Substring_count_mismatches_region(hit5->substring0,trim_left_5,/*trim_end*/hit5->trim_right,
+ query5_compress_fwd,query5_compress_rev)));
+
+ hit5->score_eventrim_start += Substring_count_mismatches_region(hit5->substring1,trim_left_5,/*trim_end*/hit5->trim_right,
+ query5_compress_fwd,query5_compress_rev);
+ debug6(printf(" substring 1 %d.",Substring_count_mismatches_region(hit5->substring1,trim_left_5,/*trim_end*/hit5->trim_right,
query5_compress_fwd,query5_compress_rev)));
- hit5->score_eventrim += Substring_count_mismatches_region(hit5->substring1,trim_left_5,trim_right_5,
- query5_compress_fwd,query5_compress_rev);
- debug6(printf(" substring 1 %d.",Substring_count_mismatches_region(hit5->substring1,trim_left_5,trim_right_5,
+ hit5->score_eventrim_start += Substring_count_mismatches_region(hit5->substring2,trim_left_5,/*trim_end*/hit5->trim_right,
+ query5_compress_fwd,query5_compress_rev);
+ debug6(printf(" substring 2 %d.",Substring_count_mismatches_region(hit5->substring2,trim_left_5,/*trim_end*/hit5->trim_right,
query5_compress_fwd,query5_compress_rev)));
- hit5->score_eventrim += Substring_count_mismatches_region(hit5->substring2,trim_left_5,trim_right_5,
- query5_compress_fwd,query5_compress_rev);
- debug6(printf(" substring 2 %d.",Substring_count_mismatches_region(hit5->substring2,trim_left_5,trim_right_5,
+
+ hit5->score_eventrim_end += Substring_count_mismatches_region(hit5->substring0,/*trim_start*/hit5->trim_left,trim_right_5,
+ query5_compress_fwd,query5_compress_rev);
+ debug6(printf(" substring 0 %d.",Substring_count_mismatches_region(hit5->substring0,/*trim_start*/hit5->trim_left,trim_right_5,
+ query5_compress_fwd,query5_compress_rev)));
+
+ hit5->score_eventrim_end += Substring_count_mismatches_region(hit5->substring1,/*trim_start*/hit5->trim_left,trim_right_5,
+ query5_compress_fwd,query5_compress_rev);
+ debug6(printf(" substring 1 %d.",Substring_count_mismatches_region(hit5->substring1,/*trim_start*/hit5->trim_left,trim_right_5,
+ query5_compress_fwd,query5_compress_rev)));
+
+ hit5->score_eventrim_end += Substring_count_mismatches_region(hit5->substring2,/*trim_start*/hit5->trim_left,trim_right_5,
+ query5_compress_fwd,query5_compress_rev);
+ debug6(printf(" substring 2 %d.",Substring_count_mismatches_region(hit5->substring2,/*trim_start*/hit5->trim_left,trim_right_5,
query5_compress_fwd,query5_compress_rev)));
if (hit5->hittype == INSERTION || hit5->hittype == DELETION) {
- hit5->score_eventrim += indel_penalty_middle;
+ hit5->score_eventrim_start += indel_penalty_middle;
+ hit5->score_eventrim_end += indel_penalty_middle;
debug6(printf(" add indel %d.",indel_penalty_middle));
}
- debug6(printf(" RESULT: %d\n",hit5->score_eventrim));
+ debug6(printf(" RESULT: %d or %d\n",hit5->score_eventrim_start,hit5->score_eventrim_end));
}
if (hit3->hittype == TERMINAL && non_double_terminal_p == true && finalp == false) {
/* Ignore */
- hit3->score_eventrim = 0;
+ hit3->score_eventrim_start = hit3->score_eventrim_end = 0;
} else if (hit3->hittype == GMAP) {
- hit3->score_eventrim = 0; /* was hit3->penalties */
+ hit3->score_eventrim_start = hit3->score_eventrim_end = 0; /* was hit3->penalties */
debug6(printf("score 3' GMAP:"));
#if 0
if (Stage3end_bad_stretch_p(hit3,query3_compress_fwd,query3_compress_rev) == true) {
@@ -12669,6 +12943,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
}
#endif
+#if 0
if (0 && hit3->trim_left <= 8) {
/* Ignore small trims */
} else if (hit3->trim_left > trim_left_3) {
@@ -12681,45 +12956,78 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hit3->score_eventrim += hit3->trim_right - trim_right_3;
debug6(printf(" add trim right (%d - %d).",hit3->trim_right,trim_right_3));
}
+#endif
- hit3->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
- trim_left_3,trim_right_3,hit3->querylength_adj);
- debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
- trim_left_3,trim_right_3,hit3->querylength_adj)));
- hit3->score_eventrim += indel_penalty_middle * nindelbreaks;
- hit3->score_eventrim += hit3->amb_nmatches_start / ambig_end_interval;
- debug6(printf(" add amb start %d/%d.",hit3->amb_nmatches_start,ambig_end_interval));
- hit3->score_eventrim += hit3->amb_nmatches_end / ambig_end_interval;
- debug6(printf(" add amb end %d/%d.",hit3->amb_nmatches_end,ambig_end_interval));
- debug6(printf(" RESULT: %d\n",hit3->score_eventrim));
+ hit3->score_eventrim_start += Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
+ trim_left_3,/*trim_end*/hit3->trim_right,hit3->querylength_adj);
+ hit3->score_eventrim_end += Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
+ /*trim_start*/hit3->trim_left,trim_right_3,hit3->querylength_adj);
+ debug6(printf(" add nmismatches %d or %d.",
+ Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
+ trim_left_3,/*trim_end*/hit3->trim_right,hit3->querylength_adj),
+ Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
+ /*trim_start*/hit3->trim_left,trim_right_3,hit3->querylength_adj)));
+ hit3->score_eventrim_start += indel_penalty_middle * nindelbreaks;
+ hit3->score_eventrim_end += indel_penalty_middle * nindelbreaks;
+ hit3->score_eventrim_start += hit3->start_amb_nmatches / ambig_end_interval;
+ debug6(printf(" add amb start %d/%d.",hit3->start_amb_nmatches,ambig_end_interval));
+ hit3->score_eventrim_end += hit3->end_amb_nmatches / ambig_end_interval;
+ debug6(printf(" add amb end %d/%d.",hit3->end_amb_nmatches,ambig_end_interval));
+ debug6(printf(" RESULT: %d or %d\n",hit3->score_eventrim_start,hit3->score_eventrim_end));
} else {
debug6(printf("score 3' OTHER:"));
- hit3->score_eventrim = hit3->penalties;
+ hit3->score_eventrim_start = hit3->score_eventrim_end = hit3->penalties;
debug6(printf(" penalties %d.",hit3->penalties));
- hit3->score_eventrim += Substring_count_mismatches_region(hit3->substring0,trim_left_3,trim_right_3,
- query3_compress_fwd,query3_compress_rev);
- debug6(printf(" substring 0 %d.",Substring_count_mismatches_region(hit3->substring0,trim_left_3,trim_right_3,
+ hit3->score_eventrim_start += Substring_count_mismatches_region(hit3->substring0,trim_left_3,/*trim_end*/hit3->trim_right,
+ query3_compress_fwd,query3_compress_rev);
+ debug6(printf(" substring 0 %d.",Substring_count_mismatches_region(hit3->substring0,trim_left_3,/*trim_end*/hit3->trim_right,
+ query3_compress_fwd,query3_compress_rev)));
+
+ hit3->score_eventrim_start += Substring_count_mismatches_region(hit3->substring1,trim_left_3,/*trim_end*/hit3->trim_right,
+ query3_compress_fwd,query3_compress_rev);
+ debug6(printf(" substring 1 %d.",Substring_count_mismatches_region(hit3->substring1,trim_left_3,/*trim_end*/hit3->trim_right,
+ query3_compress_fwd,query3_compress_rev)));
+
+ hit3->score_eventrim_start += Substring_count_mismatches_region(hit3->substring2,trim_left_3,/*trim_end*/hit3->trim_right,
+ query3_compress_fwd,query3_compress_rev);
+ debug6(printf(" substring 2 %d.",Substring_count_mismatches_region(hit3->substring2,trim_left_3,/*trim_end*/hit3->trim_right,
+ query3_compress_fwd,query3_compress_rev)));
+
+
+ hit3->score_eventrim_end += Substring_count_mismatches_region(hit3->substring0,/*trim_start*/hit3->trim_left,trim_right_3,
+ query3_compress_fwd,query3_compress_rev);
+ debug6(printf(" substring 0 %d.",Substring_count_mismatches_region(hit3->substring0,/*trim_start*/hit3->trim_left,trim_right_3,
query3_compress_fwd,query3_compress_rev)));
- hit3->score_eventrim += Substring_count_mismatches_region(hit3->substring1,trim_left_3,trim_right_3,
- query3_compress_fwd,query3_compress_rev);
- debug6(printf(" substring 1 %d.",Substring_count_mismatches_region(hit3->substring1,trim_left_3,trim_right_3,
+ hit3->score_eventrim_end += Substring_count_mismatches_region(hit3->substring1,/*trim_start*/hit3->trim_left,trim_right_3,
+ query3_compress_fwd,query3_compress_rev);
+ debug6(printf(" substring 1 %d.",Substring_count_mismatches_region(hit3->substring1,/*trim_start*/hit3->trim_left,trim_right_3,
query3_compress_fwd,query3_compress_rev)));
- hit3->score_eventrim += Substring_count_mismatches_region(hit3->substring2,trim_left_3,trim_right_3,
- query3_compress_fwd,query3_compress_rev);
- debug6(printf(" substring 2 %d.",Substring_count_mismatches_region(hit3->substring2,trim_left_3,trim_right_3,
+ hit3->score_eventrim_end += Substring_count_mismatches_region(hit3->substring2,/*trim_start*/hit3->trim_left,trim_right_3,
+ query3_compress_fwd,query3_compress_rev);
+ debug6(printf(" substring 2 %d.",Substring_count_mismatches_region(hit3->substring2,/*trim_start*/hit3->trim_left,trim_right_3,
query3_compress_fwd,query3_compress_rev)));
if (hit3->hittype == INSERTION || hit3->hittype == DELETION) {
- hit3->score_eventrim += indel_penalty_middle;
+ hit3->score_eventrim_start += indel_penalty_middle;
+ hit3->score_eventrim_end += indel_penalty_middle;
debug6(printf(" add indel %d.",indel_penalty_middle));
}
- debug6(printf(" RESULT: %d\n",hit3->score_eventrim));
+ debug6(printf(" RESULT: %d or %d\n",hit3->score_eventrim_start,hit3->score_eventrim_end));
}
- hitpair->score_eventrim = hit5->score_eventrim + hit3->score_eventrim;
+ if (hit5->score_eventrim_start < hit5->score_eventrim_end) {
+ hitpair->score_eventrim = hit5->score_eventrim_start;
+ } else {
+ hitpair->score_eventrim = hit5->score_eventrim_end;
+ }
+ if (hit3->score_eventrim_start < hit3->score_eventrim_end) {
+ hitpair->score_eventrim += hit3->score_eventrim_start;
+ } else {
+ hitpair->score_eventrim += hit3->score_eventrim_end;
+ }
if (hitpair->score_eventrim < minscore) {
minscore = hitpair->score_eventrim;
}
@@ -12728,63 +13036,85 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
for (p = hitpairlist; p != NULL; p = p->rest) {
hitpair = (Stage3pair_T) p->first;
- debug6(printf("%lu..%lu %u..%u|%u..%u types %s and %s, score_eventrim %d+%d, pairlength %d, outerlength %u\n",
+ debug6(printf("%lu..%lu %u..%u|%u..%u types %s and %s, score_eventrim %d|%d+%d|%d, pairlength %d, outerlength %u\n",
hitpair->low,hitpair->high,
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
- hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,
+ hitpair->hit5->score_eventrim_start,hitpair->hit5->score_eventrim_end,
+ hitpair->hit3->score_eventrim_start,hitpair->hit3->score_eventrim_end,
hitpair->insertlength,hitpair->outerlength));
if (hitpair->hit5->hittype == TERMINAL && non_terminal_5p == true) {
/* Don't use to determine minscore5 */
- } else if (hitpair->hit5->score_eventrim < minscore5) {
- minscore5 = hitpair->hit5->score_eventrim;
+ } else {
+ if (hitpair->hit5->score_eventrim_start < minscore5_start) {
+ minscore5_start = hitpair->hit5->score_eventrim_start;
+ }
+ if (hitpair->hit5->score_eventrim_end < minscore5_end) {
+ minscore5_end = hitpair->hit5->score_eventrim_end;
+ }
}
if (hitpair->hit3->hittype == TERMINAL && non_terminal_3p == true) {
/* Don't use to determine minscore3 */
- } else if (hitpair->hit3->score_eventrim < minscore3) {
- minscore3 = hitpair->hit3->score_eventrim;
+ } else {
+ if (hitpair->hit3->score_eventrim_start < minscore3_start) {
+ minscore3_start = hitpair->hit3->score_eventrim_start;
+ }
+ if (hitpair->hit3->score_eventrim_end < minscore3_end) {
+ minscore3_end = hitpair->hit3->score_eventrim_end;
+ }
}
}
- debug6(printf("Stage3pair_optimal_score over %d pairs: minscore = %d and %d + subopt:%d\n",
- n,minscore5,minscore3,suboptimal_mismatches));
+ debug6(printf("Stage3pair_optimal_score over %d pairs: minscore = %d|%d and %d|%d + subopt:%d\n",
+ n,minscore5_start,minscore5_end,minscore3_start,minscore3_end,suboptimal_mismatches));
if (non_double_terminal_p == true && finalp == false) {
/* finalp == false. Add suboptimal_mismatches to each end. */
- minscore5 += suboptimal_mismatches;
- minscore3 += suboptimal_mismatches;
- cutoff_level_5 = minscore5;
- cutoff_level_3 = minscore3;
+ minscore5_start += suboptimal_mismatches;
+ minscore5_end += suboptimal_mismatches;
+ minscore3_start += suboptimal_mismatches;
+ minscore3_end += suboptimal_mismatches;
+ cutoff_level_5_start = minscore5_start;
+ cutoff_level_5_end = minscore5_end;
+ cutoff_level_3_start = minscore3_start;
+ cutoff_level_3_end = minscore3_end;
for (p = hitpairlist; p != NULL; p = p->rest) {
hitpair = (Stage3pair_T) p->first;
if (hitpair->hit5->hittype == TERMINAL || hitpair->hit3->hittype == TERMINAL) {
- debug6(printf("Prefinal: Keeping a hit pair of type %s-%s with score_eventrim %d and %d, because finalp is false\n",
+ debug6(printf("Prefinal: Keeping a hit pair of type %s-%s with score_eventrim %d|%d and %d|%d, because finalp is false\n",
hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
- hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim));
+ hitpair->hit5->score_eventrim_start,hitpair->hit5->score_eventrim_end,
+ hitpair->hit3->score_eventrim_start,hitpair->hit3->score_eventrim_end));
optimal = List_push(optimal,hitpair);
} else if (keep_gmap_p == true && (hitpair->hit5->hittype == GMAP || hitpair->hit3->hittype == GMAP)) {
/* GMAP hits already found to be better than their corresponding terminals */
- debug6(printf("Prefinal: Keeping a hit pair of type %s-%s with score_eventrim %d and %d, because keep_gmap_p is true\n",
+ debug6(printf("Prefinal: Keeping a hit pair of type %s-%s with score_eventrim %d|%d and %d|%d, because keep_gmap_p is true\n",
hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
- hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim));
+ hitpair->hit5->score_eventrim_start,hitpair->hit5->score_eventrim_end,
+ hitpair->hit3->score_eventrim_start,hitpair->hit3->score_eventrim_end));
optimal = List_push(optimal,hitpair);
- } else if (hitpair->hit5->score_eventrim > cutoff_level_5 && hitpair->hit3->score_eventrim > cutoff_level_3) {
- debug6(printf("Prefinal: Eliminating a hit pair at %lu..%lu %u..%u|%u..%u with score_eventrim_5 %d > cutoff_level_5 %d and score_eventrim_3 %d > cutoff_level_3 %d (finalp %d)\n",
+ } else if (hitpair->hit5->score_eventrim_start > cutoff_level_5_start && hitpair->hit5->score_eventrim_end > cutoff_level_5_end &&
+ hitpair->hit3->score_eventrim_start > cutoff_level_3_start && hitpair->hit3->score_eventrim_end > cutoff_level_3_end) {
+ debug6(printf("Prefinal: Eliminating a hit pair at %lu..%lu %u..%u|%u..%u with score_eventrim_5 %d|%d > cutoff_level_5 %d|%d and score_eventrim_3 %d|%d > cutoff_level_3 %d|%d (finalp %d)\n",
hitpair->low,hitpair->high,
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
- hitpair->hit5->score_eventrim,cutoff_level_5,hitpair->hit3->score_eventrim,cutoff_level_3,finalp));
+ hitpair->hit5->score_eventrim_start,hitpair->hit5->score_eventrim_end,cutoff_level_5_start,cutoff_level_5_end,
+ hitpair->hit3->score_eventrim_start,hitpair->hit3->score_eventrim_end,cutoff_level_3_start,cutoff_level_3_end,
+ finalp));
*eliminatedp = true;
Stage3pair_free(&hitpair);
} else {
- debug6(printf("Prefinal: Keeping a hit pair with score_eventrim %d and %d (cutoff_level %d and %d)\n",
- hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,cutoff_level_5,cutoff_level_3));
+ debug6(printf("Prefinal: Keeping a hit pair with score_eventrim %d|%d and %d|%d (cutoff_level %d|%d and %d|%d)\n",
+ hitpair->hit5->score_eventrim_start,hitpair->hit5->score_eventrim_end,
+ hitpair->hit3->score_eventrim_start,hitpair->hit3->score_eventrim_end,
+ cutoff_level_5_start,cutoff_level_5_end,cutoff_level_3_start,cutoff_level_3_end));
optimal = List_push(optimal,hitpair);
}
}
@@ -12808,12 +13138,13 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
for (p = hitpairlist; p != NULL; p = p->rest) {
hitpair = (Stage3pair_T) p->first;
- debug6(printf("Final: %lu..%lu %u..%u|%u..%u types %s and %s, score_eventrim %d (%d+%d), pairlength %d, outerlength %u\n",
+ debug6(printf("Final: %lu..%lu %u..%u|%u..%u types %s and %s, score_eventrim %d (%d|%d+%d|%d), pairlength %d, outerlength %u\n",
hitpair->low,hitpair->high,
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
- hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
- hitpair->score_eventrim,hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,
+ hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),hitpair->score_eventrim,
+ hitpair->hit5->score_eventrim_start,hitpair->hit5->score_eventrim_end,
+ hitpair->hit3->score_eventrim_start,hitpair->hit3->score_eventrim_end,
hitpair->insertlength,hitpair->outerlength));
#if 0
diff --git a/src/stage3hr.h b/src/stage3hr.h
index 35e61be..d492305 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 138745 2014-06-11 19:04:25Z twu $ */
+/* $Id: stage3hr.h 140368 2014-07-02 00:56:33Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -323,25 +323,24 @@ Stage3end_new_splice (int *found_score, int donor_nmismatches, int acceptor_nmis
Substring_T donor, Substring_T acceptor, Chrpos_T distance,
bool shortdistancep, int splicing_penalty, int querylength, int amb_nmatches,
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords_left, Uint8list_T ambcoords_right,
+ Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
#else
- Uintlist_T ambcoords_left, Uintlist_T ambcoords_right,
+ Uintlist_T ambcoords_donor, Uintlist_T ambcoords_acceptor,
#endif
- Intlist_T amb_knowni_left, Intlist_T amb_knowni_right,
- Intlist_T amb_nmismatches_left, Intlist_T amb_nmismatches_right,
+ Intlist_T amb_knowni_donor, Intlist_T amb_knowni_acceptor,
+ Intlist_T amb_nmismatches_donor, Intlist_T amb_nmismatches_acceptor,
bool copy_donor_p, bool copy_acceptor_p,
bool first_read_p, int sensedir, bool sarrayp);
extern T
Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T acceptor, Substring_T shortexon,
- Chrpos_T acceptor_distance, Chrpos_T donor_distance,
int amb_nmatches_donor, int amb_nmatches_acceptor,
#ifdef LARGE_GENOMES
- Uint8list_T ambcoords_left, Uint8list_T ambcoords_right,
+ Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
#else
- Uintlist_T ambcoords_left, Uintlist_T ambcoords_right,
+ Uintlist_T ambcoords_donor, Uintlist_T ambcoords_acceptor,
#endif
- Intlist_T amb_knowni_left, Intlist_T amb_knowni_right,
- Intlist_T amb_nmismatches_left, Intlist_T amb_nmismatches_right,
+ Intlist_T amb_knowni_donor, Intlist_T amb_knowni_acceptor,
+ Intlist_T amb_nmismatches_donor, Intlist_T amb_nmismatches_acceptor,
bool copy_donor_p, bool copy_acceptor_p, bool copy_shortexon_p,
int splicing_penalty, int querylength, int sensedir, bool sarrayp);
diff --git a/src/substring.c b/src/substring.c
index 46303f7..705fabe 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 138720 2014-06-11 17:07:51Z twu $";
+static char rcsid[] = "$Id: substring.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1233,8 +1233,7 @@ Substring_insert_length_trimmed (T substring5, T substring3, int hit5_trim_left,
static void
-mark_mismatches_cmet_gsnap (char *gbuffer, char *query, int start, int end,
- int genestrand, bool first_read_p) {
+mark_mismatches_cmet_gsnap (char *gbuffer, char *query, int start, int end, int genestrand) {
int i;
debug1(printf("\n"));
@@ -1244,60 +1243,30 @@ mark_mismatches_cmet_gsnap (char *gbuffer, char *query, int start, int end,
debug1(printf("count: "));
if (genestrand == +2) {
- if (first_read_p == false) {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'C' && query[i] == 'T') {
- debug1(printf("."));
- gbuffer[i] = '.';
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
- }
- } else {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'G' && query[i] == 'A') {
- debug1(printf("."));
- gbuffer[i] = '.';
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'G' && query[i] == 'A') {
+ debug1(printf("."));
+ gbuffer[i] = '.';
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
}
}
} else {
- if (first_read_p == true) {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'C' && query[i] == 'T') {
- debug1(printf("."));
- gbuffer[i] = '.';
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
- }
- } else {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'G' && query[i] == 'A') {
- debug1(printf("."));
- gbuffer[i] = '.';
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'C' && query[i] == 'T') {
+ debug1(printf("."));
+ gbuffer[i] = '.';
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
}
}
}
@@ -1307,8 +1276,7 @@ mark_mismatches_cmet_gsnap (char *gbuffer, char *query, int start, int end,
static void
-mark_mismatches_cmet_sam (char *gbuffer, char *query, int start, int end,
- int genestrand, bool first_read_p) {
+mark_mismatches_cmet_sam (char *gbuffer, char *query, int start, int end, int genestrand) {
int i;
debug1(printf("query: %s\n",query));
@@ -1316,72 +1284,36 @@ mark_mismatches_cmet_sam (char *gbuffer, char *query, int start, int end,
debug1(printf("count: "));
if (genestrand == +2) {
- if (first_read_p == false) {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'C' && query[i] == 'T') {
- debug1(printf("."));
-#if 0
- /* Want to show mismatches */
- gbuffer[i] = 'T'; /* Avoids showing mismatches in MD and NM strings */
-#endif
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
- }
- } else {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'G' && query[i] == 'A') {
- debug1(printf("."));
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'G' && query[i] == 'A') {
+ debug1(printf("."));
#if 0
- /* Want to show mismatches */
- gbuffer[i] = 'A'; /* Avoids showing mismatches in MD and NM strings */
+ /* Want to show mismatches */
+ gbuffer[i] = 'A'; /* Avoids showing mismatches in MD and NM strings */
#endif
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
}
}
} else {
- if (first_read_p == true) {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'C' && query[i] == 'T') {
- debug1(printf("."));
-#if 0
- /* Want to show mismatches */
- gbuffer[i] = 'T'; /* Avoids showing mismatches in MD and NM strings */
-#endif
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
- }
- } else {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'G' && query[i] == 'A') {
- debug1(printf("."));
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'C' && query[i] == 'T') {
+ debug1(printf("."));
#if 0
- /* Want to show mismatches */
- gbuffer[i] = 'A'; /* Avoids showing mismatches in MD and NM strings */
+ /* Want to show mismatches */
+ gbuffer[i] = 'T'; /* Avoids showing mismatches in MD and NM strings */
#endif
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
}
}
}
@@ -1392,8 +1324,7 @@ mark_mismatches_cmet_sam (char *gbuffer, char *query, int start, int end,
static void
-mark_mismatches_atoi_gsnap (char *gbuffer, char *query, int start, int end,
- int genestrand, bool first_read_p) {
+mark_mismatches_atoi_gsnap (char *gbuffer, char *query, int start, int end, int genestrand) {
int i;
debug1(printf("query: %s\n",query));
@@ -1401,60 +1332,30 @@ mark_mismatches_atoi_gsnap (char *gbuffer, char *query, int start, int end,
debug1(printf("count: "));
if (genestrand == +2) {
- if (first_read_p == false) {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'A' && query[i] == 'G') {
- debug1(printf("."));
- gbuffer[i] = '.';
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
- }
- } else {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'T' && query[i] == 'C') {
- debug1(printf("."));
- gbuffer[i] = '.';
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'T' && query[i] == 'C') {
+ debug1(printf("."));
+ gbuffer[i] = '.';
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
}
}
} else {
- if (first_read_p == true) {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'A' && query[i] == 'G') {
- debug1(printf("."));
- gbuffer[i] = '.';
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
- }
- } else {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'T' && query[i] == 'C') {
- debug1(printf("."));
- gbuffer[i] = '.';
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'A' && query[i] == 'G') {
+ debug1(printf("."));
+ gbuffer[i] = '.';
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
}
}
}
@@ -1465,8 +1366,7 @@ mark_mismatches_atoi_gsnap (char *gbuffer, char *query, int start, int end,
static void
-mark_mismatches_atoi_sam (char *gbuffer, char *query, int start, int end,
- int genestrand, bool first_read_p) {
+mark_mismatches_atoi_sam (char *gbuffer, char *query, int start, int end, int genestrand) {
int i;
debug1(printf("query: %s\n",query));
@@ -1474,61 +1374,27 @@ mark_mismatches_atoi_sam (char *gbuffer, char *query, int start, int end,
debug1(printf("count: "));
if (genestrand == +2) {
- if (first_read_p == false) {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'A' && query[i] == 'G') {
- debug1(printf("."));
+ if (gbuffer[i] == 'T' && query[i] == 'C') {
+ debug1(printf("."));
#if 0
- /* Want to show mismatches */
- gbuffer[i] = 'G'; /* Avoids showing mismatches in MD and NM strings */
+ /* Want to show mismatches */
+ gbuffer[i] = 'C'; /* Avoids showing mismatches in MD and NM strings */
#endif
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
- }
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
} else {
- if (gbuffer[i] == 'T' && query[i] == 'C') {
- debug1(printf("."));
-#if 0
- /* Want to show mismatches */
- gbuffer[i] = 'C'; /* Avoids showing mismatches in MD and NM strings */
-#endif
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
+ debug1(printf("*"));
}
} else {
- if (first_read_p == true) {
- for (i = start; i < end; i++) {
- if (gbuffer[i] == 'A' && query[i] == 'G') {
- debug1(printf("."));
-#if 0
- /* Want to show mismatches */
- gbuffer[i] = 'G'; /* Avoids showing mismatches in MD and NM strings */
-#endif
- } else if (query[i] != gbuffer[i]) {
- debug1(printf("x"));
- assert(gbuffer[i] != OUTOFBOUNDS);
- gbuffer[i] = (char) tolower(gbuffer[i]);
- } else {
- debug1(printf("*"));
- }
- }
- } else {
- if (gbuffer[i] == 'T' && query[i] == 'C') {
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'A' && query[i] == 'G') {
debug1(printf("."));
#if 0
/* Want to show mismatches */
- gbuffer[i] = 'C'; /* Avoids showing mismatches in MD and NM strings */
+ gbuffer[i] = 'G'; /* Avoids showing mismatches in MD and NM strings */
#endif
} else if (query[i] != gbuffer[i]) {
debug1(printf("x"));
@@ -1584,7 +1450,7 @@ Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in,
static char *
embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend, int querylength,
- int alignoffset, int extraleft, int extraright, int genestrand, bool first_read_p) {
+ int alignoffset, int extraleft, int extraright, int genestrand) {
char *result;
int i, j, k;
@@ -1601,9 +1467,9 @@ embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend
if (mode == STANDARD) {
/* Skip */
} else if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
- mark_mismatches_cmet_gsnap(result,query,querystart,queryend,genestrand,first_read_p);
+ mark_mismatches_cmet_gsnap(result,query,querystart,queryend,genestrand);
} else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
- mark_mismatches_atoi_gsnap(result,query,querystart,queryend,genestrand,first_read_p);
+ mark_mismatches_atoi_gsnap(result,query,querystart,queryend,genestrand);
} else {
abort();
}
@@ -1634,7 +1500,7 @@ embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend
static char *
embellish_genomic_sam (char *genomic_diff, char *query, int querystart, int queryend, int querylength,
- int genomiclength, int alignoffset, int genestrand, bool first_read_p) {
+ int genomiclength, int alignoffset, int genestrand) {
char *result;
int i, j, k;
@@ -1646,9 +1512,9 @@ embellish_genomic_sam (char *genomic_diff, char *query, int querystart, int quer
if (mode == STANDARD) {
/* Skip */
} else if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
- mark_mismatches_cmet_sam(result,query,querystart,queryend,genestrand,first_read_p);
+ mark_mismatches_cmet_sam(result,query,querystart,queryend,genestrand);
} else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
- mark_mismatches_atoi_sam(result,query,querystart,queryend,genestrand,first_read_p);
+ mark_mismatches_atoi_sam(result,query,querystart,queryend,genestrand);
} else {
abort();
}
@@ -1977,7 +1843,7 @@ Substring_display_prep (char **deletion, T this, char *query, Compress_T query_c
this->genomic_bothdiff = embellish_genomic(genomic_diff,query,this->querystart_orig,this->queryend_orig,
this->querylength,this->alignoffset,this->extraleft,this->extraright,
- this->genestrand,this->first_read_p);
+ this->genestrand);
if (snps_iit == NULL) {
this->genomic_refdiff = this->genomic_bothdiff;
@@ -1996,14 +1862,14 @@ Substring_display_prep (char **deletion, T this, char *query, Compress_T query_c
if (output_sam_p == false) {
this->genomic_refdiff = embellish_genomic(genomic_diff,query,this->querystart_orig,this->queryend_orig,
this->querylength,this->alignoffset,this->extraleft,this->extraright,
- this->genestrand,this->first_read_p);
+ this->genestrand);
}
}
if (output_sam_p == true) {
this->genomic_refdiff = embellish_genomic_sam(genomic_diff,query,this->querystart_orig,this->queryend_orig,
this->querylength,this->genomiclength,this->alignoffset,
- this->genestrand,this->first_read_p);
+ this->genestrand);
}
if (allocp == true) {
@@ -2033,7 +1899,7 @@ Substring_display_prep (char **deletion, T this, char *query, Compress_T query_c
this->genomic_bothdiff = embellish_genomic(genomic_diff,query,this->querystart_orig,this->queryend_orig,
this->querylength,this->alignoffset,this->extraleft,this->extraright,
- this->genestrand,this->first_read_p);
+ this->genestrand);
if (snps_iit == NULL) {
this->genomic_refdiff = this->genomic_bothdiff;
@@ -2054,14 +1920,14 @@ Substring_display_prep (char **deletion, T this, char *query, Compress_T query_c
if (output_sam_p == false) {
this->genomic_refdiff = embellish_genomic(genomic_diff,query,this->querystart_orig,this->queryend_orig,
this->querylength,this->alignoffset,this->extraleft,this->extraright,
- this->genestrand,this->first_read_p);
+ this->genestrand);
}
}
if (output_sam_p == true) {
this->genomic_refdiff = embellish_genomic_sam(genomic_diff,query,this->querystart_orig,this->queryend_orig,
this->querylength,this->genomiclength,this->alignoffset,
- this->genestrand,this->first_read_p);
+ this->genestrand);
}
if (allocp == true) {
diff --git a/src/uint8list.c b/src/uint8list.c
index 66e8a1a..ce48b35 100644
--- a/src/uint8list.c
+++ b/src/uint8list.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uint8list.c 136085 2014-05-13 23:00:04Z twu $";
+static char rcsid[] = "$Id: uint8list.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -123,6 +123,20 @@ Uint8list_to_array_out (int *n, T list) {
}
T
+Uint8list_from_array (UINT8 *array, int n) {
+ T list = NULL, p;
+
+ while (--n >= 0) {
+ p = (T) MALLOC(sizeof(*p));
+ p->first = array[n];
+ p->rest = list;
+ list = p;
+ }
+
+ return list;
+}
+
+T
Uint8list_copy (T list) {
T head, *p = &head;
diff --git a/src/uint8list.h b/src/uint8list.h
index 9f452c9..3b080f9 100644
--- a/src/uint8list.h
+++ b/src/uint8list.h
@@ -1,4 +1,4 @@
-/* $Id: uint8list.h 136085 2014-05-13 23:00:04Z twu $ */
+/* $Id: uint8list.h 140368 2014-07-02 00:56:33Z twu $ */
#ifndef UINT8LIST_INCLUDED
#define UINT8LIST_INCLUDED
#include "types.h"
@@ -27,6 +27,8 @@ extern UINT8 *
Uint8list_to_array (int *n, T list);
extern UINT8 *
Uint8list_to_array_out (int *n, T list);
+extern T
+Uint8list_from_array (UINT8 *array, int n);
extern T
Uint8list_copy (T list);
extern T
diff --git a/src/uintlist.c b/src/uintlist.c
index f0647c7..b198c4a 100644
--- a/src/uintlist.c
+++ b/src/uintlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uintlist.c 136085 2014-05-13 23:00:04Z twu $";
+static char rcsid[] = "$Id: uintlist.c 140368 2014-07-02 00:56:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -123,6 +123,20 @@ Uintlist_to_array_out (int *n, T list) {
}
T
+Uintlist_from_array (UINT4 *array, int n) {
+ T list = NULL, p;
+
+ while (--n >= 0) {
+ p = (T) MALLOC(sizeof(*p));
+ p->first = array[n];
+ p->rest = list;
+ list = p;
+ }
+
+ return list;
+}
+
+T
Uintlist_copy (T list) {
T head, *p = &head;
diff --git a/src/uintlist.h b/src/uintlist.h
index 1677bcb..666502c 100644
--- a/src/uintlist.h
+++ b/src/uintlist.h
@@ -1,4 +1,4 @@
-/* $Id: uintlist.h 136085 2014-05-13 23:00:04Z twu $ */
+/* $Id: uintlist.h 140368 2014-07-02 00:56:33Z twu $ */
#ifndef UINTLIST_INCLUDED
#define UINTLIST_INCLUDED
#include "types.h"
@@ -27,6 +27,8 @@ extern UINT4 *
Uintlist_to_array (int *n, T list);
extern UINT4 *
Uintlist_to_array_out (int *n, T list);
+extern T
+Uintlist_from_array (UINT4 *array, int n);
extern T
Uintlist_copy (T list);
extern T
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit mailing list