[med-svn] [gmap] 05/08: Imported Upstream version 2016-05-01
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Fri May 13 17:05:53 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 04146bd7837057a0a3a042c5e9d6e17b04919498
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Fri May 13 18:30:03 2016 +0200
Imported Upstream version 2016-05-01
---
ChangeLog | 34 ++
VERSION | 2 +-
configure | 24 +-
src/Makefile.am | 4 +-
src/Makefile.in | 226 +-----------
src/datadir.c | 35 +-
src/dynprog.c | 12 +-
src/dynprog.h | 13 +-
src/dynprog_genome.c | 118 +++---
src/gmap.c | 23 +-
src/gsnap.c | 14 +-
src/pair.c | 156 ++++----
src/pair.h | 4 +-
src/sarray-read.c | 63 ++--
src/splice.c | 6 +-
src/stage1hr.c | 994 +++++++++++++++++++++++++++++++++------------------
src/stage3.c | 526 +++++++++++++++++----------
src/stage3.h | 27 +-
src/stage3hr.c | 884 ++++++++++++++++++++++++++++++++-------------
src/stage3hr.h | 11 +-
src/translation.c | 198 +++++++++-
src/translation.h | 10 +-
src/uniqscan.c | 3 +-
23 files changed, 2181 insertions(+), 1206 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index dbe0a12..0dc54c8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2016-05-01 twu
+
+ * dynprog.c, dynprog.h, dynprog_genome.c, gmap.c, gsnap.c, pair.c, pair.h,
+ sarray-read.c, splice.c, src, stage1hr.c, stage3.c, stage3.h, stage3hr.c,
+ stage3hr.h, trunk, uniqscan.c: Merged revisions 188721 through 188751 from
+ branches/2016-04-29-improve-alignments
+
+ * Makefile.gsnaptoo.am, trunk: Property changes
+
+ * VERSION, config.site.rescomp.prd: Updated version number
+
+ * config.site.rescomp.tst: Added sanitize flag
+
+2016-04-30 twu
+
+ * papers: Removed papers directory from SVN
+
+ * Makefile.gsnaptoo.am, gmap.c, pair.c, src, stage3.c, translation.c,
+ translation.h: Merged revisions 188558 to 188717 from
+ branches/2016-04-27-alt-codons to allow for alternate genetic codes
+
+2016-04-20 twu
+
+ * archive.html, index.html: Updated for latest version
+
+ * stage3.c: Not allowing any ambiguous matches at 3' or 5' ends when
+ trimming
+
+ * datadir.c: Modified comments
+
+ * datadir.c: In find_fileroot, showing preference if <dbroot>.version is
+ found. Otherwise, handling the case where multiple .version files are
+ found.
+
2016-04-04 twu
* archive.html, index.html: Revised for latest version
diff --git a/VERSION b/VERSION
index 0918dc2..35be3e2 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-04-04
\ No newline at end of file
+2016-05-01
\ No newline at end of file
diff --git a/configure b/configure
index 9e5f972..b3227d0 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2016-04-04.
+# Generated by GNU Autoconf 2.63 for gmap 2016-05-01.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-04-04'
-PACKAGE_STRING='gmap 2016-04-04'
+PACKAGE_VERSION='2016-05-01'
+PACKAGE_STRING='gmap 2016-05-01'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -1518,7 +1518,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2016-04-04 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-05-01 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1589,7 +1589,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2016-04-04:";;
+ short | recursive ) echo "Configuration of gmap 2016-05-01:";;
esac
cat <<\_ACEOF
@@ -1708,7 +1708,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2016-04-04
+gmap configure 2016-05-01
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1722,7 +1722,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2016-04-04, which was
+It was created by gmap $as_me 2016-05-01, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2092,8 +2092,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2016-04-04" >&5
-$as_echo "2016-04-04" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2016-05-01" >&5
+$as_echo "2016-05-01" >&6; }
### Read defaults
@@ -4159,7 +4159,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2016-04-04'
+ VERSION='2016-05-01'
cat >>confdefs.h <<_ACEOF
@@ -24542,7 +24542,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2016-04-04, which was
+This file was extended by gmap $as_me 2016-05-01, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -24605,7 +24605,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2016-04-04
+gmap config.status 2016-05-01
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/src/Makefile.am b/src/Makefile.am
index 46ac87c..34c8ecf 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -257,7 +257,7 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
intron.c intron.h boyer-moore.c boyer-moore.h changepoint.c changepoint.h pbinom.c pbinom.h \
dynprog.c dynprog.h dynprog_simd.c dynprog_simd.h \
dynprog_single.c dynprog_single.h dynprog_genome.c dynprog_genome.h dynprog_cdna.c dynprog_cdna.h dynprog_end.c dynprog_end.h \
- gbuffer.c gbuffer.h translation.c translation.h \
+ gbuffer.c gbuffer.h \
doublelist.c doublelist.h smooth.c smooth.h \
chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
@@ -339,7 +339,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
intron.c intron.h boyer-moore.c boyer-moore.h changepoint.c changepoint.h pbinom.c pbinom.h \
dynprog.c dynprog.h dynprog_simd.c dynprog_simd.h \
dynprog_single.c dynprog_single.h dynprog_genome.c dynprog_genome.h dynprog_cdna.c dynprog_cdna.h dynprog_end.c dynprog_end.h \
- gbuffer.c gbuffer.h translation.c translation.h \
+ gbuffer.c gbuffer.h \
doublelist.c doublelist.h smooth.c smooth.h \
chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
diff --git a/src/Makefile.in b/src/Makefile.in
index 91032a9..bd7bd7b 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -1027,7 +1027,6 @@ am__objects_20 = gsnap_avx2-except.$(OBJEXT) \
gsnap_avx2-dynprog_genome.$(OBJEXT) \
gsnap_avx2-dynprog_cdna.$(OBJEXT) \
gsnap_avx2-dynprog_end.$(OBJEXT) gsnap_avx2-gbuffer.$(OBJEXT) \
- gsnap_avx2-translation.$(OBJEXT) \
gsnap_avx2-doublelist.$(OBJEXT) gsnap_avx2-smooth.$(OBJEXT) \
gsnap_avx2-chimera.$(OBJEXT) gsnap_avx2-stage3.$(OBJEXT) \
gsnap_avx2-splicestringpool.$(OBJEXT) \
@@ -1098,7 +1097,6 @@ am__objects_21 = gsnap_nosimd-except.$(OBJEXT) \
gsnap_nosimd-dynprog_cdna.$(OBJEXT) \
gsnap_nosimd-dynprog_end.$(OBJEXT) \
gsnap_nosimd-gbuffer.$(OBJEXT) \
- gsnap_nosimd-translation.$(OBJEXT) \
gsnap_nosimd-doublelist.$(OBJEXT) \
gsnap_nosimd-smooth.$(OBJEXT) gsnap_nosimd-chimera.$(OBJEXT) \
gsnap_nosimd-stage3.$(OBJEXT) \
@@ -1165,7 +1163,6 @@ am__objects_22 = gsnap_sse2-except.$(OBJEXT) \
gsnap_sse2-dynprog_genome.$(OBJEXT) \
gsnap_sse2-dynprog_cdna.$(OBJEXT) \
gsnap_sse2-dynprog_end.$(OBJEXT) gsnap_sse2-gbuffer.$(OBJEXT) \
- gsnap_sse2-translation.$(OBJEXT) \
gsnap_sse2-doublelist.$(OBJEXT) gsnap_sse2-smooth.$(OBJEXT) \
gsnap_sse2-chimera.$(OBJEXT) gsnap_sse2-stage3.$(OBJEXT) \
gsnap_sse2-splicestringpool.$(OBJEXT) \
@@ -1230,10 +1227,9 @@ am__objects_23 = gsnap_sse41-except.$(OBJEXT) \
gsnap_sse41-dynprog_genome.$(OBJEXT) \
gsnap_sse41-dynprog_cdna.$(OBJEXT) \
gsnap_sse41-dynprog_end.$(OBJEXT) \
- gsnap_sse41-gbuffer.$(OBJEXT) \
- gsnap_sse41-translation.$(OBJEXT) \
- gsnap_sse41-doublelist.$(OBJEXT) gsnap_sse41-smooth.$(OBJEXT) \
- gsnap_sse41-chimera.$(OBJEXT) gsnap_sse41-stage3.$(OBJEXT) \
+ gsnap_sse41-gbuffer.$(OBJEXT) gsnap_sse41-doublelist.$(OBJEXT) \
+ gsnap_sse41-smooth.$(OBJEXT) gsnap_sse41-chimera.$(OBJEXT) \
+ gsnap_sse41-stage3.$(OBJEXT) \
gsnap_sse41-splicestringpool.$(OBJEXT) \
gsnap_sse41-splicetrie_build.$(OBJEXT) \
gsnap_sse41-splicetrie.$(OBJEXT) gsnap_sse41-splice.$(OBJEXT) \
@@ -1297,10 +1293,9 @@ am__objects_24 = gsnap_sse42-except.$(OBJEXT) \
gsnap_sse42-dynprog_genome.$(OBJEXT) \
gsnap_sse42-dynprog_cdna.$(OBJEXT) \
gsnap_sse42-dynprog_end.$(OBJEXT) \
- gsnap_sse42-gbuffer.$(OBJEXT) \
- gsnap_sse42-translation.$(OBJEXT) \
- gsnap_sse42-doublelist.$(OBJEXT) gsnap_sse42-smooth.$(OBJEXT) \
- gsnap_sse42-chimera.$(OBJEXT) gsnap_sse42-stage3.$(OBJEXT) \
+ gsnap_sse42-gbuffer.$(OBJEXT) gsnap_sse42-doublelist.$(OBJEXT) \
+ gsnap_sse42-smooth.$(OBJEXT) gsnap_sse42-chimera.$(OBJEXT) \
+ gsnap_sse42-stage3.$(OBJEXT) \
gsnap_sse42-splicestringpool.$(OBJEXT) \
gsnap_sse42-splicetrie_build.$(OBJEXT) \
gsnap_sse42-splicetrie.$(OBJEXT) gsnap_sse42-splice.$(OBJEXT) \
@@ -1364,10 +1359,9 @@ am__objects_25 = gsnap_ssse3-except.$(OBJEXT) \
gsnap_ssse3-dynprog_genome.$(OBJEXT) \
gsnap_ssse3-dynprog_cdna.$(OBJEXT) \
gsnap_ssse3-dynprog_end.$(OBJEXT) \
- gsnap_ssse3-gbuffer.$(OBJEXT) \
- gsnap_ssse3-translation.$(OBJEXT) \
- gsnap_ssse3-doublelist.$(OBJEXT) gsnap_ssse3-smooth.$(OBJEXT) \
- gsnap_ssse3-chimera.$(OBJEXT) gsnap_ssse3-stage3.$(OBJEXT) \
+ gsnap_ssse3-gbuffer.$(OBJEXT) gsnap_ssse3-doublelist.$(OBJEXT) \
+ gsnap_ssse3-smooth.$(OBJEXT) gsnap_ssse3-chimera.$(OBJEXT) \
+ gsnap_ssse3-stage3.$(OBJEXT) \
gsnap_ssse3-splicestringpool.$(OBJEXT) \
gsnap_ssse3-splicetrie_build.$(OBJEXT) \
gsnap_ssse3-splicetrie.$(OBJEXT) gsnap_ssse3-splice.$(OBJEXT) \
@@ -1439,10 +1433,9 @@ am__objects_27 = gsnapl_avx2-except.$(OBJEXT) \
gsnapl_avx2-dynprog_genome.$(OBJEXT) \
gsnapl_avx2-dynprog_cdna.$(OBJEXT) \
gsnapl_avx2-dynprog_end.$(OBJEXT) \
- gsnapl_avx2-gbuffer.$(OBJEXT) \
- gsnapl_avx2-translation.$(OBJEXT) \
- gsnapl_avx2-doublelist.$(OBJEXT) gsnapl_avx2-smooth.$(OBJEXT) \
- gsnapl_avx2-chimera.$(OBJEXT) gsnapl_avx2-stage3.$(OBJEXT) \
+ gsnapl_avx2-gbuffer.$(OBJEXT) gsnapl_avx2-doublelist.$(OBJEXT) \
+ gsnapl_avx2-smooth.$(OBJEXT) gsnapl_avx2-chimera.$(OBJEXT) \
+ gsnapl_avx2-stage3.$(OBJEXT) \
gsnapl_avx2-splicestringpool.$(OBJEXT) \
gsnapl_avx2-splicetrie_build.$(OBJEXT) \
gsnapl_avx2-splicetrie.$(OBJEXT) gsnapl_avx2-splice.$(OBJEXT) \
@@ -1515,7 +1508,6 @@ am__objects_28 = gsnapl_nosimd-except.$(OBJEXT) \
gsnapl_nosimd-dynprog_cdna.$(OBJEXT) \
gsnapl_nosimd-dynprog_end.$(OBJEXT) \
gsnapl_nosimd-gbuffer.$(OBJEXT) \
- gsnapl_nosimd-translation.$(OBJEXT) \
gsnapl_nosimd-doublelist.$(OBJEXT) \
gsnapl_nosimd-smooth.$(OBJEXT) gsnapl_nosimd-chimera.$(OBJEXT) \
gsnapl_nosimd-stage3.$(OBJEXT) \
@@ -1585,10 +1577,9 @@ am__objects_29 = gsnapl_sse2-except.$(OBJEXT) \
gsnapl_sse2-dynprog_genome.$(OBJEXT) \
gsnapl_sse2-dynprog_cdna.$(OBJEXT) \
gsnapl_sse2-dynprog_end.$(OBJEXT) \
- gsnapl_sse2-gbuffer.$(OBJEXT) \
- gsnapl_sse2-translation.$(OBJEXT) \
- gsnapl_sse2-doublelist.$(OBJEXT) gsnapl_sse2-smooth.$(OBJEXT) \
- gsnapl_sse2-chimera.$(OBJEXT) gsnapl_sse2-stage3.$(OBJEXT) \
+ gsnapl_sse2-gbuffer.$(OBJEXT) gsnapl_sse2-doublelist.$(OBJEXT) \
+ gsnapl_sse2-smooth.$(OBJEXT) gsnapl_sse2-chimera.$(OBJEXT) \
+ gsnapl_sse2-stage3.$(OBJEXT) \
gsnapl_sse2-splicestringpool.$(OBJEXT) \
gsnapl_sse2-splicetrie_build.$(OBJEXT) \
gsnapl_sse2-splicetrie.$(OBJEXT) gsnapl_sse2-splice.$(OBJEXT) \
@@ -1656,7 +1647,6 @@ am__objects_30 = gsnapl_sse41-except.$(OBJEXT) \
gsnapl_sse41-dynprog_cdna.$(OBJEXT) \
gsnapl_sse41-dynprog_end.$(OBJEXT) \
gsnapl_sse41-gbuffer.$(OBJEXT) \
- gsnapl_sse41-translation.$(OBJEXT) \
gsnapl_sse41-doublelist.$(OBJEXT) \
gsnapl_sse41-smooth.$(OBJEXT) gsnapl_sse41-chimera.$(OBJEXT) \
gsnapl_sse41-stage3.$(OBJEXT) \
@@ -1730,7 +1720,6 @@ am__objects_31 = gsnapl_sse42-except.$(OBJEXT) \
gsnapl_sse42-dynprog_cdna.$(OBJEXT) \
gsnapl_sse42-dynprog_end.$(OBJEXT) \
gsnapl_sse42-gbuffer.$(OBJEXT) \
- gsnapl_sse42-translation.$(OBJEXT) \
gsnapl_sse42-doublelist.$(OBJEXT) \
gsnapl_sse42-smooth.$(OBJEXT) gsnapl_sse42-chimera.$(OBJEXT) \
gsnapl_sse42-stage3.$(OBJEXT) \
@@ -1804,7 +1793,6 @@ am__objects_32 = gsnapl_ssse3-except.$(OBJEXT) \
gsnapl_ssse3-dynprog_cdna.$(OBJEXT) \
gsnapl_ssse3-dynprog_end.$(OBJEXT) \
gsnapl_ssse3-gbuffer.$(OBJEXT) \
- gsnapl_ssse3-translation.$(OBJEXT) \
gsnapl_ssse3-doublelist.$(OBJEXT) \
gsnapl_ssse3-smooth.$(OBJEXT) gsnapl_ssse3-chimera.$(OBJEXT) \
gsnapl_ssse3-stage3.$(OBJEXT) \
@@ -2416,7 +2404,7 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
intron.c intron.h boyer-moore.c boyer-moore.h changepoint.c changepoint.h pbinom.c pbinom.h \
dynprog.c dynprog.h dynprog_simd.c dynprog_simd.h \
dynprog_single.c dynprog_single.h dynprog_genome.c dynprog_genome.h dynprog_cdna.c dynprog_cdna.h dynprog_end.c dynprog_end.h \
- gbuffer.c gbuffer.h translation.c translation.h \
+ gbuffer.c gbuffer.h \
doublelist.c doublelist.h smooth.c smooth.h \
chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
@@ -2488,7 +2476,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
intron.c intron.h boyer-moore.c boyer-moore.h changepoint.c changepoint.h pbinom.c pbinom.h \
dynprog.c dynprog.h dynprog_simd.c dynprog_simd.h \
dynprog_single.c dynprog_single.h dynprog_genome.c dynprog_genome.h dynprog_cdna.c dynprog_cdna.h dynprog_end.c dynprog_end.h \
- gbuffer.c gbuffer.h translation.c translation.h \
+ gbuffer.c gbuffer.h \
doublelist.c doublelist.h smooth.c smooth.h \
chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
@@ -4297,7 +4285,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-univdiag.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-univinterval.Po@am__quote@
@@ -4387,7 +4374,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-univdiag.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-univinterval.Po@am__quote@
@@ -4477,7 +4463,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-univdiag.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-univinterval.Po@am__quote@
@@ -4567,7 +4552,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-univdiag.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-univinterval.Po@am__quote@
@@ -4657,7 +4641,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-univdiag.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-univinterval.Po@am__quote@
@@ -4747,7 +4730,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-univdiag.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-univinterval.Po@am__quote@
@@ -4837,7 +4819,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-univinterval.Po@am__quote@
@@ -4925,7 +4906,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-univinterval.Po@am__quote@
@@ -5013,7 +4993,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-univinterval.Po@am__quote@
@@ -5101,7 +5080,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-univinterval.Po@am__quote@
@@ -5189,7 +5167,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-univinterval.Po@am__quote@
@@ -5277,7 +5254,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-stage3hr.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-stopwatch.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-substring.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-translation.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-uint8list.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-uintlist.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-univinterval.Po@am__quote@
@@ -23062,20 +23038,6 @@ gsnap_avx2-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnap_avx2-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-translation.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-translation.Tpo -c -o gsnap_avx2-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_avx2-translation.Tpo $(DEPDIR)/gsnap_avx2-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_avx2-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnap_avx2-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-translation.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-translation.Tpo -c -o gsnap_avx2-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_avx2-translation.Tpo $(DEPDIR)/gsnap_avx2-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_avx2-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnap_avx2-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-doublelist.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-doublelist.Tpo -c -o gsnap_avx2-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_avx2-doublelist.Tpo $(DEPDIR)/gsnap_avx2-doublelist.Po
@@ -24322,20 +24284,6 @@ gsnap_nosimd-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnap_nosimd-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-translation.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-translation.Tpo -c -o gsnap_nosimd-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_nosimd-translation.Tpo $(DEPDIR)/gsnap_nosimd-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_nosimd-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnap_nosimd-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-translation.obj -MD -MP -MF $(DEPDIR)/gsnap_nosimd-translation.Tpo -c -o gsnap_nosimd-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_nosimd-translation.Tpo $(DEPDIR)/gsnap_nosimd-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_nosimd-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnap_nosimd-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-doublelist.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-doublelist.Tpo -c -o gsnap_nosimd-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_nosimd-doublelist.Tpo $(DEPDIR)/gsnap_nosimd-doublelist.Po
@@ -25582,20 +25530,6 @@ gsnap_sse2-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnap_sse2-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-translation.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-translation.Tpo -c -o gsnap_sse2-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_sse2-translation.Tpo $(DEPDIR)/gsnap_sse2-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_sse2-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnap_sse2-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-translation.obj -MD -MP -MF $(DEPDIR)/gsnap_sse2-translation.Tpo -c -o gsnap_sse2-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_sse2-translation.Tpo $(DEPDIR)/gsnap_sse2-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_sse2-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnap_sse2-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-doublelist.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-doublelist.Tpo -c -o gsnap_sse2-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_sse2-doublelist.Tpo $(DEPDIR)/gsnap_sse2-doublelist.Po
@@ -26842,20 +26776,6 @@ gsnap_sse41-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnap_sse41-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-translation.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-translation.Tpo -c -o gsnap_sse41-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_sse41-translation.Tpo $(DEPDIR)/gsnap_sse41-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_sse41-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnap_sse41-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-translation.obj -MD -MP -MF $(DEPDIR)/gsnap_sse41-translation.Tpo -c -o gsnap_sse41-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_sse41-translation.Tpo $(DEPDIR)/gsnap_sse41-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_sse41-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnap_sse41-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-doublelist.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-doublelist.Tpo -c -o gsnap_sse41-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_sse41-doublelist.Tpo $(DEPDIR)/gsnap_sse41-doublelist.Po
@@ -28102,20 +28022,6 @@ gsnap_sse42-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnap_sse42-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-translation.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-translation.Tpo -c -o gsnap_sse42-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_sse42-translation.Tpo $(DEPDIR)/gsnap_sse42-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_sse42-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnap_sse42-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-translation.obj -MD -MP -MF $(DEPDIR)/gsnap_sse42-translation.Tpo -c -o gsnap_sse42-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_sse42-translation.Tpo $(DEPDIR)/gsnap_sse42-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_sse42-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnap_sse42-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-doublelist.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-doublelist.Tpo -c -o gsnap_sse42-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_sse42-doublelist.Tpo $(DEPDIR)/gsnap_sse42-doublelist.Po
@@ -29362,20 +29268,6 @@ gsnap_ssse3-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnap_ssse3-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-translation.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-translation.Tpo -c -o gsnap_ssse3-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_ssse3-translation.Tpo $(DEPDIR)/gsnap_ssse3-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_ssse3-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnap_ssse3-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-translation.obj -MD -MP -MF $(DEPDIR)/gsnap_ssse3-translation.Tpo -c -o gsnap_ssse3-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_ssse3-translation.Tpo $(DEPDIR)/gsnap_ssse3-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnap_ssse3-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnap_ssse3-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-doublelist.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-doublelist.Tpo -c -o gsnap_ssse3-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap_ssse3-doublelist.Tpo $(DEPDIR)/gsnap_ssse3-doublelist.Po
@@ -30664,20 +30556,6 @@ gsnapl_avx2-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnapl_avx2-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-translation.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-translation.Tpo -c -o gsnapl_avx2-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_avx2-translation.Tpo $(DEPDIR)/gsnapl_avx2-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_avx2-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnapl_avx2-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-translation.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx2-translation.Tpo -c -o gsnapl_avx2-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_avx2-translation.Tpo $(DEPDIR)/gsnapl_avx2-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_avx2-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnapl_avx2-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-doublelist.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-doublelist.Tpo -c -o gsnapl_avx2-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_avx2-doublelist.Tpo $(DEPDIR)/gsnapl_avx2-doublelist.Po
@@ -31896,20 +31774,6 @@ gsnapl_nosimd-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnapl_nosimd-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-translation.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-translation.Tpo -c -o gsnapl_nosimd-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_nosimd-translation.Tpo $(DEPDIR)/gsnapl_nosimd-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_nosimd-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnapl_nosimd-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-translation.obj -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-translation.Tpo -c -o gsnapl_nosimd-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_nosimd-translation.Tpo $(DEPDIR)/gsnapl_nosimd-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_nosimd-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnapl_nosimd-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-doublelist.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-doublelist.Tpo -c -o gsnapl_nosimd-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_nosimd-doublelist.Tpo $(DEPDIR)/gsnapl_nosimd-doublelist.Po
@@ -33128,20 +32992,6 @@ gsnapl_sse2-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnapl_sse2-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-translation.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-translation.Tpo -c -o gsnapl_sse2-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_sse2-translation.Tpo $(DEPDIR)/gsnapl_sse2-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_sse2-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnapl_sse2-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-translation.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse2-translation.Tpo -c -o gsnapl_sse2-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_sse2-translation.Tpo $(DEPDIR)/gsnapl_sse2-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_sse2-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnapl_sse2-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-doublelist.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-doublelist.Tpo -c -o gsnapl_sse2-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_sse2-doublelist.Tpo $(DEPDIR)/gsnapl_sse2-doublelist.Po
@@ -34360,20 +34210,6 @@ gsnapl_sse41-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnapl_sse41-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-translation.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-translation.Tpo -c -o gsnapl_sse41-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_sse41-translation.Tpo $(DEPDIR)/gsnapl_sse41-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_sse41-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnapl_sse41-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-translation.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse41-translation.Tpo -c -o gsnapl_sse41-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_sse41-translation.Tpo $(DEPDIR)/gsnapl_sse41-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_sse41-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnapl_sse41-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-doublelist.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-doublelist.Tpo -c -o gsnapl_sse41-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_sse41-doublelist.Tpo $(DEPDIR)/gsnapl_sse41-doublelist.Po
@@ -35592,20 +35428,6 @@ gsnapl_sse42-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnapl_sse42-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-translation.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-translation.Tpo -c -o gsnapl_sse42-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_sse42-translation.Tpo $(DEPDIR)/gsnapl_sse42-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_sse42-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnapl_sse42-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-translation.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse42-translation.Tpo -c -o gsnapl_sse42-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_sse42-translation.Tpo $(DEPDIR)/gsnapl_sse42-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_sse42-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnapl_sse42-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-doublelist.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-doublelist.Tpo -c -o gsnapl_sse42-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_sse42-doublelist.Tpo $(DEPDIR)/gsnapl_sse42-doublelist.Po
@@ -36824,20 +36646,6 @@ gsnapl_ssse3-gbuffer.obj: gbuffer.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
-gsnapl_ssse3-translation.o: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-translation.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-translation.Tpo -c -o gsnapl_ssse3-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_ssse3-translation.Tpo $(DEPDIR)/gsnapl_ssse3-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_ssse3-translation.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
-
-gsnapl_ssse3-translation.obj: translation.c
-@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-translation.obj -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-translation.Tpo -c -o gsnapl_ssse3-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_ssse3-translation.Tpo $(DEPDIR)/gsnapl_ssse3-translation.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='translation.c' object='gsnapl_ssse3-translation.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
-
gsnapl_ssse3-doublelist.o: doublelist.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-doublelist.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-doublelist.Tpo -c -o gsnapl_ssse3-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl_ssse3-doublelist.Tpo $(DEPDIR)/gsnapl_ssse3-doublelist.Po
diff --git a/src/datadir.c b/src/datadir.c
index 579f2d1..1d461c1 100644
--- a/src/datadir.c
+++ b/src/datadir.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: datadir.c 175304 2015-09-24 17:47:33Z twu $";
+static char rcsid[] = "$Id: datadir.c 188038 2016-04-20 17:47:47Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -77,9 +77,10 @@ find_homedir_config () {
static char *
find_fileroot (char *genomesubdir, char *genomedir, char *dbroot) {
- char *fileroot, *filename, *p;
+ char *fileroot = NULL, *filename, *p;
struct dirent *entry;
DIR *dp;
+ int nchoices = 0;
if ((dp = opendir(genomesubdir)) == NULL) {
/* Problem found. Try to diagnose */
@@ -96,27 +97,45 @@ find_fileroot (char *genomesubdir, char *genomedir, char *dbroot) {
exit(9);
}
}
+
+ /* Determine the number of .version files */
while ((entry = readdir(dp)) != NULL) {
filename = entry->d_name;
if ((p = rindex(filename,'.')) != NULL) {
if (!strcmp(p,".version")) {
+ FREE(fileroot);
fileroot = (char *) CALLOC(p - &(filename[0]) + 1,sizeof(char));
strncpy(fileroot,filename,p-&(filename[0]));
- if (closedir(dp) < 0) {
- fprintf(stderr,"Unable to close directory %s\n",genomesubdir);
+ if (!strcmp(fileroot,dbroot)) {
+ /* However, if dbroot exists, just take that */
+ if (closedir(dp) < 0) {
+ fprintf(stderr,"Unable to close directory %s\n",genomesubdir);
+ }
+ return fileroot;
+
+ } else {
+ nchoices += 1;
}
- return fileroot;
}
}
}
+ /* Did not find dbroot */
if (closedir(dp) < 0) {
fprintf(stderr,"Unable to close directory %s\n",genomesubdir);
}
- fprintf(stderr,"Unable to find file ending with .version in directory %s\n",genomesubdir);
- exit(9);
-}
+ if (nchoices == 1) {
+ return fileroot;
+ } else if (nchoices == 0) {
+ fprintf(stderr,"Unable to find file ending with .version in directory %s\n",genomesubdir);
+ exit(9);
+ } else {
+ fprintf(stderr,"Found multiple files (%d) ending with .version in directory %s\n",nchoices,genomesubdir);
+ FREE(fileroot);
+ exit(9);
+ }
+}
diff --git a/src/dynprog.c b/src/dynprog.c
index 48a7dff..21cb6c0 100644
--- a/src/dynprog.c
+++ b/src/dynprog.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog.c 184440 2016-02-17 21:03:49Z twu $";
+static char rcsid[] = "$Id: dynprog.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -89,16 +89,6 @@ static char rcsid[] = "$Id: dynprog.c 184440 2016-02-17 21:03:49Z twu $";
#define NEG_INFINITY_DISPLAY (-99)
#endif
-#define FULLMATCH 3
-#define HALFMATCH 1
-#define AMBIGUOUS 0
-
-
-/* These values were set to -5, -4, -3, but this led to chopped ends
- in GMAP alignments, and failure to find chimeras */
-#define MISMATCH_HIGHQ -3
-#define MISMATCH_MEDQ -2
-#define MISMATCH_LOWQ -1
/* Previously allowed lower mismatch scores on end to allow more
complete alignments to the end, and because ends are typically of
diff --git a/src/dynprog.h b/src/dynprog.h
index a9be567..3b43485 100644
--- a/src/dynprog.h
+++ b/src/dynprog.h
@@ -1,4 +1,4 @@
-/* $Id: dynprog.h 184430 2016-02-17 19:56:26Z twu $ */
+/* $Id: dynprog.h 188752 2016-05-01 17:28:22Z twu $ */
#ifndef DYNPROG_INCLUDED
#define DYNPROG_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -39,6 +39,17 @@ typedef struct Dynprog_T *Dynprog_T; /* Needed before header files below */
#endif
+#define FULLMATCH 3
+#define HALFMATCH 1
+#define AMBIGUOUS 0
+
+/* These values were set to -5, -4, -3, but this led to chopped ends
+ in GMAP alignments, and failure to find chimeras */
+#define MISMATCH_HIGHQ -3
+#define MISMATCH_MEDQ -2
+#define MISMATCH_LOWQ -1
+
+
typedef enum {HIGHQ, MEDQ, LOWQ, ENDQ} Mismatchtype_T;
#define NMISMATCHTYPES 4
diff --git a/src/dynprog_genome.c b/src/dynprog_genome.c
index e8b6f39..236b633 100644
--- a/src/dynprog_genome.c
+++ b/src/dynprog_genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_genome.c 184458 2016-02-18 00:06:33Z twu $";
+static char rcsid[] = "$Id: dynprog_genome.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -587,21 +587,25 @@ bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *be
for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
/* The following check limits genomic inserts (horizontal) and
multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
+ if (left_known[cL] > 0 && directionsL_lower_nogap[rL][cL] == DIAG) {
scoreL = (int) matrixL_lower[rL][cL];
+#if 0
if (directionsL_lower_nogap[rL][cL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
+#endif
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
+ if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
scoreR = (int) matrixR_lower[rR][cR];
+#if 0
if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
+#endif
if ((score = scoreL + scoreR) > bestscore ||
(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
@@ -638,12 +642,14 @@ bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *be
}
for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
+ if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
scoreR = (int) matrixR_upper[cR][rR];
+#if 0
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
+#endif
if ((score = scoreL + scoreR) > bestscore ||
(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
@@ -684,21 +690,25 @@ bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *be
for (/* at main diagonal*/; cL < chighL; cL++) {
/* The following check limits genomic inserts (horizontal) and
multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
+ if (left_known[cL] > 0 && directionsL_upper_nogap[cL][rL] == DIAG) {
scoreL = (int) matrixL_upper[cL][rL];
+#if 0
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
+#endif
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
+ if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
scoreR = (int) matrixR_lower[rR][cR];
+#if 0
if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
+#endif
if ((score = scoreL + scoreR) > bestscore ||
(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
@@ -735,12 +745,14 @@ bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *be
}
for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
+ if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
scoreR = (int) matrixR_upper[cR][rR];
+#if 0
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
+#endif
if ((score = scoreL + scoreR) > bestscore ||
(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
@@ -1078,7 +1090,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreL = (int) matrixL_upper[cL][rL];
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
@@ -1094,7 +1106,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreR = (int) matrixR_lower[rR][cR];
if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
#ifdef USE_SCOREI
@@ -1173,7 +1185,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreR = (int) matrixR_upper[cR][rR];
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
#ifdef USE_SCOREI
@@ -1253,7 +1265,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreR = (int) matrixR_upper[cR][rR];
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
@@ -1269,7 +1281,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreL = (int) matrixL_lower[rL][cL];
if (directionsL_lower_nogap[rL][cL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
#ifdef USE_SCOREI
@@ -1348,7 +1360,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
scoreL = (int) matrixL_upper[cL][rL];
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
#ifdef USE_SCOREI
@@ -1598,21 +1610,25 @@ bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *b
for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
/* The following check limits genomic inserts (horizontal) and
multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
+ if (left_known[cL] > 0 && directionsL_lower_nogap[rL][cL] == DIAG) {
scoreL = (int) matrixL_lower[rL][cL];
+#if 0
if (directionsL_lower_nogap[rL][cL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
+#endif
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
+ if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
scoreR = (int) matrixR_lower[rR][cR];
+#if 0
if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
+#endif
if ((score = scoreL + scoreR) > bestscore ||
(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
@@ -1649,12 +1665,14 @@ bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *b
}
for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
+ if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
scoreR = (int) matrixR_upper[cR][rR];
+#if 0
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
+#endif
if ((score = scoreL + scoreR) > bestscore ||
(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
@@ -1695,21 +1713,25 @@ bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *b
for (/* at main diagonal*/; cL < chighL; cL++) {
/* The following check limits genomic inserts (horizontal) and
multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
+ if (left_known[cL] > 0 && directionsL_upper_nogap[cL][rL] == DIAG) {
scoreL = (int) matrixL_upper[cL][rL];
+#if 0
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
+#endif
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
+ if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
scoreR = (int) matrixR_lower[rR][cR];
+#if 0
if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
+#endif
if ((score = scoreL + scoreR) > bestscore ||
(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
@@ -1746,12 +1768,14 @@ bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *b
}
for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
+ if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
scoreR = (int) matrixR_upper[cR][rR];
+#if 0
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
+#endif
if ((score = scoreL + scoreR) > bestscore ||
(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
@@ -2088,7 +2112,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreL = (int) matrixL_upper[cL][rL];
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
@@ -2104,7 +2128,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreR = (int) matrixR_lower[rR][cR];
if (directionsR_lower_nogap[rR][cR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
#ifdef USE_SCOREI
@@ -2183,7 +2207,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreR = (int) matrixR_upper[cR][rR];
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
#ifdef USE_SCOREI
@@ -2264,7 +2288,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreR = (int) matrixR_upper[cR][rR];
if (directionsR_upper_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
@@ -2280,7 +2304,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreL = (int) matrixL_lower[rL][cL];
if (directionsL_lower_nogap[rL][cL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
#ifdef USE_SCOREI
@@ -2359,7 +2383,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
scoreL = (int) matrixL_upper[cL][rL];
if (directionsL_upper_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
#ifdef USE_SCOREI
@@ -2608,21 +2632,25 @@ bridge_intron_gap_intron_level (int *bestrL, int *bestrR, int *bestcL, int *best
for (cL = cloL; cL < chighL; cL++) {
/* The following check limits genomic inserts (horizontal) and
multiple cDNA inserts (vertical). */
- if (left_known[cL] > 0) {
+ if (left_known[cL] > 0 && directionsL_nogap[cL][rL] == DIAG) {
scoreL = (int) matrixL[cL][rL];
+#if 0
if (directionsL_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
+#endif
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
for (cR = cloR; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
- if (right_known[cR] > 0) {
+ if (right_known[cR] > 0 && directionsR_nogap[cR][rR] == DIAG) {
scoreR = (int) matrixR[cR][rR];
+#if 0
if (directionsR_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
+#endif
if ((score = scoreL + scoreR) > bestscore ||
(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
@@ -2895,7 +2923,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
scoreL = (int) matrixL[cL][rL];
if (directionsL_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
@@ -2911,7 +2939,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
scoreR = (int) matrixR[cR][rR];
if (directionsR_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
#ifdef USE_SCOREI
@@ -2991,7 +3019,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
scoreR = (int) matrixR[cR][rR];
if (directionsR_nogap[cR][rR] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreR -= 1;
+ scoreR -= 100;
}
/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
@@ -3007,7 +3035,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
scoreL = (int) matrixL[cL][rL];
if (directionsL_nogap[cL][rL] != DIAG) {
/* Favor gaps away from intron if possible */
- scoreL -= 1;
+ scoreL -= 100;
}
#ifdef USE_SCOREI
diff --git a/src/gmap.c b/src/gmap.c
index 717550b..e084365 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 186739 2016-03-30 23:55:14Z twu $";
+static char rcsid[] = "$Id: gmap.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -61,6 +61,7 @@ static char rcsid[] = "$Id: gmap.c 186739 2016-03-30 23:55:14Z twu $";
#include "diagpool.h"
#include "cellpool.h"
#include "stopwatch.h"
+#include "translation.h" /* For Translation_setup */
#include "genome.h"
#include "genome-write.h"
#include "genome128_hr.h" /* For Genome_hr_setup */
@@ -175,6 +176,8 @@ static char rcsid[] = "$Id: gmap.c 186739 2016-03-30 23:55:14Z twu $";
* Global variables
************************************************************************/
+static int translation_code = 1; /* Genetic code number given to Translation_setup; set by --translation-code (default 1) */
+
static Univ_IIT_T chromosome_iit = NULL;
static Univ_IIT_T altscaffold_iit = NULL;
static Univcoord_T genomelength;
@@ -505,6 +508,7 @@ static struct option long_options[] = {
#else
{"localsplicedist", required_argument, 0, 'w'}, /* shortsplicedist */
#endif
+ {"translation-code", required_argument, 0, 0}, /* translation_code */
{"nthreads", required_argument, 0, 't'}, /* nworkers */
{"splicingdir", required_argument, 0, 0}, /* user_splicingdir */
@@ -1128,11 +1132,12 @@ update_stage3list (List_T stage3list, Sequence_T queryseq,
#ifdef PMAP
subseq_offset = Sequence_subseq_offset(queryseq); /* in nucleotides */
#endif
- pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
+ pairarray = Stage3_compute(&cdna_direction,&sensedir,&pairs,&npairs,&goodness,
&matches,&nmatches_posttrim,&max_match_length,
&ambig_end_length_5,&ambig_end_length_3,
&ambig_splicetype_5,&ambig_splicetype_3,
- &ambig_prob_5,&ambig_prob_3,&unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
+ &ambig_prob_5,&ambig_prob_3,
+ &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
&ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
Stage2_middle(stage2),Stage2_all_starts(stage2),Stage2_all_ends(stage2),
#ifdef PMAP
@@ -5171,6 +5176,9 @@ parse_command_line (int argc, char *argv[], int optind) {
} else if (!strcmp(long_name,"no-chimeras")) {
chimera_margin = 0;
+ } else if (!strcmp(long_name,"translation-code")) {
+ translation_code = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"min-intronlength")) {
min_intronlength = atoi(check_valid_int(optarg));
@@ -6498,6 +6506,9 @@ main (int argc, char *argv[]) {
fprintf(stderr,"done\n");
}
+
+ Translation_setup(translation_code);
+
if (user_pairalign_p == true) {
/* Creation of genomebits/genomecomp and initialization done within single_thread() for each input sequence */
@@ -7125,6 +7136,12 @@ Output options\n\
",output_buffer_size);
+ fprintf(stdout,"\
+ --translation-code=INT Genetic code used for translating codons to amino acids and computing CDS\n\
+ Integer value (default=1) corresponds to an available code at\n\
+ http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi\n\
+");
+
#ifdef PMAP
fprintf(stdout,"\
-Y, --tolerant Translates genome with corrections for frameshifts\n\
diff --git a/src/gsnap.c b/src/gsnap.c
index 9b339b7..06f4769 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 186727 2016-03-30 23:02:10Z twu $";
+static char rcsid[] = "$Id: gsnap.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -341,7 +341,8 @@ static int min_shortend = 2;
/* static bool find_novel_doublesplices_p = true; */
static int antistranded_penalty = 0; /* Most RNA-Seq is non-stranded */
-static int end_detail = 1; /* 2 (high), 1 (medium), or 0 (low) */
+/* Now that we don't use terminals, can have end_detail being high without too much slowdown */
+static int end_detail = 2; /* 2 (high), 1 (medium), or 0 (low) */
static Width_T index1part;
static Width_T required_index1part = 0;
@@ -3352,6 +3353,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
expected_pairlength,pairlength_deviation,
localsplicing_penalty,indel_penalty_middle,antistranded_penalty,
favor_multiexon_p,gmap_min_nconsecutive,end_detail,subopt_levels,
+ max_middle_insertions,max_middle_deletions,
novelsplicingp,shortsplicedist,
merge_samechr_p,circularp,altlocp,alias_starts,alias_ends,failedinput_root,print_m8_p,want_random_p);
SAM_setup(add_paired_nomappers_p,paired_flag_means_concordant_p,
@@ -4197,11 +4199,13 @@ is still designed to be fast.\n\
to turn off trimming, specify 0). Warning: turning trimming off\n\
will give false positive indels at the ends of reads\n\
",trim_indel_score);
+
fprintf(stdout,"\
- --end-detail=STRING Amount of alignment detail at ends of read: high, medium (default), or low\n\
- Warning: high detail at ends will slow down speed of program by a\n\
- factor of 3 or so. Medium detail does not incur much slow down, though\n\
+ --end-detail=STRING Amount of alignment detail at ends of read: high (default), medium, or low\n\
+ Note: medium detail could increase speed by 20%% or so, but will miss some\n\
+ splices at the ends of reads\n\
");
+
fprintf(stdout,"\
-V, --snpsdir=STRING Directory for SNPs index files (created using snpindex) (default is\n\
location of genome index files specified using -D and -d)\n \
diff --git a/src/pair.c b/src/pair.c
index 4b44204..0f2e1cb 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 186094 2016-03-17 22:30:40Z twu $";
+static char rcsid[] = "$Id: pair.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -28,7 +28,6 @@ static char rcsid[] = "$Id: pair.c 186094 2016-03-17 22:30:40Z twu $";
#include "separator.h"
#include "scores.h"
#include "segmentpos.h"
-#include "translation.h"
#include "maxent.h"
#include "maxent_hr.h"
#include "mapq.h"
@@ -381,7 +380,6 @@ Pair_clip_bounded_array (struct T *source, int npairs, int minpos, int maxpos) {
-
/* Head of list is the medial part of the read */
List_T
Pair_protect_end5 (List_T pairs) {
@@ -1629,6 +1627,21 @@ Pair_dump_array_stderr (struct T *pairs, int npairs, bool zerobasedp) {
}
+void
+Pair_dump_comp_array (struct T *pairs, int npairs) { /* Prints the comp character of each of the npairs pairs to stdout on one line */
+ struct T *this;
+ int i;
+
+ for (i = 0; i < npairs; i++) {
+ this = pairs++; /* Take the current element, then advance to the next one */
+ printf("%c",this->comp); /* One character per pair, no separator */
+ }
+ printf("\n"); /* Terminate the line after all pairs have been printed */
+
+ return;
+}
+
+
Chrpos_T
Pair_genomicpos (struct T *pairs, int npairs, int querypos, bool headp) {
struct T *this;
@@ -2065,11 +2078,11 @@ Pair_print_exonsummary (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T c
}
if (genome != NULL) {
- if (cdna_direction >= 0) {
+ if (cdna_direction > 0) {
FPRINTF(fp," %.3f, %.3f",
donor_score(chroffset+exon_genomeend-1,chroffset,!watsonp,genome,chromosome_iit),
acceptor_score(chroffset+exon_genomestart-1,chroffset,!watsonp,genome,chromosome_iit));
- } else {
+ } else if (cdna_direction < 0) {
FPRINTF(fp," %.3f, %.3f",
acceptor_score(chroffset+exon_genomeend-1,chroffset,watsonp,genome,chromosome_iit),
donor_score(chroffset+exon_genomestart-1,chroffset,watsonp,genome,chromosome_iit));
@@ -3361,7 +3374,7 @@ print_endtypes (Filestring_T fp,
} else if (cdna_direction < 0) {
prob = Maxent_hr_antidonor_prob(chroffset+exon_genomestart-1U,chroffset);
} else {
- abort();
+ prob = 0.0;
}
} else {
prev_splicesitepos = exon_genomestart;
@@ -3370,7 +3383,7 @@ print_endtypes (Filestring_T fp,
} else if (cdna_direction < 0) {
prob = Maxent_hr_donor_prob(chroffset+exon_genomestart,chroffset);
} else {
- abort();
+ prob = 0.0;
}
}
FPRINTF(fp,"donor:%.2f",prob);
@@ -3383,7 +3396,7 @@ print_endtypes (Filestring_T fp,
} else if (cdna_direction < 0) {
prob = Maxent_hr_antiacceptor_prob(chroffset+exon_genomestart,chroffset);
} else {
- abort();
+ prob = 0.0;
}
} else {
prev_splicesitepos = exon_genomestart;
@@ -3392,10 +3405,14 @@ print_endtypes (Filestring_T fp,
} else if (cdna_direction < 0) {
prob = Maxent_hr_acceptor_prob(chroffset+exon_genomestart-1U,chroffset);
} else {
- abort();
+ prob = 0.0;
}
}
FPRINTF(fp,"acceptor:%.2f",prob);
+ } else if (endtype1 == FRAG) {
+ typeint1 = donor_typeint;
+ prev_splicesitepos = exon_genomestart;
+ FPRINTF(fp,"frag");
} else {
FPRINTF(fp,"unknown");
}
@@ -3417,7 +3434,7 @@ print_endtypes (Filestring_T fp,
} else if (cdna_direction < 0) {
prob = Maxent_hr_antidonor_prob(chroffset+exon_genomeend-1U,chroffset);
} else {
- abort();
+ prob = 0.0;
}
} else {
splicesitepos = exon_genomeend-1U;
@@ -3426,7 +3443,7 @@ print_endtypes (Filestring_T fp,
} else if (cdna_direction < 0) {
prob = Maxent_hr_donor_prob(chroffset+exon_genomeend,chroffset);
} else {
- abort();
+ prob = 0.0;
}
}
FPRINTF(fp,"donor:%.2f",prob);
@@ -3439,7 +3456,7 @@ print_endtypes (Filestring_T fp,
} else if (cdna_direction < 0) {
prob = Maxent_hr_antiacceptor_prob(chroffset+exon_genomeend,chroffset);
} else {
- abort();
+ prob = 0.0;
}
} else {
splicesitepos = exon_genomeend-1U;
@@ -3448,10 +3465,13 @@ print_endtypes (Filestring_T fp,
} else if (cdna_direction < 0) {
prob = Maxent_hr_acceptor_prob(chroffset+exon_genomeend-1U,chroffset);
} else {
- abort();
+ prob = 0.0;
}
}
FPRINTF(fp,"acceptor:%.2f",prob);
+ } else if (endtype2 == FRAG) {
+ splicesitepos = exon_genomeend;
+ FPRINTF(fp,"frag");
} else {
FPRINTF(fp,"unknown");
}
@@ -3465,7 +3485,7 @@ print_endtypes (Filestring_T fp,
/* Double introns */
if (cdna_direction > 0) {
FPRINTF(fp,",dir:sense,splice_type:consistent");
- } else {
+ } else if (cdna_direction < 0) {
FPRINTF(fp,",dir:antisense,splice_type:consistent");
}
FPRINTF(fp,",splice_dist_1:%u,splice_dist_2:%u",prev_splice_dist,splice_dist);
@@ -3478,7 +3498,7 @@ print_endtypes (Filestring_T fp,
/* Prev intron */
if (cdna_direction > 0) {
FPRINTF(fp,",dir:sense,splice_type:consistent");
- } else {
+ } else if (cdna_direction < 0) {
FPRINTF(fp,",dir:antisense,splice_type:consistent");
}
FPRINTF(fp,",splice_dist_1:%u",prev_splice_dist);
@@ -3493,7 +3513,7 @@ print_endtypes (Filestring_T fp,
/* Next intron */
if (cdna_direction > 0) {
FPRINTF(fp,",dir:sense,splice_type:consistent");
- } else {
+ } else if (cdna_direction < 0) {
FPRINTF(fp,",dir:antisense,splice_type:consistent");
}
FPRINTF(fp,",splice_dist_2:%u",splice_dist);
@@ -3662,14 +3682,18 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
} else if (prev_endtype == ACC) {
prev_splice_dist = splice_dist;
prev_endtype = DON;
+ } else if (prev_endtype == FRAG) {
+ prev_splice_dist = splice_dist;
} else {
- prev_splice_dist = 0U;
+ prev_splice_dist = 0;
}
if (cdna_direction > 0) {
endtype = DON;
- } else {
+ } else if (cdna_direction < 0) {
endtype = ACC;
+ } else {
+ endtype = FRAG;
}
if (watsonp == true) {
@@ -3756,8 +3780,10 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
} else if (prev_endtype == ACC) {
prev_splice_dist = splice_dist;
prev_endtype = DON;
+ } else if (prev_endtype == FRAG) {
+ prev_splice_dist = splice_dist;
} else {
- prev_splice_dist = 0U;
+ prev_splice_dist = 0;
}
/* indel_pos = this->querypos; */
@@ -3827,6 +3853,8 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
} else if (prev_endtype == ACC) {
prev_endtype = DON;
prev_splice_dist = splice_dist;
+ } else if (prev_endtype == FRAG) {
+ prev_splice_dist = splice_dist;
} else {
prev_splice_dist = 0U;
}
@@ -3927,6 +3955,8 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
} else if (prev_endtype == ACC) {
prev_endtype = DON;
prev_splice_dist = splice_dist;
+ } else if (prev_endtype == FRAG) {
+ prev_splice_dist = splice_dist;
} else {
prev_splice_dist = 0U;
}
@@ -4400,15 +4430,15 @@ Pair_guess_cdna_direction_array (int *sensedir, struct T *pairs_querydir, int np
FREE(pairs);
}
- if (sense_prob == 0.0 && antisense_prob == 0.0) {
- *sensedir = SENSE_NULL;
- return 0;
- } else if (sense_prob >= antisense_prob) {
+ if (sense_prob > antisense_prob) {
*sensedir = SENSE_FORWARD;
return +1;
- } else {
+ } else if (sense_prob < antisense_prob) {
*sensedir = SENSE_ANTI;
return -1;
+ } else {
+ *sensedir = SENSE_NULL;
+ return 0;
}
}
@@ -4489,7 +4519,6 @@ Pair_fix_cdna_direction_array (struct T *pairs_querydir, int npairs, int cdna_di
-
int
Pair_gsnap_nsegments (int *total_nmismatches, int *total_nindels, int *nintrons,
int *nindelbreaks, struct T *pairs, int npairs) {
@@ -4564,10 +4593,8 @@ Pair_gsnap_nsegments (int *total_nmismatches, int *total_nindels, int *nintrons,
exit(9);
}
- } else {
- if (this->genome != this->cdna) {
- (*total_nmismatches) += 1;
- }
+ } else if (this->genome != this->cdna) {
+ (*total_nmismatches) += 1;
}
}
}
@@ -4698,18 +4725,17 @@ print_chopped_end (Filestring_T fp, char *contents, int querylength,
int hardclip_start, int hardclip_end) {
int i;
- if (hardclip_start > 0) {
- for (i = 0; i < hardclip_start; i++) {
- PUTC(contents[i],fp);
- }
- return;
+ for (i = 0; i < hardclip_start; i++) {
+ PUTC(contents[i],fp);
+ }
- } else {
- for (i = querylength - hardclip_end; i < querylength; i++) {
- PUTC(contents[i],fp);
- }
- return;
+ /* No separator */
+
+ for (i = querylength - hardclip_end; i < querylength; i++) {
+ PUTC(contents[i],fp);
}
+
+ return;
}
/* Differs from Shortread version, in that hardclip_high and hardclip_low are not reversed */
@@ -4718,18 +4744,17 @@ print_chopped_end_revcomp (Filestring_T fp, char *contents, int querylength,
int hardclip_start, int hardclip_end) {
int i;
- if (hardclip_start > 0) {
- for (i = hardclip_start - 1; i >= 0; --i) {
- PUTC(complCode[(int) contents[i]],fp);
- }
- return;
+ for (i = querylength - 1; i >= querylength - hardclip_end; --i) {
+ PUTC(complCode[(int) contents[i]],fp);
+ }
- } else {
- for (i = querylength - 1; i >= querylength - hardclip_end; --i) {
- PUTC(complCode[(int) contents[i]],fp);
- }
- return;
+ /* No separator */
+
+ for (i = hardclip_start - 1; i >= 0; --i) {
+ PUTC(complCode[(int) contents[i]],fp);
}
+
+ return;
}
@@ -5027,7 +5052,8 @@ print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, ch
FPRINTF(fp,"\tXO:Z:%s",abbrev);
/* 12. TAGS: XS */
-#ifdef GSNAP
+#if 0
+ /* Previously used for GSNAP */
if (sensedir == SENSE_NULL) {
sensedir = mate_sensedir;
}
@@ -5281,11 +5307,9 @@ Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struc
int Mlength = 0, Ilength = 0, Dlength = 0;
bool in_exon = false, deletionp;
struct T *ptr, *prev, *this = NULL;
- /* int exon_querystart = -1; */
int exon_queryend = -1;
Chrpos_T exon_genomestart = -1;
Chrpos_T exon_genomeend, genome_gap;
- Chrpos_T intron_start, intron_end;
int query_gap;
int last_querypos = -1;
Chrpos_T last_genomepos = -1U;
@@ -5328,21 +5352,22 @@ Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struc
this = ptr++;
#if 0
- print_tokens_sam(stdout,tokens);
- printf("querypos %d, %c %c, exon %u..%u, intron %u..%u\n",
- this->querypos,this->cdna,this->genome,exon_genomestart,exon_genomeend,
- intron_start,intron_end);
+ /* print_tokens_sam(stdout,tokens); */
+ Pair_dump_one(this,true);
+ printf("\n");
#endif
if (this->gapp) {
if (in_exon == true) {
exon_queryend = last_querypos + 1;
exon_genomeend = last_genomepos + 1;
+#if 0
if (watsonp) {
intron_start = exon_genomeend + 1;
} else {
intron_start = exon_genomeend - 1;
}
+#endif
if (Mlength > 0) {
sprintf(token,"%dM",Mlength);
@@ -5369,20 +5394,19 @@ Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struc
if (in_exon == false) {
/* exon_querystart = this->querypos + 1; */
exon_genomestart = this->genomepos + 1;
- if (watsonp) {
- intron_end = exon_genomestart - 1;
- } else {
- intron_end = exon_genomestart + 1;
- }
if (prev != NULL) {
/* Gap */
/* abs() gives a large value when flag -m64 is specified */
/* genome_gap = abs(intron_end - intron_start) + 1; */
- if (intron_end > intron_start) {
- genome_gap = (intron_end - intron_start) + 1;
+ if (watsonp) {
+ /* intron_end = exon_genomestart - 1; */
+ /* genome_gap = (intron_end - intron_start) + 1; */
+ genome_gap = exon_genomestart - exon_genomeend - 1;
} else {
- genome_gap = (intron_start - intron_end) + 1;
+ /* intron_end = exon_genomestart + 1; */
+ /* genome_gap = (intron_start - intron_end) + 1; */
+ genome_gap = exon_genomeend - exon_genomestart - 1;
}
deletionp = false;
@@ -5505,8 +5529,8 @@ Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struc
}
/* prev = this; */
- exon_queryend = last_querypos + 1;
- exon_genomeend = last_genomepos + 1;
+ /* exon_queryend = last_querypos + 1; */
+ /* exon_genomeend = last_genomepos + 1; */
if (Mlength > 0) {
sprintf(token,"%dM",Mlength);
diff --git a/src/pair.h b/src/pair.h
index f38a793..6147d2b 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 184432 2016-02-17 20:00:02Z twu $ */
+/* $Id: pair.h 188752 2016-05-01 17:28:22Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -117,6 +117,8 @@ extern void
Pair_dump_array (struct T *pairs, int npairs, bool zerobasedp);
extern void
Pair_dump_array_stderr (struct T *pairs, int npairs, bool zerobasedp);
+extern void
+Pair_dump_comp_array (struct T *pairs, int npairs);
extern Chrpos_T
Pair_genomicpos (struct T *pairs, int npairs, int querypos, bool headp);
extern int
diff --git a/src/sarray-read.c b/src/sarray-read.c
index 2a69e35..72c85e3 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 186964 2016-04-04 20:27:35Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -228,6 +228,7 @@ print_vector_uint (__m128i x) {
return;
}
+#ifdef HAVE_AVX2
static void
print_vector_hex_256 (__m256i x) {
UINT4 *s = (UINT4 *) &x;
@@ -246,6 +247,7 @@ print_vector_uint_256 (__m256i x) {
return;
}
#endif
+#endif
@@ -5531,7 +5533,7 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
printf("Right side before sorting\n");
for (i = 0; i < ndiagonals; i++) {
diagonal = diagonal_array[i];
- printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->diagonal);
+ printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
}
#endif
@@ -5643,7 +5645,7 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
printf("Left side after sorting\n");
for (i = 0; i < ndiagonals; i++) {
diagonal = diagonal_array[i];
- printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->diagonal);
+ printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
}
#endif
@@ -5891,16 +5893,19 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
}
/* Distinguish right paths by looking for indel (which wins) or splicing */
- debug13(printf("Have %d right_paths\n",List_length(*right_paths)));
+ debug13(printf("Have %d right_paths. Distinguish by looking for indels\n",List_length(*right_paths)));
for (p = *right_paths; p != NULL; p = List_next(p)) {
ambig_path = (List_T) List_head(p);
diagonal = (Univdiag_T) List_head(ambig_path);
left = diagonal->univdiagonal;
+ debug13(printf("left %u, prev_left %u\n",left,prev_left));
if (left < prev_left) {
/* Insertion */
+ debug13(printf("Found insertion\n"));
right_indel_diagonal = diagonal;
- } else if (prev_left - left < MIN_INTRONLEN) {
+ } else if (left - prev_left < MIN_INTRONLEN) {
/* Deletion */
+ debug13(printf("Found deletion\n"));
right_indel_diagonal = diagonal;
}
}
@@ -6106,7 +6111,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
if (right_indel_diagonal != NULL) {
debug13(printf("Pushing right indel diagonal onto middle: query %d..%d, diagonal %u\n",
- right_indel_diagonal->querystart,right_indel_diagonal->queryend,right_indel_diagonal->univdiagonal));
+ right_indel_diagonal->querystart,right_indel_diagonal->queryend,right_indel_diagonal->univdiagonal - chroffset));
middle_path = List_push(middle_path,(void *) right_indel_diagonal);
}
@@ -6114,14 +6119,14 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
while (common_diagonal != NULL) {
middle_path = List_push(middle_path,(void *) common_diagonal);
debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
- common_diagonal->querystart,common_diagonal->queryend,common_diagonal->univdiagonal));
+ common_diagonal->querystart,common_diagonal->queryend,common_diagonal->univdiagonal - chroffset));
common_diagonal = common_diagonal->prev;
}
/* B. Process original middle diagonal */
middle_path = List_push(middle_path,(void *) middle_diagonal);
debug13(printf("Pushing middle diagonal onto middle: query %d..%d, diagonal %u\n",
- middle_diagonal->querystart,middle_diagonal->queryend,middle_diagonal->univdiagonal));
+ middle_diagonal->querystart,middle_diagonal->queryend,middle_diagonal->univdiagonal - chroffset));
/* C3. Traceback for dynamic programming */
@@ -6186,16 +6191,19 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
}
/* Distinguish left paths by looking for indel (which wins) or splicing */
- debug13(printf("Have %d left_paths\n",List_length(*left_paths)));
+ debug13(printf("Have %d left_paths. Distinguish by looking for indel\n",List_length(*left_paths)));
for (p = *left_paths; p != NULL; p = List_next(p)) {
ambig_path = (List_T) List_head(p);
prev_diagonal = (Univdiag_T) List_head(ambig_path);
prev_left = prev_diagonal->univdiagonal;
+ debug13(printf("left %u, prev_left %u\n",left,prev_left));
if (left < prev_left) {
/* Insertion */
+ debug13(printf("Found insertion\n"));
left_indel_diagonal = prev_diagonal;
- } else if (prev_left - left < MIN_INTRONLEN) {
+ } else if (left - prev_left < MIN_INTRONLEN) {
/* Deletion */
+ debug13(printf("Found deletion\n"));
left_indel_diagonal = prev_diagonal;
}
}
@@ -6352,7 +6360,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
for (p = diagonal_path; p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
- diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal - chroffset));
middle_path = List_push(middle_path,(void *) diagonal);
}
List_free(&diagonal_path);
@@ -6360,7 +6368,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
if (left_indel_diagonal != NULL) {
debug13(printf("Pushing left indel diagonal onto middle: query %d..%d, diagonal %u\n",
- left_indel_diagonal->querystart,left_indel_diagonal->queryend,left_indel_diagonal->univdiagonal));
+ left_indel_diagonal->querystart,left_indel_diagonal->queryend,left_indel_diagonal->univdiagonal - chroffset));
middle_path = List_push(middle_path,(void *) left_indel_diagonal);
}
@@ -6448,7 +6456,8 @@ run_gmap_plus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_p
stage2pairs = (List_T) NULL;
for (p = diagonal_path; p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
- debug13(printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+ debug13(printf("Diagonal %d..%d at %u [%u]\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset));
querypos = diagonal->querystart;
genomepos = diagonal->univdiagonal + diagonal->querystart - chroffset;
while (querypos <= diagonal->queryend) {
@@ -6478,7 +6487,8 @@ run_gmap_plus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_p
stage2pairs = (List_T) NULL;
for (p = diagonal_path; p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
- debug13(printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+ debug13(printf("Diagonal %d..%d at %u [%u]\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset));
querypos = diagonal->querystart;
genomepos = diagonal->univdiagonal + diagonal->querystart - chroffset;
while (querypos <= diagonal->queryend) {
@@ -6501,7 +6511,8 @@ run_gmap_plus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_p
printf("MIDDLE DIAGONALS, PLUS\n");
for (p = middle_path; p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
- printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
+ printf("Diagonal %d..%d at %u [%u]\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset);
}
#endif
@@ -6627,7 +6638,8 @@ run_gmap_minus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_
stage2pairs = (List_T) NULL;
for (p = diagonal_path; p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
- debug13(printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+ debug13(printf("Diagonal %d..%d at %u [%u]\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset));
querypos = querylength - 1 - diagonal->queryend;
genomepos = chrhigh - (diagonal->univdiagonal + diagonal->queryend);
while (querypos <= querylength - 1 - diagonal->querystart) {
@@ -6657,7 +6669,8 @@ run_gmap_minus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_
stage2pairs = (List_T) NULL;
for (p = diagonal_path; p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
- debug13(printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+ debug13(printf("Diagonal %d..%d at %u [%u]\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset));
querypos = querylength - 1 - diagonal->queryend;
genomepos = chrhigh - (diagonal->univdiagonal + diagonal->queryend);
while (querypos <= querylength - 1 - diagonal->querystart) {
@@ -6680,7 +6693,8 @@ run_gmap_minus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_
printf("MIDDLE DIAGONALS, MINUS\n");
for (p = middle_path; p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
- printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
+ printf("Diagonal %d..%d at %u [%u]\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset);
}
#endif
@@ -6976,7 +6990,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
printf("Original diagonals:\n");
for (p = middle_path; p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
- printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
+ printf("%d..%d at %u [%u]\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset);
}
printf("\n");
#endif
@@ -7040,7 +7054,9 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
printf("Super diagonals on chrnum %d:\n",chrnum);
for (p = super_path; p != NULL; p = List_next(p)) {
diagonal = (Univdiag_T) List_head(p);
- printf("%d..%d at %u with %d mismatches\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->intscore);
+ printf("%d..%d at %u [%u] with %d mismatches\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,
+ diagonal->univdiagonal - chroffset,diagonal->intscore);
}
printf("\n");
#endif
@@ -7052,7 +7068,8 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
prev_diagonal = (Univdiag_T) List_head(p);
prev_left = prev_diagonal->univdiagonal;
- debug13(printf("left %u for diagonal %d..%d\n",prev_left,prev_diagonal->querystart,prev_diagonal->queryend));
+ debug13(printf("Diagonal %d..%d for left %u [%u]\n",
+ prev_diagonal->querystart,prev_diagonal->queryend,prev_left,prev_left - chroffset));
sense_endpoints = Intlist_push(NULL,prev_diagonal->querystart);
antisense_endpoints = Intlist_push(NULL,prev_diagonal->querystart);
@@ -7068,8 +7085,8 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
left = diagonal->univdiagonal;
assert(left != prev_left); /* Because we already handled mismatches above */
- debug13(printf("Diagonal %d..%d at leftpos %u, diff %d\n",
- diagonal->querystart,diagonal->queryend,left,left - prev_left));
+ debug13(printf("Diagonal %d..%d at leftpos %u [%u], diff %d\n",
+ diagonal->querystart,diagonal->queryend,left,left - chroffset,left - prev_left));
if (left < prev_left) {
/* Insertion */
diff --git a/src/splice.c b/src/splice.c
index 4752c84..8b9b15e 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 186965 2016-04-04 20:29:04Z twu $";
+static char rcsid[] = "$Id: splice.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -384,7 +384,7 @@ Splice_resolve_sense (int *best_knowni_i, int *best_knowni_j,
debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
debug1(printf("nmismatches %d and %d\n",*best_nmismatches_i,*best_nmismatches_j));
return best_splice_pos;
- } else if (*best_prob_i > 0.80 && *best_prob_j > 0.85) {
+ } else if (*best_prob_i > 0.40 && *best_prob_j > 0.40) {
debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
debug1(printf("nmismatches %d and %d\n",*best_nmismatches_i,*best_nmismatches_j));
return best_splice_pos;
@@ -691,7 +691,7 @@ Splice_resolve_antisense (int *best_knowni_i, int *best_knowni_j,
debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
debug1(printf("nmismatches %d and %d\n",*best_nmismatches_i,*best_nmismatches_j));
return best_splice_pos;
- } else if (*best_prob_i > 0.80 && *best_prob_j > 0.85) {
+ } else if (*best_prob_i > 0.40 && *best_prob_j > 0.40) {
debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
debug1(printf("nmismatches %d and %d\n",*best_nmismatches_i,*best_nmismatches_j));
return best_splice_pos;
diff --git a/src/stage1hr.c b/src/stage1hr.c
index bb016fd..33fe37a 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 186091 2016-03-17 22:23:16Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -97,6 +97,8 @@ static char rcsid[] = "$Id: stage1hr.c 186091 2016-03-17 22:23:16Z twu $";
#define MAX_NTERMINALS 100
#define MAX_ALLOCATION 200
+#define PAIRMAX_ADDITIONAL 10000 /* Allows for finding of unpaired GMAP alignments beyond pairmax */
+
static bool use_sarray_p = true;
static bool use_only_sarray_p = true;
@@ -108,6 +110,7 @@ static int maxpaths_search;
/* For spliceable (really "joinable", if we consider indels) */
static Chrpos_T overall_max_distance;
+static Chrpos_T overall_max_distance_novelend;
/* Other distances */
static int min_indel_end_matches;
@@ -115,7 +118,6 @@ static int max_middle_insertions_default; /* If negative, then compute queryleng
static int max_middle_deletions;
static Chrpos_T shortsplicedist;
static Chrpos_T shortsplicedist_known;
-static Chrpos_T shortsplicedist_novelend;
/* Penalties */
@@ -13235,18 +13237,23 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
Stage2_T stage2;
Univinterval_T interval;
- List_T pairs;
- struct Pair_T *pairarray;
+ List_T pairs1, pairs2;
+ struct Pair_T *pairarray1, *pairarray2;
Univcoord_T start, end;
- double min_splice_prob;
- int goodness;
- int npairs, nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
+ double min_splice_prob_1, min_splice_prob_2;
+ int goodness1, goodness2;
+ int npairs1, npairs2, nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
int cdna_direction, sensedir;
- int matches, unknowns, mismatches, qopens, qindels, topens, tindels;
- int nmatches_posttrim, max_match_length, ambig_end_length_5, ambig_end_length_3;
- Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
- double ambig_prob_5, ambig_prob_3;
- int ncanonical, nsemicanonical, nnoncanonical;
+ int matches1, unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
+ ncanonical1, nsemicanonical1, nnoncanonical1;
+ int matches2, unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
+ ncanonical2, nsemicanonical2, nnoncanonical2;
+ int nmatches_posttrim_1, max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1;
+ int nmatches_posttrim_2, max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2;
+ Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
+ Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
+ double ambig_prob_5_1, ambig_prob_3_1;
+ double ambig_prob_5_2, ambig_prob_3_2;
/* int maxintronlen_bound; */
@@ -13309,6 +13316,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
}
#else
/* maxintronlen_bound = shortsplicedist; */
+ /* maxintronlen_bound = overall_max_distance; */ /* was shortsplicedist, but misses microinversions */
#endif
@@ -13333,140 +13341,261 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
for (p = all_stage2results; p != NULL; p = List_next(p)) {
stage2 = (Stage2_T) List_head(p);
- if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
- &matches,&nmatches_posttrim,&max_match_length,
- &ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,
- &ambig_prob_5,&ambig_prob_3,
- &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
- &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
- Stage2_middle(stage2),Stage2_all_starts(stage2),Stage2_all_ends(stage2),
+ if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+ &matches1,&nmatches_posttrim_1,&max_match_length_1,
+ &ambig_end_length_5_1,&ambig_end_length_3_1,
+ &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+ &ambig_prob_5_1,&ambig_prob_3_1,
+ &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+ &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+
+ &pairarray2,&pairs2,&npairs2,&goodness2,
+ &matches2,&nmatches_posttrim_2,&max_match_length_2,
+ &ambig_end_length_5_2,&ambig_end_length_3_2,
+ &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+ &ambig_prob_5_2,&ambig_prob_3_2,
+ &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+ &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+
+ Stage2_middle(stage2),Stage2_all_starts(stage2),Stage2_all_ends(stage2),
#ifdef END_KNOWNSPLICING_SHORTCUT
- cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
- watsonp ? query_compress_fwd : query_compress_rev,
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
#endif
- /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
#ifdef EXTRACT_GENOMICSEG
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#else
- /*query_subseq_offset*/0,
-#endif
- chrnum,chroffset,chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,watsonp,genestrand,
- /*jump_late_p*/watsonp ? false : true,
-
- maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- sense_try,/*sense_filter*/0,
- oligoindices_minor,diagpool,cellpool)) == NULL) {
+ /*query_subseq_offset*/0,
+#endif
+ chrnum,chroffset,chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,watsonp,genestrand,
+ /*jump_late_p*/watsonp ? false : true,
+
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ sense_try,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
debug13(printf("stage3 is NULL\n"));
stored_hits = List_push(stored_hits,(void *) NULL);
- } else {
- debug13(printf("stage3 is not NULL\n"));
+ } else if (cdna_direction == 0) {
+ debug13(printf("stage3 is not NULL, and cdna direction not determined\n"));
+ debug13a(Pair_dump_array(pairarray1,npairs1,true));
- debug13a(Pair_dump_array(pairarray,npairs,true));
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray1,npairs1);
+ if (watsonp == true) {
+ start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*minusterm*/Pair_querypos(&(pairarray1[0])),chroffset);
+ end = add_bounded(chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),chrhigh);
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/watsonp,genestrand,
+ accession,querylength,chrnum,chroffset,chrhigh,chrlength,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ debug13(printf("Stage3end_new_gmap returns NULL\n"));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ FREE_OUT(pairarray1);
- if (0 && Stage3_short_alignment_p(pairarray,npairs,querylength) == true) {
- /* Very bad alignment */
- debug13(printf("Very bad alignment\n"));
- stored_hits = List_push(stored_hits,(void *) NULL);
- FREE_OUT(pairarray);
+ } else {
+ if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+ *good_start_p = true;
+ }
+ if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+ *good_end_p = true;
+ }
+ debug13(printf("Trim at start: %d, trim at end: %d\n",
+ Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
+ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
+ hits = List_push(hits,(void *) hit);
+ }
} else {
- nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
- pairarray,npairs);
- if (watsonp == true) {
- start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
- /*minusterm*/Pair_querypos(&(pairarray[0])),chroffset);
- end = add_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chrhigh);
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
- /*left*/start,/*genomiclength*/end - start + 1,
- /*plusp*/watsonp,genestrand,
- accession,querylength,chrnum,chroffset,chrhigh,chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
- debug13(printf("Stage3end_new_gmap returns NULL\n"));
- stored_hits = List_push(stored_hits,(void *) NULL);
- FREE_OUT(pairarray);
+ start = add_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*plusterm*/Pair_querypos(&(pairarray1[0])),chrhigh);
+ end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),chroffset);
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/watsonp,genestrand,
+ accession,querylength,chrnum,chroffset,chrhigh,chrlength,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ debug13(printf("Stage3end_new_gmap returns NULL\n"));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ FREE_OUT(pairarray1);
-#if 0
- } else if (Stage3end_bad_stretch_p(hit,query_compress_fwd,query_compress_rev) == true) {
- debug13(printf("Stage3end_new_gmap has a bad stretch\n"));
- Stage3end_free(&hit);
- stored_hits = List_push(stored_hits,(void *) NULL);
- /* FREE_OUT(pairarray); */
-#endif
-
- } else {
- if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
- *good_start_p = true;
- }
- if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
- *good_end_p = true;
- }
- debug13(printf("Trim at start: %d, trim at end: %d\n",
- Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
- /* Don't throw away GMAP hits */
- if (0 && (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
- stored_hits = List_push(stored_hits,(void *) NULL);
- Stage3end_free(&hit);
- } else {
- stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
- hits = List_push(hits,(void *) hit);
- }
+ } else {
+ if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+ *good_start_p = true;
+ }
+ if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+ *good_end_p = true;
}
+ debug13(printf("Trim at start: %d, trim at end: %d (raw %d and %d)\n",
+ Stage3end_trim_right(hit),Stage3end_trim_left(hit),
+ Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
+ /* Don't throw away GMAP hits */
+ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
+ hits = List_push(hits,(void *) hit);
+ }
+ }
+ /* Don't free pairarray1 */
+
+ debug13a(Pair_dump_array(pairarray2,npairs2,true));
+
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray2,npairs2);
+ if (watsonp == true) {
+ start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray2[0])),
+ /*minusterm*/Pair_querypos(&(pairarray2[0])),chroffset);
+ end = add_bounded(chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),chrhigh);
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+ ambig_end_length_5_2,ambig_end_length_3_2,
+ ambig_splicetype_5_2,ambig_splicetype_3_2,
+ min_splice_prob_2,
+ pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/watsonp,genestrand,
+ accession,querylength,chrnum,chroffset,chrhigh,chrlength,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ debug13(printf("Stage3end_new_gmap returns NULL\n"));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ FREE_OUT(pairarray2);
+
} else {
- start = add_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
- /*plusterm*/Pair_querypos(&(pairarray[0])),chrhigh);
- end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chroffset);
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
- /*left*/end,/*genomiclength*/start - end + 1,
- /*plusp*/watsonp,genestrand,
- accession,querylength,chrnum,chroffset,chrhigh,chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
- debug13(printf("Stage3end_new_gmap returns NULL\n"));
- stored_hits = List_push(stored_hits,(void *) NULL);
- FREE_OUT(pairarray);
+ if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+ *good_start_p = true;
+ }
+ if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+ *good_end_p = true;
+ }
+ debug13(printf("Trim at start: %d, trim at end: %d\n",
+ Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
+ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
+ hits = List_push(hits,(void *) hit);
+ }
-#if 0
- } else if (Stage3end_bad_stretch_p(hit,query_compress_fwd,query_compress_rev) == true) {
- debug13(printf("Stage3end_new_gmap has a bad stretch\n"));
- stored_hits = List_push(stored_hits,(void *) NULL);
- Stage3end_free(&hit);
- /* FREE_OUT(pairarray); */
-#endif
+ } else {
+ start = add_bounded(chroffset + Pair_genomepos(&(pairarray2[0])),
+ /*plusterm*/Pair_querypos(&(pairarray2[0])),chrhigh);
+ end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),chroffset);
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+ ambig_end_length_5_2,ambig_end_length_3_2,
+ ambig_splicetype_5_2,ambig_splicetype_3_2,
+ min_splice_prob_2,
+ pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/watsonp,genestrand,
+ accession,querylength,chrnum,chroffset,chrhigh,chrlength,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ debug13(printf("Stage3end_new_gmap returns NULL\n"));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ FREE_OUT(pairarray2);
- } else {
- if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
- *good_start_p = true;
- }
- if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
- *good_end_p = true;
- }
- debug13(printf("Trim at start: %d, trim at end: %d (raw %d and %d)\n",
- Stage3end_trim_right(hit),Stage3end_trim_left(hit),
- Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
- /* Don't throw away GMAP hits */
- if (0 && (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
- stored_hits = List_push(stored_hits,(void *) NULL);
- Stage3end_free(&hit);
- } else {
- stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
- hits = List_push(hits,(void *) hit);
- }
+ } else {
+ if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+ *good_start_p = true;
+ }
+ if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+ *good_end_p = true;
+ }
+ debug13(printf("Trim at start: %d, trim at end: %d (raw %d and %d)\n",
+ Stage3end_trim_right(hit),Stage3end_trim_left(hit),
+ Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
+ /* Don't throw away GMAP hits */
+ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
+ hits = List_push(hits,(void *) hit);
+ }
+ }
+ /* Don't free pairarray2 */
+
+ } else {
+ debug13(printf("stage3 is not NULL, and cdna direction is determined to be %d\n",cdna_direction));
+ debug13a(Pair_dump_array(pairarray1,npairs1,true));
+
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray1,npairs1);
+ if (watsonp == true) {
+ start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*minusterm*/Pair_querypos(&(pairarray1[0])),chroffset);
+ end = add_bounded(chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),chrhigh);
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/watsonp,genestrand,
+ accession,querylength,chrnum,chroffset,chrhigh,chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ debug13(printf("Stage3end_new_gmap returns NULL\n"));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ FREE_OUT(pairarray1);
+
+ } else {
+ if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+ *good_start_p = true;
+ }
+ if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+ *good_end_p = true;
+ }
+ debug13(printf("Trim at start: %d, trim at end: %d\n",
+ Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
+ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
+ hits = List_push(hits,(void *) hit);
+ }
+
+ } else {
+ start = add_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*plusterm*/Pair_querypos(&(pairarray1[0])),chrhigh);
+ end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),chroffset);
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/watsonp,genestrand,
+ accession,querylength,chrnum,chroffset,chrhigh,chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ debug13(printf("Stage3end_new_gmap returns NULL\n"));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ FREE_OUT(pairarray1);
+
+ } else {
+ if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+ *good_start_p = true;
+ }
+ if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+ *good_end_p = true;
}
+ debug13(printf("Trim at start: %d, trim at end: %d (raw %d and %d)\n",
+ Stage3end_trim_right(hit),Stage3end_trim_left(hit),
+ Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
+ /* Don't throw away GMAP hits */
+ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
+ hits = List_push(hits,(void *) hit);
}
- /* Don't free pairarray */
}
+ /* Don't free pairarray1 */
+
}
Stage2_free(&stage2);
@@ -13489,8 +13618,9 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
}
-static Stage3end_T
-align_single_hit_with_gmap (Stage3end_T hit, char *queryuc_ptr, int querylength,
+static void
+align_single_hit_with_gmap (Stage3end_T *result1, Stage3end_T *result2, Stage3end_T hit,
+ char *queryuc_ptr, int querylength,
#ifdef END_KNOWNSPLICING_SHORTCUT
char *queryrc, bool invertedp,
#endif
@@ -13501,32 +13631,35 @@ align_single_hit_with_gmap (Stage3end_T hit, char *queryuc_ptr, int querylength,
/* Both events are tested by Stage3end_anomalous_splice_p */
if (Stage3end_chrnum(hit) == 0) {
/* Translocation */
- return (Stage3end_T) NULL;
+ *result1 = *result2 = (Stage3end_T) NULL;
} else if (Stage3end_hittype(hit) == SAMECHR_SPLICE) {
/* A genomic event that doesn't get reflected in chrnum */
- return (Stage3end_T) NULL;
+ *result1 = *result2 = (Stage3end_T) NULL;
} else if (Stage3end_hittype(hit) == GMAP) {
if (Stage3end_plusp(hit) == true) {
- return Stage3end_gmap_run_gmap_plus(hit,queryuc_ptr,querylength,genestrand,
- maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool);
+ *result1 = Stage3end_gmap_run_gmap_plus(&(*result2),hit,queryuc_ptr,querylength,genestrand,
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool);
} else {
- return Stage3end_gmap_run_gmap_minus(hit,queryuc_ptr,querylength,genestrand,
- maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool);
+ *result1 = Stage3end_gmap_run_gmap_minus(&(*result2),hit,queryuc_ptr,querylength,genestrand,
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool);
}
} else if (Stage3end_plusp(hit) == true) {
- return Stage3end_substrings_run_gmap_plus(hit,queryuc_ptr,querylength,genestrand,
- maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,extend_ends_p);
+ debug13(printf("Running Stage3end_substrings_run_gmap_plus\n"));
+ *result1 = Stage3end_substrings_run_gmap_plus(&(*result2),hit,queryuc_ptr,querylength,genestrand,
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool,extend_ends_p);
} else {
- return Stage3end_substrings_run_gmap_minus(hit,queryuc_ptr,querylength,genestrand,
- maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,extend_ends_p);
+ debug13(printf("Running Stage3end_substrings_run_gmap_minus\n"));
+ *result1 = Stage3end_substrings_run_gmap_minus(&(*result2),hit,queryuc_ptr,querylength,genestrand,
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool,extend_ends_p);
}
+ return;
}
@@ -13613,8 +13746,8 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
orighigh = left + (querylength - anchor_segment->querypos3);
/* extend left */
- knownsplice_limit_low = subtract_bounded(origlow,shortsplicedist,chroffset);
- mappingstart = segmentstart = subtract_bounded(origlow,shortsplicedist,chroffset);
+ knownsplice_limit_low = subtract_bounded(origlow,overall_max_distance,chroffset);
+ mappingstart = segmentstart = subtract_bounded(origlow,overall_max_distance,chroffset);
debug13(printf("Original bounds A: knownsplice_limit_low %u, mappingstart %u\n",
knownsplice_limit_low - chroffset,mappingstart - chroffset));
@@ -13918,8 +14051,8 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
knownsplice_limit_low - chroffset,mappingstart - chroffset));
/* extend left */
- knownsplice_limit_high = add_bounded(orighigh,shortsplicedist,chrhigh);
- mappingend = segmentend = add_bounded(orighigh,shortsplicedist,chrhigh);
+ knownsplice_limit_high = add_bounded(orighigh,overall_max_distance,chrhigh);
+ mappingend = segmentend = add_bounded(orighigh,overall_max_distance,chrhigh);
debug13(printf("Original bounds D: knownsplice_limit_high %u, mappingend %u\n",
knownsplice_limit_high - chroffset,mappingend - chroffset));
@@ -14166,24 +14299,31 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
Stage3end_T hit;
Pair_T *array;
- struct Pair_T *pairarray;
- List_T pairs, stage2pairs, unsorted_pairs;
+ struct Pair_T *pairarray1, *pairarray2;
+ List_T pairs1, pairs2, stage2pairs, unsorted_pairs;
int querypos, boundpos, seglength;
Chrpos_T genomepos, min_genomepos, max_genomepos;
- char comp, c, g, g_alt;
+ char c, g, g_alt;
char *gsequence_orig, *gsequence_alt;
Segment_T *sorted, *sorted_allocated;
int *scores, *scores_allocated, best_score, score;
int *prev_left, *prev_right, *prev_allocated, besti;
- int sensedir;
- int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
- max_match_length, ambig_end_length_5, ambig_end_length_3,
- unknowns, mismatches, qopens, qindels, topens, tindels,
- ncanonical, nsemicanonical, nnoncanonical;
- double ambig_prob_5, ambig_prob_3, min_splice_prob;
- Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+ int npairs;
+ int cdna_direction, sensedir;
+ int npairs1, goodness1, matches1, nmatches_posttrim_1,
+ max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
+ unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
+ ncanonical1, nsemicanonical1, nnoncanonical1;
+ int npairs2, goodness2, matches2, nmatches_posttrim_2,
+ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
+ unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
+ ncanonical2, nsemicanonical2, nnoncanonical2;
+ double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
+ double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+ Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
+ Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
Univcoord_T start, end, left;
int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
@@ -14228,8 +14368,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
while (endk < plus_nsegments && plus_segments[endk].diagonal < anchor_segment->diagonal + shortsplicedist) {
endk++;
}
- debug13(printf("%s read: Found plus segments %d to %d inclusive for anchor %d\n",
- first_read_p ? "First" : "Second",startk+1,endk-1,anchork));
+ debug13(printf("Found plus segments %d to %d inclusive for anchor %d\n",startk+1,endk-1,anchork));
/* Dynamic programming on left (low) side (querypos5) */
@@ -14381,8 +14520,8 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
}
}
- debug13(printf("%s read: Processing plus segments %d to %d inclusive: novelp %d, pairablep %d\n",
- first_read_p ? "First" : "Second",startk+1,endk-1,novelp,pairablep));
+ debug13(printf("Processing plus segments %d to %d inclusive: novelp %d, pairablep %d\n",
+ startk+1,endk-1,novelp,pairablep));
if (novelp == true && (pairablep == true || require_pairing_p == false)) {
anchor_segment->usedp = true;
chrnum = anchor_segment->chrnum;
@@ -14570,14 +14709,19 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
g = gsequence_orig[i];
g_alt = gsequence_alt[i];
if (g == c || g_alt == c) {
- comp = MATCH_COMP;
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#if 0
} else {
- comp = MISMATCH_COMP;
+ /* Let stage 3 handle mismatches */
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#endif
}
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
querypos++;
genomepos++;
}
@@ -14607,14 +14751,19 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
g = gsequence_orig[i];
g_alt = gsequence_alt[i];
if (g == c || g_alt == c) {
- comp = MATCH_COMP;
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#if 0
} else {
- comp = MISMATCH_COMP;
+ /* Let stage 3 handle mismatches */
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#endif
}
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
querypos++;
genomepos++;
i++;
@@ -14653,14 +14802,19 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
g = gsequence_orig[i];
g_alt = gsequence_alt[i];
if (g == c || g_alt == c) {
- comp = MATCH_COMP;
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#if 0
} else {
- comp = MISMATCH_COMP;
+ /* Let stage 3 handle mismatches */
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#endif
}
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
querypos++;
genomepos++;
i++;
@@ -14696,50 +14850,106 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
/* Run GMAP */
if (stage2pairs == NULL) {
/* hit = (T) NULL; */
- } else if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
- &matches,&nmatches_posttrim,&max_match_length,
- &ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,
- &ambig_prob_5,&ambig_prob_3,
- &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
- &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
- stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL,
+ } else if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+ &matches1,&nmatches_posttrim_1,&max_match_length_1,
+ &ambig_end_length_5_1,&ambig_end_length_3_1,
+ &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+ &ambig_prob_5_1,&ambig_prob_3_1,
+ &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+ &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+
+ &pairarray2,&pairs2,&npairs2,&goodness2,
+ &matches2,&nmatches_posttrim_2,&max_match_length_2,
+ &ambig_end_length_5_2,&ambig_end_length_3_2,
+ &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+ &ambig_prob_5_2,&ambig_prob_3_2,
+ &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+ &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+
+ stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL,
#ifdef END_KNOWNSPLICING_SHORTCUT
- cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
- watsonp ? query_compress_fwd : query_compress_rev,
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
#endif
- /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
#ifdef EXTRACT_GENOMICSEG
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#else
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#endif
- chrnum,chroffset,chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
- /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- /*sense_try*/0,/*sense_filter*/0,
- oligoindices_minor,diagpool,cellpool)) == NULL) {
+ chrnum,chroffset,chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
+ /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ /*sense_try*/0,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
/* hit = (T) NULL; */
+ } else if (cdna_direction == 0) {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray1,npairs1);
+ start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*minusterm*/Pair_querypos(&(pairarray1[0])),chroffset);
+ end = add_bounded(chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),chrhigh);
+
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/true,genestrand,
+ /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
+
+ FREE_OUT(pairarray1);
+ } else {
+ hits = List_push(hits,(void *) hit);
+ }
+
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray2,npairs2);
+ start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray2[0])),
+ /*minusterm*/Pair_querypos(&(pairarray2[0])),chroffset);
+ end = add_bounded(chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),chrhigh);
+
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+ ambig_end_length_5_2,ambig_end_length_3_2,
+ ambig_splicetype_5_2,ambig_splicetype_3_2,
+ min_splice_prob_2,
+ pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/true,genestrand,
+ /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
+
+ FREE_OUT(pairarray2);
+ } else {
+ hits = List_push(hits,(void *) hit);
+ }
+
} else {
nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
- pairarray,npairs);
- start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
- /*minusterm*/Pair_querypos(&(pairarray[0])),chroffset);
- end = add_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chrhigh);
-
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
+ pairarray1,npairs1);
+ start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*minusterm*/Pair_querypos(&(pairarray1[0])),chroffset);
+ end = add_bounded(chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),chrhigh);
+
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
/*left*/start,/*genomiclength*/end - start + 1,
/*plusp*/true,genestrand,
/*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
- FREE_OUT(pairarray);
+ FREE_OUT(pairarray1);
} else {
hits = List_push(hits,(void *) hit);
}
@@ -14803,24 +15013,31 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
Stage3end_T hit;
Pair_T *array;
- struct Pair_T *pairarray;
- List_T pairs, stage2pairs, unsorted_pairs;
+ struct Pair_T *pairarray1, *pairarray2;
+ List_T pairs1, pairs2, stage2pairs, unsorted_pairs;
int querypos, boundpos, seglength;
Chrpos_T genomepos, min_genomepos, max_genomepos;
- char comp, c, g, g_alt;
+ char c, g, g_alt;
char *gsequence_orig, *gsequence_alt;
Segment_T *sorted, *sorted_allocated;
int *scores, *scores_allocated, best_score, score;
int *prev_left, *prev_right, *prev_allocated, besti;
- int sensedir;
- int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
- max_match_length, ambig_end_length_5, ambig_end_length_3,
- unknowns, mismatches, qopens, qindels, topens, tindels,
- ncanonical, nsemicanonical, nnoncanonical;
- double ambig_prob_5, ambig_prob_3, min_splice_prob;
- Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+ int npairs;
+ int cdna_direction, sensedir;
+ int npairs1, goodness1, matches1, nmatches_posttrim_1,
+ max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
+ unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
+ ncanonical1, nsemicanonical1, nnoncanonical1;
+ int npairs2, goodness2, matches2, nmatches_posttrim_2,
+ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
+ unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
+ ncanonical2, nsemicanonical2, nnoncanonical2;
+ double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
+ double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+ Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
+ Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
Univcoord_T start, end, left;
int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
@@ -14865,8 +15082,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
while (endk < minus_nsegments && minus_segments[endk].diagonal < anchor_segment->diagonal + shortsplicedist) {
endk++;
}
- debug13(printf("%s read: Found minus segments %d to %d inclusive for anchor %d\n",
- first_read_p ? "First" : "Second",startk+1,endk-1,anchork));
+ debug13(printf("Found minus segments %d to %d inclusive for anchor %d\n",startk+1,endk-1,anchork));
/* Dynamic programming on left (low) side (querypos3) */
@@ -15019,8 +15235,8 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
}
- debug13(printf("%s read: Processing minus segments %d to %d inclusive: novelp %d, pairablep %d\n",
- first_read_p ? "First" : "Second",startk+1,endk-1,novelp,pairablep));
+ debug13(printf("Processing minus segments %d to %d inclusive: novelp %d, pairablep %d\n",
+ startk+1,endk-1,novelp,pairablep));
if (novelp == true && (pairablep == true || require_pairing_p == false)) {
anchor_segment->usedp = true;
chrnum = anchor_segment->chrnum;
@@ -15206,14 +15422,19 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
g = gsequence_orig[i];
g_alt = gsequence_alt[i];
if (g == c || g_alt == c) {
- comp = MATCH_COMP;
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#if 0
} else {
- comp = MISMATCH_COMP;
+ /* Let stage 3 handle mismatches */
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#endif
}
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
querypos++;
genomepos++;
}
@@ -15247,14 +15468,19 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
g = gsequence_orig[i];
g_alt = gsequence_alt[i];
if (g == c || g_alt == c) {
- comp = MATCH_COMP;
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#if 0
} else {
- comp = MISMATCH_COMP;
+ /* Let stage 3 handle mismatches */
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#endif
}
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
querypos++;
genomepos++;
i++;
@@ -15286,14 +15512,19 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
g = gsequence_orig[i];
g_alt = gsequence_alt[i];
if (g == c || g_alt == c) {
- comp = MATCH_COMP;
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#if 0
} else {
- comp = MISMATCH_COMP;
+ /* Let stage 3 handle mismatches */
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+#endif
}
- debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
- unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
- /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
- /*dynprogindex*/0);
querypos++;
genomepos++;
i++;
@@ -15331,49 +15562,103 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
if (stage2pairs == NULL) {
/* hit = (T) NULL; */
- } else if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
- &matches,&nmatches_posttrim,&max_match_length,
- &ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,
- &ambig_prob_5,&ambig_prob_3,
- &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
- &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
- stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL,
+ } else if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+ &matches1,&nmatches_posttrim_1,&max_match_length_1,
+ &ambig_end_length_5_1,&ambig_end_length_3_1,
+ &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+ &ambig_prob_5_1,&ambig_prob_3_1,
+ &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+ &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+
+ &pairarray2,&pairs2,&npairs2,&goodness2,
+ &matches2,&nmatches_posttrim_2,&max_match_length_2,
+ &ambig_end_length_5_2,&ambig_end_length_3_2,
+ &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+ &ambig_prob_5_2,&ambig_prob_3_2,
+ &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+ &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+
+ stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL,
#ifdef END_KNOWNSPLICING_SHORTCUT
- cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
- watsonp ? query_compress_fwd : query_compress_rev,
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
#endif
- /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
#ifdef EXTRACT_GENOMICSEG
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#else
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#endif
- chrnum,chroffset,chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
- /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- /*sense_try*/0,/*sense_filter*/0,
- oligoindices_minor,diagpool,cellpool)) == NULL) {
+ chrnum,chroffset,chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
+ /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ /*sense_try*/0,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
/* hit = (T) NULL; */
+ } else if (cdna_direction == 0) {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray1,npairs1);
+ start = add_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*plusterm*/Pair_querypos(&(pairarray1[0])),chrhigh);
+ end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),chroffset);
+
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,
+ /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
+ FREE_OUT(pairarray1);
+ } else {
+ hits = List_push(hits,(void *) hit);
+ }
+
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray2,npairs2);
+ start = add_bounded(chroffset + Pair_genomepos(&(pairarray2[0])),
+ /*plusterm*/Pair_querypos(&(pairarray2[0])),chrhigh);
+ end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),chroffset);
+
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+ ambig_end_length_5_2,ambig_end_length_3_2,
+ ambig_splicetype_5_2,ambig_splicetype_3_2,
+ min_splice_prob_2,
+ pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,
+ /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
+ FREE_OUT(pairarray2);
+ } else {
+ hits = List_push(hits,(void *) hit);
+ }
+
} else {
nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
- pairarray,npairs);
- start = add_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
- /*plusterm*/Pair_querypos(&(pairarray[0])),chrhigh);
- end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chroffset);
-
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
+ pairarray1,npairs1);
+ start = add_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*plusterm*/Pair_querypos(&(pairarray1[0])),chrhigh);
+ end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),chroffset);
+
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
/*left*/end,/*genomiclength*/start - end + 1,
/*plusp*/false,genestrand,
/*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
- FREE_OUT(pairarray);
+ FREE_OUT(pairarray1);
} else {
hits = List_push(hits,(void *) hit);
}
@@ -15414,7 +15699,7 @@ align_singleend_with_gmap (List_T result, char *queryuc_ptr, int querylength,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
int user_maxlevel) {
List_T new_result = NULL;
- Stage3end_T hit, gmap;
+ Stage3end_T hit, gmap1, gmap2;
List_T p;
int genestrand;
int i;
@@ -15470,21 +15755,26 @@ align_singleend_with_gmap (List_T result, char *queryuc_ptr, int querylength,
Stage3end_contains_known_splicesite(hit)));
/* Want high quality because we already have a pretty good answer */
- if ((gmap = align_single_hit_with_gmap(hit,queryuc_ptr,querylength,
+ align_single_hit_with_gmap(&gmap1,&gmap2,hit,queryuc_ptr,querylength,
#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc,Shortread_invertedp(queryseq),
+ queryrc,Shortread_invertedp(queryseq),
#endif
- oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- genestrand,/*extend_ends_p*/false)) != NULL) {
+ oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ genestrand,/*extend_ends_p*/false);
+ if (gmap1 == NULL) {
+ new_result = List_push(new_result,(void *) hit);
+ } else {
debug13(missing_hit = querylength - Stage3end_nmatches_posttrim(hit));
- debug13(missing_gmap = querylength - Stage3end_nmatches_posttrim(gmap));
+ debug13(missing_gmap = querylength - Stage3end_nmatches_posttrim(gmap1));
debug13(printf("GMAP %p with %d matches, %d missing compared with original hit with %d matches, %d missing\n",
- gmap,Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
- new_result = List_push(new_result,(void *) gmap);
+ gmap1,Stage3end_nmatches_posttrim(gmap1),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
+ new_result = List_push(new_result,(void *) gmap1);
+
+ if (gmap2 != NULL) {
+ new_result = List_push(new_result,(void *) gmap2);
+ }
Stage3end_set_improved_by_gmap(hit);
- } else {
- new_result = List_push(new_result,(void *) hit);
}
}
}
@@ -15539,7 +15829,7 @@ align_end (int *cutoff_level, T this,
longsinglesplicing = NULL, distantsplicing = NULL, gmap_hits = NULL;
List_T plus_anchor_segments = NULL, minus_anchor_segments = NULL;
List_T p;
- Stage3end_T hit, gmap;
+ Stage3end_T hit, gmap1, gmap2;
int found_score, done_level, opt_level, fast_level, mismatch_level, nmismatches;
int max_splice_mismatches, i;
int nhits = 0, nsplicepairs = 0;
@@ -16206,11 +16496,12 @@ align_end (int *cutoff_level, T this,
/* Previously criterion for skipping find_terminals was (greedy ||
subs || indels || singlesplicing || doublesplicing ||
shortendsplicing || longsinglesplicing || distantsplicing) */
- if (found_score > opt_level) {
+ if (0 && found_score > opt_level) {
terminals = find_terminals(plus_anchor_segments,minus_anchor_segments,
querylength,query_lastpos,
query_compress_fwd,query_compress_rev,
/*max_mismatches_allowed*/done_level,genestrand);
+
}
debug(printf("Before GMAP:\n"));
@@ -16285,18 +16576,22 @@ align_end (int *cutoff_level, T this,
for (p = hits; p != NULL && i < max_gmap_improvement; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- if ((gmap = align_single_hit_with_gmap(hit,queryuc_ptr,querylength,
+ align_single_hit_with_gmap(&gmap1,&gmap2,hit,queryuc_ptr,querylength,
#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc,Shortread_invertedp(queryseq),
+ queryrc,Shortread_invertedp(queryseq),
#endif
- oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- genestrand,/*extend_ends_p*/true)) != NULL) {
+ oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ genestrand,/*extend_ends_p*/true);
+ if (gmap1 != NULL) {
debug13(missing_hit = querylength - Stage3end_nmatches_posttrim(hit));
- debug13(missing_gmap = querylength - Stage3end_nmatches_posttrim(gmap));
+ debug13(missing_gmap = querylength - Stage3end_nmatches_posttrim(gmap1));
debug13(printf("GMAP %p with %d matches, %d missing compared with original terminal with %d matches, %d missing\n",
- gmap,Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
- gmap_hits = List_push(gmap_hits,(void *) gmap);
+ gmap1,Stage3end_nmatches_posttrim(gmap1),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
+ gmap_hits = List_push(gmap_hits,(void *) gmap1);
+ if (gmap2 != NULL) {
+ gmap_hits = List_push(gmap_hits,(void *) gmap2);
+ }
Stage3end_set_improved_by_gmap(hit);
}
}
@@ -16712,12 +17007,12 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
Stage3end_sensedir(hit5),genomicbound - chroffset));
knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
- knownsplice_limit_high = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
- segmentend = add_bounded(Stage3end_genomicend(hit5),pairmax,chrhigh);
+ knownsplice_limit_high = add_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chrhigh);
+ segmentend = add_bounded(Stage3end_genomicend(hit5),pairmax + PAIRMAX_ADDITIONAL,chrhigh);
#ifdef LONG_ENDSPLICES
- mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
+ mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chrhigh);
#else
- mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chrhigh);
+ mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance_novelend,chrhigh);
debug13(printf("Original bounds E: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingend %u\n",
knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingend - chroffset));
#endif
@@ -16750,7 +17045,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
plus_segments[i].querypos5,plus_segments[i].querypos3));
if (query_lastpos - plus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 1. Missing end of query, so there could be a middle splice */
- debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus overall_max_distance\n",
query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
#ifdef USE_GREEDY
if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
@@ -16762,7 +17057,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
#endif
#ifdef LONG_ENDSPLICES
- if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ if ((mappingpos = add_bounded(plus_segments[i].diagonal,overall_max_distance,chrhigh)) > middle_mappingend_last) {
/* Use > for NOT_GREEDY */
middle_mappingend_last = mappingpos;
middle_mappingend_p = true;
@@ -16799,7 +17094,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
#ifdef USE_GREEDY
if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
+ close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,overall_max_distance,chrhigh);
} else if (middle_mappingend_p == true) {
debug13(printf("Using middle mappingend\n"));
close_knownsplice_limit_high = middle_mappingend_greedy;
@@ -16821,7 +17116,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
knownsplice_limit_high = middle_mappingend_last;
mappingend = middle_mappingend_last;
} else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
- knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ knownsplice_limit_high = add_bounded(close_mappingend_last,overall_max_distance,chrhigh);
mappingend = close_mappingend_last;
}
#else
@@ -16883,12 +17178,12 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
Stage3end_sensedir(hit5),genomicbound - chroffset));
knownsplice_limit_high = mappingend = segmentend = genomicbound;
- knownsplice_limit_low = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
- segmentstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax,chroffset);
+ knownsplice_limit_low = subtract_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chroffset);
+ segmentstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + PAIRMAX_ADDITIONAL,chroffset);
#ifdef LONG_ENDSPLICES
- mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
+ mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chroffset);
#else
- mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chroffset);
+ mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance_novelend,chroffset);
#endif
debug13(printf("Original bounds F: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingstart %u\n",
knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingstart - chroffset));
@@ -16923,7 +17218,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
minus_segments[i].querypos5,minus_segments[i].querypos3));
if (query_lastpos - minus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 2. Missing end of query, so there could be a middle splice */
- debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus overall_max_distance\n",
query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
#ifdef USE_GREEDY
if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
@@ -16934,7 +17229,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
}
#endif
#ifdef LONG_ENDSPLICES
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + overall_max_distance,chroffset)) < middle_mappingstart_last) {
/* Use < for NOT_GREEDY */
middle_mappingstart_last = mappingpos;
middle_mappingstart_p = true;
@@ -16971,7 +17266,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
#ifdef USE_GREEDY
if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,overall_max_distance,chroffset);
} else if (middle_mappingstart_p == true) {
debug13(printf("Using middle mappingstart\n"));
close_knownsplice_limit_low = middle_mappingstart_greedy;
@@ -16993,7 +17288,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
knownsplice_limit_low = middle_mappingstart_last;
mappingstart = middle_mappingstart_last;
} else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
- knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ knownsplice_limit_low = subtract_bounded(close_mappingstart_last,overall_max_distance,chroffset);
mappingstart = close_mappingstart_last;
}
#else
@@ -17073,12 +17368,12 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
Stage3end_sensedir(hit3),genomicbound - chroffset));
knownsplice_limit_high = mappingend = segmentend = genomicbound;
- knownsplice_limit_low = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
- segmentstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax,chroffset);
+ knownsplice_limit_low = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + overall_max_distance,chroffset);
+ segmentstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + PAIRMAX_ADDITIONAL,chroffset);
#ifdef LONG_ENDSPLICES
- mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
+ mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + overall_max_distance,chroffset);
#else
- mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chroffset);
+ mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + overall_max_distance_novelend,chroffset);
#endif
close_mappingstart_last = middle_mappingstart_last = Stage3end_genomicstart(hit3);
@@ -17111,7 +17406,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
plus_segments[i].querypos5,plus_segments[i].querypos3));
if (plus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 3. Missing start of query, so there could be a middle splice */
- debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus overall_max_distance\n",
plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
#ifdef USE_GREEDY
if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
@@ -17122,7 +17417,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
}
#endif
#ifdef LONG_ENDSPLICES
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + overall_max_distance,chroffset)) < middle_mappingstart_last) {
/* Use < for NOT_GREEDY */
middle_mappingstart_last = mappingpos;
middle_mappingstart_p = true;
@@ -17159,7 +17454,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
#ifdef USE_GREEDY
if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,overall_max_distance,chroffset);
} else if (middle_mappingstart_p == true) {
debug13(printf("Using middle mappingstart\n"));
close_knownsplice_limit_low = middle_mappingstart_greedy;
@@ -17181,7 +17476,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
knownsplice_limit_low = middle_mappingstart_last;
mappingstart = middle_mappingstart_last;
} else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
- knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ knownsplice_limit_low = subtract_bounded(close_mappingstart_last,overall_max_distance,chroffset);
mappingstart = close_mappingstart_last;
}
#else
@@ -17242,12 +17537,12 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
Stage3end_sensedir(hit3),genomicbound - chroffset));
knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
- knownsplice_limit_high = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
- segmentend = add_bounded(Stage3end_genomicstart(hit3),pairmax,chrhigh);
+ knownsplice_limit_high = add_bounded(Stage3end_genomicstart(hit3),pairmax + overall_max_distance,chrhigh);
+ segmentend = add_bounded(Stage3end_genomicstart(hit3),pairmax + PAIRMAX_ADDITIONAL,chrhigh);
#ifdef LONG_ENDSPLICES
- mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
+ mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + overall_max_distance,chrhigh);
#else
- mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chrhigh);
+ mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + overall_max_distance_novelend,chrhigh);
#endif
close_mappingend_last = middle_mappingend_last = Stage3end_genomicstart(hit3);
@@ -17278,7 +17573,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
minus_segments[i].querypos5,minus_segments[i].querypos3));
if (minus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
/* Case 4. Missing start of query, so there could be a middle splice */
- debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus overall_max_distance\n",
minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
#ifdef USE_GREEDY
if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
@@ -17289,7 +17584,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
}
#endif
#ifdef LONG_ENDSPLICES
- if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ if ((mappingpos = add_bounded(minus_segments[i].diagonal,overall_max_distance,chrhigh)) > middle_mappingend_last) {
/* Use > for NOT_GREEDY */
middle_mappingend_last = mappingpos;
middle_mappingend_p = true;
@@ -17326,7 +17621,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
#ifdef USE_GREEDY
if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
+ close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,overall_max_distance,chrhigh);
} else if (middle_mappingend_p == true) {
debug13(printf("Using middle mappingend\n"));
close_knownsplice_limit_high = middle_mappingend_greedy;
@@ -17348,7 +17643,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
knownsplice_limit_high = middle_mappingend_last;
mappingend = middle_mappingend_last;
} else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
- knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ knownsplice_limit_high = add_bounded(close_mappingend_last,overall_max_distance,chrhigh);
mappingend = close_mappingend_last;
}
#else
@@ -17515,7 +17810,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
int cutoff_level_5, int cutoff_level_3, bool expect_concordant_p) {
Stage3pair_T newpair, stage3pair;
List_T gmap5_hits = NULL, gmap3_hits = NULL;
- Stage3end_T hit5, hit3, gmap5, gmap3;
+ Stage3end_T hit5, hit3, gmap5, gmap3, gmap5_1, gmap5_2, gmap3_1, gmap3_2;
List_T p, a, b, rest;
int genestrand;
int i;
@@ -17534,7 +17829,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
genestrand = Stage3pair_genestrand(stage3pair);
hit5 = Stage3pair_hit5(stage3pair);
hit3 = Stage3pair_hit3(stage3pair);
- gmap5 = gmap3 = (Stage3end_T) NULL;
+ gmap5_1 = gmap5_2 = gmap3_1 = gmap3_2 = (Stage3end_T) NULL;
debug13(printf("GMAP improvement #%d: Entering align_pair_with_gmap with hittypes %s and %s\n",
i,Stage3end_hittype_string(hit5),Stage3end_hittype_string(hit3)));
@@ -17566,18 +17861,23 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
querylength5,Stage3end_nmatches_posttrim(hit5),cutoff_level_5));
} else {
- if ((gmap5 = align_single_hit_with_gmap(hit5,queryuc_ptr_5,querylength5,
+ debug13(printf("Aligning hit5 with GMAP\n"));
+ align_single_hit_with_gmap(&gmap5_1,&gmap5_2,hit5,queryuc_ptr_5,querylength5,
#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc5,Shortread_invertedp(queryseq5),
+ queryrc5,Shortread_invertedp(queryseq5),
#endif
- oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- genestrand,/*extend_ends_p*/true)) != NULL) {
+ oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ genestrand,/*extend_ends_p*/true);
+ if (gmap5_1 != NULL) {
debug13(missing_hit = querylength5 - Stage3end_nmatches_posttrim(hit5));
- debug13(missing_gmap = querylength5 - Stage3end_nmatches_posttrim(gmap5));
+ debug13(missing_gmap = querylength5 - Stage3end_nmatches_posttrim(gmap5_1));
debug13(printf("GMAP %p with %d matches, %d missing compared with original 5' hit with %d matches, %d missing\n",
- gmap5,Stage3end_nmatches_posttrim(gmap5),missing_gmap,Stage3end_nmatches_posttrim(hit5),missing_hit));
- gmap5_hits = List_push(gmap5_hits,(void *) gmap5);
+ gmap5_1,Stage3end_nmatches_posttrim(gmap5_1),missing_gmap,Stage3end_nmatches_posttrim(hit5),missing_hit));
+ gmap5_hits = List_push(gmap5_hits,(void *) gmap5_1);
+ if (gmap5_2 != NULL) {
+ gmap5_hits = List_push(gmap5_hits,(void *) gmap5_2);
+ }
Stage3end_set_improved_by_gmap(hit5);
}
}
@@ -17591,19 +17891,23 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
querylength3,Stage3end_nmatches_posttrim(hit3),cutoff_level_3));
} else {
- debug13(printf("expect_concordant_p is false, so running GMAP single end on 3'\n"));
- if ((gmap3 = align_single_hit_with_gmap(hit3,queryuc_ptr_3,querylength3,
+ debug13(printf("Aligning hit3 with GMAP\n"));
+ align_single_hit_with_gmap(&gmap3_1,&gmap3_2,hit3,queryuc_ptr_3,querylength3,
#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc3,Shortread_invertedp(queryseq3),
+ queryrc3,Shortread_invertedp(queryseq3),
#endif
- oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- genestrand,/*extend_ends_p*/true)) != NULL) {
+ oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ genestrand,/*extend_ends_p*/true);
+ if (gmap3_1 != NULL) {
debug13(missing_hit = querylength3 - Stage3end_nmatches_posttrim(hit3));
- debug13(missing_gmap = querylength3 - Stage3end_nmatches_posttrim(gmap3));
+ debug13(missing_gmap = querylength3 - Stage3end_nmatches_posttrim(gmap3_1));
debug13(printf("GMAP %p with %d matches, %d missing compared with original 3' hit with %d matches, %d missing\n",
- gmap3,Stage3end_nmatches_posttrim(gmap3),missing_gmap,Stage3end_nmatches_posttrim(hit3),missing_hit));
- gmap3_hits = List_push(gmap3_hits,(void *) gmap3);
+ gmap3_1,Stage3end_nmatches_posttrim(gmap3_1),missing_gmap,Stage3end_nmatches_posttrim(hit3),missing_hit));
+ gmap3_hits = List_push(gmap3_hits,(void *) gmap3_1);
+ if (gmap3_2 != NULL) {
+ gmap3_hits = List_push(gmap3_hits,(void *) gmap3_2);
+ }
Stage3end_set_improved_by_gmap(hit3);
}
}
@@ -19075,6 +19379,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
/* Search 8: Terminals */
+ if (0) {
if (nconcordant == 0 || *found_score > opt_level) {
terminals5 = find_terminals(plus_anchor_segments_5,minus_anchor_segments_5,
querylength5,query5_lastpos,
@@ -19097,6 +19402,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("After pairing terminals, found %d concordant, %d nsamechr, found_score %d\n",
nconcordant,nsamechr,*found_score));
}
+ }
/* Search 9: GMAP pairsearch/halfmapping */
@@ -20895,8 +21201,6 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
max_middle_deletions = max_middle_deletions_in;
shortsplicedist = shortsplicedist_in;
- shortsplicedist_known = shortsplicedist_known_in;
- shortsplicedist_novelend = shortsplicedist_novelend_in;
overall_max_distance = shortsplicedist;
if (max_middle_deletions > (int) overall_max_distance) {
diff --git a/src/stage3.c b/src/stage3.c
index a6501e6..83e4808 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 184517 2016-02-18 17:44:24Z twu $";
+static char rcsid[] = "$Id: stage3.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -23,7 +23,9 @@ static char rcsid[] = "$Id: stage3.c 184517 2016-02-18 17:44:24Z twu $";
#include "intron.h"
#include "pbinom.h"
#include "changepoint.h"
+#ifndef GSNAP
#include "translation.h"
+#endif
#ifdef PMAP
#include "backtranslation.h"
#endif
@@ -71,7 +73,7 @@ static char rcsid[] = "$Id: stage3.c 184517 2016-02-18 17:44:24Z twu $";
#define DYNPROGINDEX_MINOR +1
#define DUAL_BREAK_PROB_THRESHOLD 0.90
-#define MIN_STAGE2_FOR_DUALBREAK 24
+#define MIN_STAGE2_FOR_DUALBREAK 8 /* was 24, but misses small exons */
#define THETA_SLACK 0.10
#define TRIM_END_PVALUE 1e-4
@@ -93,6 +95,8 @@ static char rcsid[] = "$Id: stage3.c 184517 2016-02-18 17:44:24Z twu $";
#define END_SPLICESITE_PROB_MATCH 0.90
#define END_SPLICESITE_PROB_MISMATCH 0.95
+#define END_MIN_EXONLENGTH 12
+
#if 0
/* No longer used. Not sure why it was used before */
#define END_SPLICESITE_EXON_LENGTH 100 /* If shorter than this, then don't look for end splice site */
@@ -1300,6 +1304,7 @@ insert_gapholders (List_T pairs, char *queryseq_ptr, char *queryuc_ptr,
#else
path = List_transfer_one(path,&pairs);
#endif
+
} else if (pair->gapp == true) {
debug(printf("Removing a gap with queryjump = %d, genomejump = %d\n",
pair->queryjump,pair->genomejump));
@@ -2690,7 +2695,7 @@ clean_path_end3 (List_T path, int ambig_end_length_3) {
/* Remove any remaining nonmatches, gaps, or indels at 3' end */
if (path != NULL) {
lastpair = path->first;
- while (lastpair->gapp || (lastpair->comp != MATCH_COMP && lastpair->comp != DYNPROG_MATCH_COMP && lastpair->comp != AMBIGUOUS_COMP)) {
+ while (lastpair->gapp || (lastpair->comp != MATCH_COMP && lastpair->comp != DYNPROG_MATCH_COMP /*&& lastpair->comp != AMBIGUOUS_COMP*/)) {
debug(printf("Removing nonmatch at 3' end: "));
debug(Pair_dump_one(lastpair,/*zerobasedp*/true));
debug(printf("\n"));
@@ -2733,7 +2738,7 @@ clean_pairs_end5 (List_T pairs, int ambig_end_length_5) {
/* Remove any remaining nonmatches, gaps, or indels at 5' end */
if (pairs != NULL) {
firstpair = pairs->first;
- while (firstpair->gapp || (firstpair->comp != MATCH_COMP && firstpair->comp != DYNPROG_MATCH_COMP && firstpair->comp != AMBIGUOUS_COMP)) {
+ while (firstpair->gapp || (firstpair->comp != MATCH_COMP && firstpair->comp != DYNPROG_MATCH_COMP /*&& firstpair->comp != AMBIGUOUS_COMP*/)) {
debug(printf("Removing nonmatch at 5' end: "));
debug(Pair_dump_one(firstpair,/*zerobasedp*/true));
debug(printf("\n"));
@@ -3458,7 +3463,6 @@ enough_matches (int matches, int genomejump
static bool
canonicalp (bool knowngapp, char comp, double donor_prob, double acceptor_prob, int cdna_direction) {
-
if (knowngapp) {
return true;
@@ -4204,20 +4208,20 @@ fill_in_gaps (List_T path, Pairpool_T pairpool, char *queryseq_ptr,
debug7(printf("known intron is true, so an intron\n"));
intronp = true;
} else if (splicingp == false) {
- debug7(printf("splicingp is false, so not an intron\n"));
+ debug7(printf("splicingp is false, so not an intron, but an indel\n"));
intronp = false;
#if 0
} else if (sensedir == SENSE_NULL) {
/* Can lead to very large deletions */
- debug7(printf("sensedir == SENSE_NULL, so not an intron\n"));
+ debug7(printf("sensedir == SENSE_NULL, so not an intron, but an indel\n"));
intronp = false;
#endif
} else if (intronlength < min_intronlength) {
- debug7(printf("intronlength %d < min_intronlength %d, so not an intron\n",
+ debug7(printf("intronlength %d < min_intronlength %d, so not an intron, but an indel\n",
intronlength,min_intronlength));
intronp = false;
} else if (intronlength >= max_deletionlength) {
- debug7(printf("intronlength %d >= max_deletionlength %d, so an intron\n",
+ debug7(printf("intronlength %d >= max_deletionlength %d, so an intron, not an indel\n",
intronlength,max_deletionlength));
intronp = true;
} else {
@@ -4874,9 +4878,9 @@ pick_cdna_direction (int *winning_cdna_direction, int *sensedir,
#endif
} else {
- debug11(printf("scores all equal, so fwd wins\n"));
+ debug11(printf("scores all equal, so fwd wins, but setting cdna_direction to be 0\n"));
/* No clear intron direction, so allow under all sense_filters */
- *winning_cdna_direction = +1;
+ *winning_cdna_direction = 0;
*sensedir = SENSE_NULL;
return pairs_fwd;
}
@@ -5195,6 +5199,10 @@ Stage3_backtranslate_cdna (T this) {
return;
}
+#elif defined(GSNAP)
+
+/* No need to perform translation */
+
#else
static void
@@ -5254,6 +5262,8 @@ Stage3_translate_genomic (T this, int npairs, bool fulllengthp, int cds_startpos
}
#endif
+
+#ifndef GSNAP
void
Stage3_translate_cdna_via_reference (T this, T reference, bool literalrefp) {
bool fixshiftp = !literalrefp;
@@ -5282,6 +5292,8 @@ Stage3_translate_cdna_via_reference (T this, T reference, bool literalrefp) {
return;
}
+#endif
+
void
Stage3_fix_cdna_direction (T this, T reference) {
@@ -5305,6 +5317,7 @@ Stage3_fix_cdna_direction (T this, T reference) {
+#ifndef GSNAP
void
Stage3_translate (T this,
#ifdef PMAP
@@ -5349,8 +5362,10 @@ Stage3_translate (T this,
return;
}
+#endif
+#ifndef GSNAP
void
Stage3_translate_chimera (T this, T mate,
#ifdef PMAP
@@ -5446,9 +5461,10 @@ Stage3_translate_chimera (T this, T mate,
return;
}
+#endif
-
+#ifndef GSNAP
void
Stage3_print_pathsummary (Filestring_T fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Univ_IIT_T contig_iit,
IIT_T altstrain_iit, Sequence_T queryseq,
@@ -5473,6 +5489,7 @@ Stage3_print_pathsummary (Filestring_T fp, T this, int pathnum, Univ_IIT_T chrom
return;
}
+#endif
void
@@ -5646,7 +5663,7 @@ Stage3_print_introns (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequen
}
-
+#ifndef GSNAP
void
Stage3_print_mutations (Filestring_T fp, T this, T reference, Univ_IIT_T chromosome_iit, Sequence_T queryseq,
char *dbversion, bool showalignp,
@@ -5680,6 +5697,7 @@ Stage3_print_mutations (Filestring_T fp, T this, T reference, Univ_IIT_T chromos
return;
}
+#endif
@@ -8626,7 +8644,8 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
debug(printf("Observed a dual break at %d..%d with queryjump = %d, genomejump = %d\n",
leftpair->querypos,rightpair->querypos,pair->queryjump,pair->genomejump));
- if (finalp == true) {
+ if (0 && finalp == true) {
+ /* If genomejump is too large, this causes problem with allocation in Dynprog_T objects */
debug(printf(" Final: solve as a single gap\n"));
pairs = traverse_single_gap(&filledp,&(*dynprogindex_minor),pairs,&path,leftpair,rightpair,
chroffset,chrhigh,
@@ -8903,9 +8922,9 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb
}
-
+/* maxsize can be either 3 or nullgap */
static List_T
-build_pairs_singles (int *dynprogindex, List_T path,
+build_pairs_singles (int *dynprogindex, List_T path, int maxsize,
Univcoord_T chroffset, Univcoord_T chrhigh,
char *queryseq_ptr, char *queryuc_ptr, int querylength,
bool watsonp, bool jump_late_p, int maxpeelback, double defect_rate,
@@ -8915,7 +8934,7 @@ build_pairs_singles (int *dynprogindex, List_T path,
Pair_T pair, leftpair, rightpair;
bool filledp;
- debug(printf("\n** Starting build_pairs_singles\n"));
+ debug(printf("\n** Starting build_pairs_singles with maxsize %d\n",maxsize));
/* Remove gaps at beginning */
while (path != NULL && ((Pair_T) path->first)->gapp == true) {
@@ -8933,7 +8952,7 @@ build_pairs_singles (int *dynprogindex, List_T path,
pairs = List_transfer_one(pairs,&path);
#endif
- } else if (pair->queryjump > nullgap) {
+ } else if (pair->queryjump > maxsize) {
/* Large gap. Do nothing */
#ifdef WASTE
pairs = Pairpool_push_existing(pairs,pairpool,pair);
@@ -8970,7 +8989,7 @@ build_pairs_singles (int *dynprogindex, List_T path,
leftpair = path->first;
rightpair = pairs->first;
- debug(printf("Stage 3: Traversing single gap: leftquerypos = %d, rightquerypos = %d, leftgenomepos = %d, rightgenomepos = %d, queryjump %d, genomejump %d\n",
+ debug(printf("Stage 3: Traversing single gap small: leftquerypos = %d, rightquerypos = %d, leftgenomepos = %d, rightgenomepos = %d, queryjump %d, genomejump %d\n",
leftpair->querypos,rightpair->querypos,leftpair->genomepos,rightpair->genomepos,pair->queryjump,pair->genomejump));
pairs = traverse_single_gap(&filledp,&(*dynprogindex),pairs,&path,leftpair,rightpair,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,watsonp,
@@ -10697,7 +10716,9 @@ path_compute_dir (double *defect_rate, List_T pairs,
iter0 = 0;
dual_break_p = true;
while ((/* filterp == true || */ dual_break_p == true) && iter0 < MAXITER_CYCLES) {
- path = List_reverse(pairs);
+ /* path = List_reverse(pairs); */
+ /* Need to insert gapholders after Pairpool_join_end5 and Pairpool_join_end3 */
+ path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
#ifdef PMAP
#if 0
@@ -10707,10 +10728,11 @@ path_compute_dir (double *defect_rate, List_T pairs,
#endif
#endif
- /* Pass 2A: solve straight gaps. path --> pairs (for defect rate) */
- debug(printf("\n*** Pass 2A (dir %d): Solve straight gaps. Iteration0 %d\n",
+ /* Pass 2A: solve straight gaps (small). path --> pairs (for defect rate) */
+ debug(printf("\n*** Pass 2A (dir %d): Solve straight gaps (small). Iteration0 %d\n",
cdna_direction,iter0));
- pairs = build_pairs_singles(&dynprogindex_minor,path,
+ debug(Pair_dump_list(path,true));
+ pairs = build_pairs_singles(&dynprogindex_minor,path,/*maxsize*/3,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,watsonp,
jump_late_p,maxpeelback,/*defect_rate*/0.0,pairpool,dynprogM,
last_genomedp5,last_genomedp3,/*forcep*/false,/*finalp*/false);
@@ -10739,9 +10761,9 @@ path_compute_dir (double *defect_rate, List_T pairs,
/* Pass 2C: solve straight gaps again. path --> pairs (for defect rate) */
- debug(printf("\n*** Pass 2C (dir %d): Solve straight gaps again. Iteration0 %d\n",
+ debug(printf("\n*** Pass 2C (dir %d): Solve straight gaps again (large). Iteration0 %d\n",
cdna_direction,iter0));
- pairs = build_pairs_singles(&dynprogindex_minor,path,
+ pairs = build_pairs_singles(&dynprogindex_minor,path,/*maxsize*/nullgap,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,watsonp,
jump_late_p,maxpeelback,/*defect_rate*/0.0,pairpool,dynprogM,
last_genomedp5,last_genomedp3,/*forcep*/false,/*finalp*/false);
@@ -11246,7 +11268,7 @@ path_compute_final (double defect_rate, List_T pairs, int cdna_direction, bool w
debug(printf("Entering path_compute_final\n"));
path = List_reverse(pairs);
- pairs = build_pairs_singles(&dynprogindex_minor,path,
+ pairs = build_pairs_singles(&dynprogindex_minor,path,/*maxsize*/nullgap,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,watsonp,
jump_late_p,maxpeelback,defect_rate,pairpool,dynprogM,
last_genomedp5,last_genomedp3,/*forcep*/true,/*finalp*/true);
@@ -11302,7 +11324,7 @@ trim_novel_spliceends (List_T pairs,
int i;
Pair_T pair;
- int trim5, trim3;
+ int trim5, trim3, exondist;
Univcoord_T genomicpos, start_genomicpos, middle_genomicpos, end_genomicpos;
Univcoord_T splice_genomepos_5, splice_genomepos_3, splice_genomepos_5_mm, splice_genomepos_3_mm;
Univcoord_T start, middle, end; /* start to middle has mismatches, while middle to end has none */
@@ -11367,6 +11389,15 @@ trim_novel_spliceends (List_T pairs,
i++;
}
+ /* Find distance from end to intron, if any */
+ exondist = 0;
+ while (p != NULL && ((Pair_T) List_head(p))->gapp == false &&
+ exondist < END_MIN_EXONLENGTH) {
+ p = List_next(p);
+ exondist++;
+ }
+ debug13(printf("exondist is %d\n",exondist));
+
if (mismatchp == false) {
/* Allow perfect overhangs into intron */
debug13(printf("Allowing perfect overhang into potential intron\n"));
@@ -11379,9 +11410,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = middle + 1;
end_genomicpos = end + 1;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */
debug13(printf("3', watson, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob));
if (donor_prob > max_prob_3_mm) {
@@ -11390,7 +11422,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos--;
}
- while (genomicpos >= end_genomicpos) {
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */
debug13(printf("3', watson, sense anti %u %u %f\n",chroffset+genomicpos,genomicpos,donor_prob));
if (donor_prob > max_prob_3) {
@@ -11408,9 +11441,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = (chrhigh - chroffset) - middle;
end_genomicpos = (chrhigh - chroffset) - end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */
debug13(printf("3', crick, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob));
if (donor_prob > max_prob_3_mm) {
@@ -11419,7 +11453,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos++;
}
- while (genomicpos <= end_genomicpos) {
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */
debug13(printf("3', crick, sense forward %u %u %f\n",chroffset+genomicpos,genomicpos,donor_prob));
if (donor_prob > max_prob_3) {
@@ -11439,9 +11474,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = middle + 1;
end_genomicpos = end + 1;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */
debug13(printf("3', watson, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob));
if (acceptor_prob > max_prob_3_mm) {
@@ -11450,7 +11486,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos--;
}
- while (genomicpos >= end_genomicpos) {
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */
debug13(printf("3', watson, sense forward %u %u %f\n",chroffset+genomicpos,genomicpos,acceptor_prob));
if (acceptor_prob > max_prob_3) {
@@ -11468,9 +11505,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = (chrhigh - chroffset) - middle;
end_genomicpos = (chrhigh - chroffset) - end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */
debug13(printf("3', crick, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob));
if (acceptor_prob > max_prob_3_mm) {
@@ -11479,7 +11517,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos++;
}
- while (genomicpos <= end_genomicpos) {
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */
debug13(printf("3', crick, sense anti %u %u %f\n",chroffset+genomicpos,genomicpos,acceptor_prob));
if (acceptor_prob > max_prob_3) {
@@ -11497,9 +11536,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = middle + 1;
end_genomicpos = end + 1;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */
debug13(printf("3', watson, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
@@ -11525,7 +11565,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos--;
}
- while (genomicpos >= end_genomicpos) {
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */
debug13(printf("3', watson, sense null %u %u %f %f\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
@@ -11558,9 +11599,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = (chrhigh - chroffset) - middle;
end_genomicpos = (chrhigh - chroffset) - end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */
debug13(printf("3', crick, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
@@ -11586,7 +11628,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos++;
}
- while (genomicpos <= end_genomicpos) {
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */
debug13(printf("3', crick, sense null %u %u %f %f\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
@@ -11690,6 +11733,15 @@ trim_novel_spliceends (List_T pairs,
i++;
}
+ /* Find distance from end to intron, if any */
+ exondist = 0;
+ while (p != NULL && ((Pair_T) List_head(p))->gapp == false &&
+ exondist < END_MIN_EXONLENGTH) {
+ p = List_next(p);
+ exondist++;
+ }
+ debug13(printf("exondist is %d\n",exondist));
+
if (mismatchp == false) {
/* Allow perfect overhangs into intron */
debug13(printf("Allowing perfect overhang into potential intron\n"));
@@ -11702,9 +11754,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */
debug13(printf("5', watson, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob));
if (acceptor_prob > max_prob_5_mm) {
@@ -11713,7 +11766,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos++;
}
- while (genomicpos <= end_genomicpos) {
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */
debug13(printf("5', watson, sense forward %u %u %f\n",chroffset+genomicpos,genomicpos,acceptor_prob));
if (acceptor_prob > max_prob_5) {
@@ -11731,9 +11785,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = (chrhigh - chroffset) - middle + 1;
end_genomicpos = (chrhigh - chroffset) - end + 1;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */
debug13(printf("5', crick, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob));
if (acceptor_prob > max_prob_5_mm) {
@@ -11742,7 +11797,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos--;
}
- while (genomicpos >= end_genomicpos) {
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */
debug13(printf("5', crick, sense anti %u %u %f\n",chroffset+genomicpos,genomicpos,acceptor_prob));
if (acceptor_prob > max_prob_5) {
@@ -11762,9 +11818,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */
debug13(printf("5', watson, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob));
if (donor_prob > max_prob_5_mm) {
@@ -11773,7 +11830,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos++;
}
- while (genomicpos <= end_genomicpos) {
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */
debug13(printf("5', watson, sense anti %u %u %f\n",chroffset+genomicpos,genomicpos,donor_prob));
if (donor_prob > max_prob_5) {
@@ -11791,9 +11849,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = (chrhigh - chroffset) - middle + 1;
end_genomicpos = (chrhigh - chroffset) - end + 1;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */
debug13(printf("5', crick, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob));
if (donor_prob > max_prob_5_mm) {
@@ -11802,7 +11861,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos--;
}
- while (genomicpos >= end_genomicpos) {
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */
debug13(printf("5', crick, sense forward %u %u %f\n",chroffset+genomicpos,genomicpos,donor_prob));
if (donor_prob > max_prob_5) {
@@ -11820,9 +11880,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = middle;
end_genomicpos = end;
- assert(start_genomicpos <= end_genomicpos);
+ /* assert(start_genomicpos <= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos <= middle_genomicpos) {
+ while (genomicpos <= middle_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */
debug13(printf("5', watson, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
@@ -11848,7 +11909,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos++;
}
- while (genomicpos <= end_genomicpos) {
+ while (genomicpos <= end_genomicpos &&
+ genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */
debug13(printf("5', watson, sense null %u %u %f %f\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
@@ -11881,9 +11943,10 @@ trim_novel_spliceends (List_T pairs,
middle_genomicpos = (chrhigh - chroffset) - middle + 1;
end_genomicpos = (chrhigh - chroffset) - end + 1;
- assert(start_genomicpos >= end_genomicpos);
+ /* assert(start_genomicpos >= end_genomicpos); */
genomicpos = start_genomicpos;
- while (genomicpos >= middle_genomicpos) {
+ while (genomicpos >= middle_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */
debug13(printf("5', crick, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
@@ -11909,7 +11972,8 @@ trim_novel_spliceends (List_T pairs,
}
genomicpos--;
}
- while (genomicpos >= end_genomicpos) {
+ while (genomicpos >= end_genomicpos &&
+ genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */
debug13(printf("5', crick, sense null %u %u %f %f\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
@@ -12281,13 +12345,24 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
/* Using alloca for last_genomedp5 and last_genomedp3 can cause stack overflow */
struct Pair_T *
-Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direction, int *sensedir,
- int *matches, int *nmatches_posttrim, int *max_match_length,
- int *ambig_end_length_5, int *ambig_end_length_3,
- Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
- double *ambig_prob_5, double *ambig_prob_3,
- int *unknowns, int *mismatches, int *qopens, int *qindels, int *topens, int *tindels,
- int *ncanonical, int *nsemicanonical, int *nnoncanonical, double *min_splice_prob,
+Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *npairs1, int *goodness1,
+ int *matches1, int *nmatches_posttrim_1, int *max_match_length_1,
+ int *ambig_end_length_5_1, int *ambig_end_length_3_1,
+ Splicetype_T *ambig_splicetype_5_1, Splicetype_T *ambig_splicetype_3_1,
+ double *ambig_prob_5_1, double *ambig_prob_3_1,
+ int *unknowns1, int *mismatches1, int *qopens1, int *qindels1, int *topens1, int *tindels1,
+ int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *min_splice_prob_1,
+
+#ifdef GSNAP
+ struct Pair_T **pairarray2, List_T *finalpairs2, int *npairs2, int *goodness2,
+ int *matches2, int *nmatches_posttrim_2, int *max_match_length_2,
+ int *ambig_end_length_5_2, int *ambig_end_length_3_2,
+ Splicetype_T *ambig_splicetype_5_2, Splicetype_T *ambig_splicetype_3_2,
+ double *ambig_prob_5_2, double *ambig_prob_3_2,
+ int *unknowns2, int *mismatches2, int *qopens2, int *qindels2, int *topens2, int *tindels2,
+ int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *min_splice_prob_2,
+#endif
+
List_T stage2pairs, List_T all_stage2_starts, List_T all_stage2_ends,
#ifdef PMAP
char *queryaaseq_ptr,
@@ -12300,7 +12375,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
int sense_try, int sense_filter,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
- struct Pair_T *pairarray;
+ struct Pair_T *pairarray1;
List_T p;
Chrpos_T *last_genomedp5_fwd = NULL, *last_genomedp3_fwd = NULL, *last_genomedp5_rev = NULL, *last_genomedp3_rev = NULL;
List_T pairs_pretrim, pairs_fwd, pairs_rev, best_pairs, temp_pairs, path_fwd, path_rev, best_path, temp_path;
@@ -12324,27 +12399,27 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
/* stage2pairs = Stage2_middle(stage2); */
-#ifdef DEBUG0
+#if defined(DEBUG0) || defined(DEBUG11)
if (watsonp == true) {
- printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u, chrend %u, query_subseq_offset %d)\n",
+ printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u, chrend %u, query_subseq_offset %d, sense try %d)\n",
chrnum,((Pair_T) stage2pairs->first)->genomepos,
- ((Pair_T) List_last_value(stage2pairs))->genomepos,query_subseq_offset);
+ ((Pair_T) List_last_value(stage2pairs))->genomepos,query_subseq_offset,sense_try);
} else {
- printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u, chrend %u, query_subseq_offset %d)\n",
+ printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u, chrend %u, query_subseq_offset %d, sense try %d)\n",
chrnum,(chrhigh - chroffset) - ((Pair_T) stage2pairs->first)->genomepos,
- (chrhigh - chroffset) - ((Pair_T) List_last_value(stage2pairs))->genomepos,query_subseq_offset);
+ (chrhigh - chroffset) - ((Pair_T) List_last_value(stage2pairs))->genomepos,query_subseq_offset,sense_try);
}
#endif
#ifdef DEBUG
if (watsonp == true) {
- printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u, chrend %u, query_subseq_offset %d)\n",
+ printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u, chrend %u, query_subseq_offset %d, sense try %d)\n",
chrnum,((Pair_T) stage2pairs->first)->genomepos,
- ((Pair_T) List_last_value(stage2pairs))->genomepos,query_subseq_offset);
+ ((Pair_T) List_last_value(stage2pairs))->genomepos,query_subseq_offset,sense_try);
} else {
- printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u, chrend %u, query_subseq_offset %d)\n",
+ printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u, chrend %u, query_subseq_offset %d, sense try %d)\n",
chrnum,(chrhigh - chroffset) - ((Pair_T) stage2pairs->first)->genomepos,
- (chrhigh - chroffset) - ((Pair_T) List_last_value(stage2pairs))->genomepos,query_subseq_offset);
+ (chrhigh - chroffset) - ((Pair_T) List_last_value(stage2pairs))->genomepos,query_subseq_offset,sense_try);
}
#endif
@@ -12374,13 +12449,10 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
/* 1. Middle */
if (pairs_fwd == NULL) {
path_fwd = (List_T) NULL;
-#ifdef DEBUG8
- } else if (stage3debug == POST_STAGE2) {
- path_fwd = List_reverse(pairs_fwd);
-#endif
} else {
last_genomedp5_fwd = (Chrpos_T *) CALLOC(querylength,sizeof(Chrpos_T));
last_genomedp3_fwd = (Chrpos_T *) CALLOC(querylength,sizeof(Chrpos_T));
+ debug(printf("*** Solve path_fwd\n"));
path_fwd = path_compute_dir(&defect_rate_fwd,pairs_fwd,/*cdna_direction*/+1,
watsonp,genestrand,jump_late_p,
#ifdef PMAP
@@ -12395,13 +12467,10 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
if (pairs_rev == NULL) {
path_rev = (List_T) NULL;
-#ifdef DEBUG8
- } else if (stage3debug == POST_STAGE2) {
- path_rev = List_reverse(pairs_rev);
-#endif
} else {
last_genomedp5_rev = (Chrpos_T *) CALLOC(querylength,sizeof(Chrpos_T));
last_genomedp3_rev = (Chrpos_T *) CALLOC(querylength,sizeof(Chrpos_T));
+ debug(printf("*** Solve path_rev\n"));
path_rev = path_compute_dir(&defect_rate_rev,pairs_rev,/*cdna_direction*/-1,
watsonp,genestrand,jump_late_p,
#ifdef PMAP
@@ -12467,10 +12536,6 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
debug(printf("Stage2 has %d starts and %d ends\n",List_length(all_stage2_starts),List_length(all_stage2_ends)));
if (path_fwd == NULL) {
pairs_fwd = (List_T) NULL;
-#ifdef DEBUG8
- } else if (stage3debug > NO_STAGE3DEBUG && stage3debug < POST_ENDS) {
- pairs_fwd = List_reverse(path_fwd);
-#endif
} else {
/* 3' end */
if (all_stage2_ends == NULL) {
@@ -12499,6 +12564,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
for (p = joined_ends; p != NULL; p = List_next(p)) {
copy = (List_T) List_head(p);
+ debug(printf("*** Solve path_fwd joined end\n"));
path_fwd = path_compute_dir(&defect_rate_temp,/*pairs*/List_reverse(copy),/*cdna_direction*/+1,
watsonp,genestrand,jump_late_p,
#ifdef PMAP
@@ -12557,6 +12623,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
for (p = joined_starts; p != NULL; p = List_next(p)) {
copy = (List_T) List_head(p);
+ debug(printf("*** Solve path_fwd joined start\n"));
path_fwd = path_compute_dir(&defect_rate_temp,/*pairs*/copy,/*cdna_direction*/+1,
watsonp,genestrand,jump_late_p,
#ifdef PMAP
@@ -12594,10 +12661,6 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
#ifndef PMAP
if (path_rev == NULL) {
pairs_rev = (List_T) NULL;
-#ifdef DEBUG8
- } else if (stage3debug > NO_STAGE3DEBUG && stage3debug < POST_ENDS) {
- pairs_rev = List_reverse(path_rev);
-#endif
} else {
/* 3' end */
if (all_stage2_ends == NULL) {
@@ -12618,6 +12681,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
for (p = joined_ends; p != NULL; p = List_next(p)) {
copy = (List_T) List_head(p);
+ debug(printf("*** Solve path_rev joined end\n"));
path_rev = path_compute_dir(&defect_rate_temp,/*pairs*/List_reverse(copy),/*cdna_direction*/-1,
watsonp,genestrand,jump_late_p,
#ifdef PMAP
@@ -12668,6 +12732,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
for (p = joined_starts; p != NULL; p = List_next(p)) {
copy = (List_T) List_head(p);
+ debug(printf("*** Solve path_rev joined start\n"));
path_rev = path_compute_dir(&defect_rate_temp,/*pairs*/copy,/*cdna_direction*/-1,
watsonp,genestrand,jump_late_p,
#ifdef PMAP
@@ -12703,39 +12768,23 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
#endif
-#ifdef DEBUG8
- if (stage3debug > NO_STAGE3DEBUG && stage3debug < POST_CANONICAL) {
- path_fwd = insert_gapholders(pairs_fwd,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
- pairs_fwd = assign_gap_types(path_fwd,/*cdna_direction*/+1,watsonp,queryseq_ptr,
- chrnum,chroffset,chrhigh,pairpool);
-
- path_rev = insert_gapholders(pairs_rev,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
- pairs_rev = assign_gap_types(path_rev,/*cdna_direction*/-1,watsonp,queryseq_ptr,
- chrnum,chroffset,chrhigh,pairpool);
-
-
- } else {
-#endif
- pairs_fwd = path_compute_final(defect_rate_fwd,pairs_fwd,/*cdna_direction*/+1,
- watsonp,genestrand,jump_late_p,querylength,
+ pairs_fwd = path_compute_final(defect_rate_fwd,pairs_fwd,/*cdna_direction*/+1,
+ watsonp,genestrand,jump_late_p,querylength,
#ifdef PMAP
- queryaaseq_ptr,
+ queryaaseq_ptr,
#endif
- queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
- maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
- oligoindices_minor,diagpool,cellpool);
+ queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
+ oligoindices_minor,diagpool,cellpool);
- pairs_rev = path_compute_final(defect_rate_rev,pairs_rev,/*cdna_direction*/-1,
- watsonp,genestrand,jump_late_p,querylength,
+ pairs_rev = path_compute_final(defect_rate_rev,pairs_rev,/*cdna_direction*/-1,
+ watsonp,genestrand,jump_late_p,querylength,
#ifdef PMAP
- queryaaseq_ptr,
-#endif
- queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
- maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
- oligoindices_minor,diagpool,cellpool);
-#ifdef DEBUG8
- }
+ queryaaseq_ptr,
#endif
+ queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
+ oligoindices_minor,diagpool,cellpool);
FREE(last_genomedp3_rev);
FREE(last_genomedp5_rev);
@@ -12814,89 +12863,194 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
#endif
sense_filter);
}
+
if (splicingp == false) {
*sensedir = SENSE_NULL;
}
if (pairs_pretrim == NULL) {
- *npairs = 0;
- *goodness = 0;
- *nmatches_posttrim = 0;
- *ambig_end_length_5 = *ambig_end_length_3 = 0;
- *ambig_prob_5 = *ambig_prob_3 = 0.0;
+#if 0
+ *npairs1 = 0;
+ *goodness1 = 0;
+ *nmatches_posttrim_1 = 0;
+ *ambig_end_length_5_1 = *ambig_end_length_3_1 = 0;
+ *ambig_prob_5_1 = *ambig_prob_3_1 = 0.0;
+#endif
return (struct Pair_T *) NULL;
- } else {
- if (*cdna_direction >= 0) {
- *ambig_end_length_5 = fwd_ambig_end_length_5;
- *ambig_end_length_3 = fwd_ambig_end_length_3;
- *ambig_splicetype_5 = fwd_ambig_splicetype_5;
- *ambig_splicetype_3 = fwd_ambig_splicetype_3;
- *ambig_prob_5 = fwd_ambig_prob_5;
- *ambig_prob_3 = fwd_ambig_prob_3;
- defect_rate = defect_rate_fwd;
- } else {
- *ambig_end_length_5 = rev_ambig_end_length_5;
- *ambig_end_length_3 = rev_ambig_end_length_3;
- *ambig_splicetype_5 = rev_ambig_splicetype_5;
- *ambig_splicetype_3 = rev_ambig_splicetype_3;
- *ambig_prob_5 = rev_ambig_prob_5;
- *ambig_prob_3 = rev_ambig_prob_3;
- defect_rate = defect_rate_rev;
- }
-#ifdef DEBUG8
- if (stage3debug > NO_STAGE3DEBUG && stage3debug < POST_TRIM) {
- *finalpairs = pairs_pretrim;
- } else {
-#endif
- *finalpairs = path_trim(defect_rate,&(*ambig_end_length_5),&(*ambig_end_length_3),
- &(*ambig_splicetype_5),&(*ambig_splicetype_3),
- &(*ambig_prob_5),&(*ambig_prob_3),
- pairs_pretrim,&(*cdna_direction),watsonp,
- jump_late_p,querylength,
+ }
+
#ifdef GSNAP
- &(*sensedir),
+ if (*cdna_direction == 0) {
+ debug11(printf("Initial cdna_direction is 0\n"));
+ *ambig_end_length_5_1 = fwd_ambig_end_length_5;
+ *ambig_end_length_3_1 = fwd_ambig_end_length_3;
+ *ambig_splicetype_5_1 = fwd_ambig_splicetype_5;
+ *ambig_splicetype_3_1 = fwd_ambig_splicetype_3;
+ *ambig_prob_5_1 = fwd_ambig_prob_5;
+ *ambig_prob_3_1 = fwd_ambig_prob_3;
+
+ *cdna_direction = +1;
+ *sensedir = SENSE_FORWARD;
+ *finalpairs1 = path_trim(defect_rate_fwd,&(*ambig_end_length_5_1),&(*ambig_end_length_3_1),
+ &(*ambig_splicetype_5_1),&(*ambig_splicetype_3_1),
+ &(*ambig_prob_5_1),&(*ambig_prob_3_1),
+ pairs_fwd,&(*cdna_direction),watsonp,
+ jump_late_p,querylength,
+#ifdef GSNAP
+ &(*sensedir),
#endif
- queryseq_ptr,queryuc_ptr,
- chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,pairpool,dynprogL,dynprogR);
-#ifdef DEBUG8
- }
+ queryseq_ptr,queryuc_ptr,
+ chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
+ maxpeelback,pairpool,dynprogL,dynprogR);
+
+ *ambig_end_length_5_2 = rev_ambig_end_length_5;
+ *ambig_end_length_3_2 = rev_ambig_end_length_3;
+ *ambig_splicetype_5_2 = rev_ambig_splicetype_5;
+ *ambig_splicetype_3_2 = rev_ambig_splicetype_3;
+ *ambig_prob_5_2 = rev_ambig_prob_5;
+ *ambig_prob_3_2 = rev_ambig_prob_3;
+
+ *cdna_direction = -1;
+ *sensedir = SENSE_ANTI;
+ *finalpairs2 = path_trim(defect_rate_rev,&(*ambig_end_length_5_2),&(*ambig_end_length_3_2),
+ &(*ambig_splicetype_5_2),&(*ambig_splicetype_3_2),
+ &(*ambig_prob_5_2),&(*ambig_prob_3_2),
+ pairs_rev,&(*cdna_direction),watsonp,
+ jump_late_p,querylength,
+#ifdef GSNAP
+ &(*sensedir),
#endif
+ queryseq_ptr,queryuc_ptr,
+ chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
+ maxpeelback,pairpool,dynprogL,dynprogR);
+
+ if (*finalpairs1 != NULL && *finalpairs2 != NULL) {
+ debug11(printf("Both directions are non-null, so returning both\n"));
+ *nmatches_posttrim_1 = Pair_nmatches_posttrim(&(*max_match_length_1),*finalpairs1,/*pos5*/*ambig_end_length_5_1,
+ /*pos3*/querylength - (*ambig_end_length_3_1));
+ pairarray1 = make_pairarray(&(*npairs1),&(*finalpairs1),/*cdna_direction*/+1,watsonp,
+ pairpool,queryseq_ptr,chroffset,chrhigh,
+ ngap,query_subseq_offset,skiplength);
+ *goodness1 = Pair_fracidentity_array(&(*matches1),&(*unknowns1),&(*mismatches1),
+ &(*qopens1),&(*qindels1),&(*topens1),&(*tindels1),
+ &(*ncanonical1),&(*nsemicanonical1),&(*nnoncanonical1),
+ &(*min_splice_prob_1),pairarray1,*npairs1,/*cdna_direction*/+1);
+
+ debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n",
+ *npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1));
+ debug0(Pair_dump_array(pairarray1,*npairs1,/*zerobasedp*/true));
+
+ *nmatches_posttrim_2 = Pair_nmatches_posttrim(&(*max_match_length_2),*finalpairs2,/*pos5*/*ambig_end_length_5_2,
+ /*pos3*/querylength - (*ambig_end_length_3_2));
+ *pairarray2 = make_pairarray(&(*npairs2),&(*finalpairs2),/*cdna_direction*/-1,watsonp,
+ pairpool,queryseq_ptr,chroffset,chrhigh,
+ ngap,query_subseq_offset,skiplength);
+ *goodness2 = Pair_fracidentity_array(&(*matches2),&(*unknowns2),&(*mismatches2),
+ &(*qopens2),&(*qindels2),&(*topens2),&(*tindels2),
+ &(*ncanonical2),&(*nsemicanonical2),&(*nnoncanonical2),
+ &(*min_splice_prob_2),*pairarray2,*npairs2,/*cdna_direction*/-1);
+
+ debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n",
+ *npairs2,*matches2,*mismatches2,*qopens2,*qindels2,*topens2,*tindels2));
+ debug0(Pair_dump_array(*pairarray2,*npairs2,/*zerobasedp*/true));
+
+ *cdna_direction = 0;
+ *sensedir = SENSE_NULL;
+ return pairarray1;
- *nmatches_posttrim = Pair_nmatches_posttrim(&(*max_match_length),*finalpairs,/*pos5*/*ambig_end_length_5,
- /*pos3*/querylength - (*ambig_end_length_3));
+ } else if (*finalpairs1 != NULL) {
+ debug11(printf("Only forward direction is non-null, so retrying...\n"));
+ pairs_pretrim = pairs_fwd;
+ *cdna_direction = +1;
+ /* Continue below */
- /* printf("ambig_end_length = %d, %d\n",*ambig_end_length_5,*ambig_end_length_3); */
+ } else if (*finalpairs2 != NULL) {
+ debug11(printf("Only reverse direction is non-null, so retrying...\n"));
+ pairs_pretrim = pairs_rev;
+ *cdna_direction = -1;
+ /* Continue below */
- pairarray = make_pairarray(&(*npairs),&(*finalpairs),*cdna_direction,watsonp,
- pairpool,queryseq_ptr,chroffset,chrhigh,
- ngap,query_subseq_offset,skiplength);
- *goodness = Pair_fracidentity_array(&(*matches),&(*unknowns),&(*mismatches),
- &(*qopens),&(*qindels),&(*topens),&(*tindels),
- &(*ncanonical),&(*nsemicanonical),&(*nnoncanonical),
- &(*min_splice_prob),pairarray,*npairs,*cdna_direction);
+ } else {
+ return (struct Pair_T *) NULL;
+ }
+ }
+#endif
- debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n",
- *npairs,*matches,*mismatches,*qopens,*qindels,*topens,*tindels));
+ if (*cdna_direction > 0) {
+ debug11(printf("Solving for forward direction\n"));
+ *ambig_end_length_5_1 = fwd_ambig_end_length_5;
+ *ambig_end_length_3_1 = fwd_ambig_end_length_3;
+ *ambig_splicetype_5_1 = fwd_ambig_splicetype_5;
+ *ambig_splicetype_3_1 = fwd_ambig_splicetype_3;
+ *ambig_prob_5_1 = fwd_ambig_prob_5;
+ *ambig_prob_3_1 = fwd_ambig_prob_3;
+ *sensedir = SENSE_FORWARD;
+ defect_rate = defect_rate_fwd;
+
+ } else if (*cdna_direction < 0) {
+ debug11(printf("Solving for reverse direction\n"));
+ *ambig_end_length_5_1 = rev_ambig_end_length_5;
+ *ambig_end_length_3_1 = rev_ambig_end_length_3;
+ *ambig_splicetype_5_1 = rev_ambig_splicetype_5;
+ *ambig_splicetype_3_1 = rev_ambig_splicetype_3;
+ *ambig_prob_5_1 = rev_ambig_prob_5;
+ *ambig_prob_3_1 = rev_ambig_prob_3;
+ *sensedir = SENSE_ANTI;
+ defect_rate = defect_rate_rev;
+ } else {
+#ifdef GSNAP
+ abort();
+#else
+ debug11(printf("Solving for unknown (forward) direction\n"));
+ *ambig_end_length_5_1 = fwd_ambig_end_length_5;
+ *ambig_end_length_3_1 = fwd_ambig_end_length_3;
+ *ambig_splicetype_5_1 = fwd_ambig_splicetype_5;
+ *ambig_splicetype_3_1 = fwd_ambig_splicetype_3;
+ *ambig_prob_5_1 = fwd_ambig_prob_5;
+ *ambig_prob_3_1 = fwd_ambig_prob_3;
+ *sensedir = SENSE_FORWARD;
+ defect_rate = defect_rate_fwd;
+#endif
+ }
-#if 0
- if (checkp == true && stage3debug == NO_STAGE3DEBUG &&
- Pair_check_array(pairarray,*npairs) == true) {
- Pair_dump_array(pairarray,*npairs,/*zerobasedp*/true);
-#ifndef DEBUG
- Except_raise(&coordinate_error,__FILE__,__LINE__);
+ *finalpairs1 = path_trim(defect_rate,&(*ambig_end_length_5_1),&(*ambig_end_length_3_1),
+ &(*ambig_splicetype_5_1),&(*ambig_splicetype_3_1),
+ &(*ambig_prob_5_1),&(*ambig_prob_3_1),
+ pairs_pretrim,&(*cdna_direction),watsonp,
+ jump_late_p,querylength,
+#ifdef GSNAP
+ &(*sensedir),
#endif
- }
+ queryseq_ptr,queryuc_ptr,
+ chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
+ maxpeelback,pairpool,dynprogL,dynprogR);
+
+ *nmatches_posttrim_1 = Pair_nmatches_posttrim(&(*max_match_length_1),*finalpairs1,/*pos5*/*ambig_end_length_5_1,
+ /*pos3*/querylength - (*ambig_end_length_3_1));
+ pairarray1 = make_pairarray(&(*npairs1),&(*finalpairs1),*cdna_direction,watsonp,
+ pairpool,queryseq_ptr,chroffset,chrhigh,
+ ngap,query_subseq_offset,skiplength);
+ *goodness1 = Pair_fracidentity_array(&(*matches1),&(*unknowns1),&(*mismatches1),
+ &(*qopens1),&(*qindels1),&(*topens1),&(*tindels1),
+ &(*ncanonical1),&(*nsemicanonical1),&(*nnoncanonical1),
+ &(*min_splice_prob_1),pairarray1,*npairs1,*cdna_direction);
+
+ debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n",
+ *npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1));
+ debug0(Pair_dump_array(pairarray1,*npairs1,/*zerobasedp*/true));
+
+#ifdef GSNAP
+ *pairarray2 = (struct Pair_T *) NULL;
+ *npairs2 = 0;
#endif
+ debug11(printf("Final cdna direction is %d\n",*cdna_direction));
+ debug11(printf("Final sensedir is %d\n",*sensedir));
- debug0(Pair_dump_array(pairarray,*npairs,/*zerobasedp*/true));
-
- return pairarray;
- }
+ return pairarray1;
}
diff --git a/src/stage3.h b/src/stage3.h
index 099a5eb..bb24fbc 100644
--- a/src/stage3.h
+++ b/src/stage3.h
@@ -1,4 +1,4 @@
-/* $Id: stage3.h 184470 2016-02-18 00:11:42Z twu $ */
+/* $Id: stage3.h 188752 2016-05-01 17:28:22Z twu $ */
#ifndef STAGE3_INCLUDED
#define STAGE3_INCLUDED
@@ -281,13 +281,24 @@ extern int
Stage3_good_part (struct Pair_T *pairarray, int npairs, int pos5, int pos3);
extern struct Pair_T *
-Stage3_compute (List_T *pairs, int *npairs, int *goodness, int *cdna_direction, int *sensedir,
- int *matches, int *nmatches_posttrim, int *max_match_length,
- int *ambig_end_length_5, int *ambig_end_length_3,
- Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
- double *ambig_prob_5, double *ambig_prob_3,
- int *unknowns, int *mismatches, int *qopens, int *qindels, int *topens, int *tindels,
- int *ncanonical, int *nsemicanonical, int *nnoncanonical, double *min_splice_prob,
+Stage3_compute (int *cdna_direction, int *sensedir1, List_T *pairs1, int *npairs1, int *goodness1,
+ int *matches1, int *nmatches_posttrim_1, int *max_match_length_1,
+ int *ambig_end_length_5_1, int *ambig_end_length_3_1,
+ Splicetype_T *ambig_splicetype_5_1, Splicetype_T *ambig_splicetype_3_1,
+ double *ambig_prob_5_1, double *ambig_prob_3_1,
+ int *unknowns1, int *mismatches1, int *qopens1, int *qindels1, int *topens1, int *tindels1,
+ int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *min_splice_prob_1,
+
+#ifdef GSNAP
+ struct Pair_T **pairarray2, List_T *pairs2, int *npairs2, int *goodness2,
+ int *matches2, int *nmatches_posttrim_2, int *max_match_length_2,
+ int *ambig_end_length_5_2, int *ambig_end_length_3_2,
+ Splicetype_T *ambig_splicetype_5_2, Splicetype_T *ambig_splicetype_3_2,
+ double *ambig_prob_5_2, double *ambig_prob_3_2,
+ int *unknowns2, int *mismatches2, int *qopens2, int *qindels2, int *topens2, int *tindels2,
+ int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *min_splice_prob_2,
+#endif
+
List_T stage2pairs, List_T all_stage2_starts, List_T all_stage2_ends,
#ifdef PMAP
char *queryaaseq_ptr,
diff --git a/src/stage3hr.c b/src/stage3hr.c
index e9563bd..76f063c 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 186731 2016-03-30 23:19:01Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -206,7 +206,10 @@ static char rcsid[] = "$Id: stage3hr.c 186731 2016-03-30 23:19:01Z twu $";
/* Controlled by --end-detail. high (2): true/true, medium (1): false/true, low (0): false/false */
-static bool high_resolution_substring_ends_p = false; /* major performance hit, about 3x, for 5% of cases */
+
+/* Previously had major performance hit, about 3x, for 5% of cases,
+ but with latest modifications, has minor effect, maybe 20% */
+static bool high_resolution_substring_ends_p = true;
static bool high_resolution_gmap_ends_p = true; /* minor performance hit */
static bool want_random_p;
@@ -247,9 +250,15 @@ static int gmap_min_nconsecutive;
static int ambig_end_interval; /* For penalizing large ambiguous ends
in GMAP alignments, since such ends
should have been found */
+
static int subopt_levels;
static bool novelsplicingp;
+
+static Chrpos_T overall_max_distance;
+static int max_middle_insertions_default; /* If negative, then compute querylength - 2*min_indel_end_matches */
+static int max_middle_deletions;
static Chrpos_T shortsplicedist;
+
static bool merge_samechr_p;
static bool *circularp;
static bool *altlocp;
@@ -277,6 +286,7 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
int localsplicing_penalty_in, int indel_penalty_middle_in,
int antistranded_penalty_in, bool favor_multiexon_p_in,
int gmap_min_nconsecutive_in, int end_detail, int subopt_levels_in,
+ int max_middle_insertions_in, int max_middle_deletions_in,
bool novelsplicingp_in, Chrpos_T shortsplicedist_in, bool merge_samechr_p_in,
bool *circularp_in, bool *altlocp_in, Univcoord_T *alias_starts_in, Univcoord_T *alias_ends_in,
char *failedinput_root_in, bool print_m8_p_in, bool want_random_p_in) {
@@ -338,7 +348,20 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
subopt_levels = subopt_levels_in;
novelsplicingp = novelsplicingp_in;
+
+ max_middle_insertions_default = max_middle_insertions_in;
+ max_middle_deletions = max_middle_deletions_in;
+
shortsplicedist = shortsplicedist_in;
+
+ overall_max_distance = shortsplicedist;
+ if (max_middle_deletions > (int) overall_max_distance) {
+ overall_max_distance = max_middle_deletions;
+ }
+ if (max_middle_insertions_default > (int) overall_max_distance) {
+ overall_max_distance = max_middle_insertions_default;
+ }
+
merge_samechr_p = merge_samechr_p_in;
circularp = circularp_in;
altlocp = altlocp_in;
@@ -4800,8 +4823,8 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
int nmismatches_bothdiff = 0;
- debug0(printf("Entered Stage3end_new_substrings at left %u, with plusp %d, sensedir %d, and endpoints %s\n",
- Uintlist_head(lefts),plusp,sensedir,Intlist_to_string(endpoints)));
+ debug0(printf("Entered Stage3end_new_substrings at left %u [%u], with plusp %d, sensedir %d, and endpoints %s\n",
+ Uintlist_head(lefts),Uintlist_head(lefts) - chroffset,plusp,sensedir,Intlist_to_string(endpoints)));
debug0(printf("There are %d endpoints, %d lefts, %d nmismatches, and %d junctions\n",
Intlist_length(endpoints),Uintlist_length(lefts),Intlist_length(nmismatches_list),List_length(junctions)));
debug0(printf("Ambig left %p, right %p\n",left_ambig,right_ambig));
@@ -4864,6 +4887,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
if (genomicstart < chroffset && genomicend > chrhigh) {
/* Out of bounds on both sides */
Junction_gc(&junctions);
+ debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
} else if (genomicstart < chroffset) {
@@ -4871,6 +4895,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
outofbounds_end = genomicend - chroffset;
debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
Junction_gc(&junctions);
+ debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
#if 0
/* Could consider this for the lowest substring */
@@ -4890,6 +4915,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
outofbounds_end = genomicend - chrhigh;
debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
Junction_gc(&junctions);
+ debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
#if 0
/* Could consider this for the highest substring */
@@ -4930,8 +4956,8 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
/*exactp*/Intlist_head(x) == 0 ? true : false,plusp,genestrand,
trim_left_action,trim_right_action,outofbounds_start,outofbounds_end,
/*minlength*/0,sensedir)) == NULL) {
- /* Don't know how to fix the junctions */
- debug0(printf("Don't know how to fix the junctions, so returning NULL from Stage3end_new_substrings\n"));
+ debug0(printf("Poor substring (plus) for %d..%d, so returning NULL from Stage3end_new_substrings\n",
+ querystart,queryend));
for (p = substrings; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
if (substring == left_ambig) {
@@ -4944,6 +4970,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
}
List_free(&substrings);
Junction_gc(&junctions);
+ debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
} else {
substrings = List_push(substrings,(void *) substring);
@@ -5005,6 +5032,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
if (genomicend < chroffset && genomicstart > chrhigh) {
/* Out of bounds on both sides */
Junction_gc(&junctions);
+ debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
} else if (genomicend < chroffset) {
@@ -5012,6 +5040,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
outofbounds_start = genomicstart - chroffset;
debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
Junction_gc(&junctions);
+ debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
#if 0
/* Could consider this for the lowest substring */
@@ -5030,6 +5059,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
outofbounds_start = genomicstart - chrhigh;
debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
Junction_gc(&junctions);
+ debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
#if 0
/* Could consider this for the highest substring */
@@ -5071,8 +5101,8 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
/*exactp*/Intlist_head(x) == 0 ? true : false,plusp,genestrand,
trim_left_action,trim_right_action,outofbounds_start,outofbounds_end,
/*minlength*/0,sensedir)) == NULL) {
- /* Don't know how to fix the junctions */
- debug0(printf("Don't know how to fix the junctions, so returning NULL from Stage3end_new_substrings\n"));
+ debug0(printf("Poor substring (minus) for %d..%d, so returning NULL from Stage3end_new_substrings\n",
+ querylength - queryend,querylength - querystart));
for (p = substrings; p != NULL; p = List_next(p)) {
substring = (Substring_T) List_head(p);
if (substring == left_ambig) {
@@ -5085,6 +5115,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
}
List_free(&substrings);
Junction_gc(&junctions);
+ debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
} else {
substrings = List_push(substrings,(void *) substring);
@@ -5258,19 +5289,23 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
debug0(printf("Returning NULL from Stage3end_new_substrings because of circularalias of %d\n",new->circularalias));
Stage3end_free(&new);
/* Junction_gc(&junctions); -- Done by Stage3end_free */
+ debug0(printf("Stage3end_new_substrings returning NULL\n"));
return (T) NULL;
} else if (new->circularalias >= 0) {
new->altlocp = false;
debug0(printf("Returning circular %p from Stage3end_new_substrings with found_score %d\n",new,*found_score));
+ debug0(printf("Stage3end_new_substrings returning %p\n",new));
return new;
} else if ((new->altlocp = altlocp[chrnum]) == false) {
debug0(printf("Returning primary %p from Stage3end_new_substrings with found_score %d\n",new,*found_score));
+ debug0(printf("Stage3end_new_substrings returning %p\n",new));
return new;
} else {
debug0(printf("Returning altloc %p from Stage3end_new_substrings with found_score %d\n",new,*found_score));
+ debug0(printf("Stage3end_new_substrings returning %p\n",new));
return new;
}
}
@@ -5283,11 +5318,11 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
/* Modified from run_gmap_plus in sarray-read.c */
/* extend_ends_p is an expensive operation, slowing down speed by 5x */
T
-Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength, int genestrand,
+Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylength, int genestrand,
int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
bool extend_ends_p) {
- T hit;
+ T hit1;
List_T stage2pairs, all_stage2_starts, all_stage2_ends;
List_T p, startp;
@@ -5295,21 +5330,26 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
int querystart, queryend;
Univcoord_T *ambcoords;
- int sensedir;
-
- struct Pair_T *pairarray;
- List_T pairs;
+ struct Pair_T *pairarray1, *pairarray2;
+ List_T pairs1, pairs2;
Substring_T substring, first_ambig, last_ambig;
int querypos, seglength;
Chrpos_T genomepos;
char c, g, g_alt, comp;
- int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
- max_match_length, ambig_end_length_5, ambig_end_length_3,
- unknowns, mismatches, qopens, qindels, topens, tindels,
- ncanonical, nsemicanonical, nnoncanonical;
- double ambig_prob_5, ambig_prob_3, min_splice_prob;
- Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+ int cdna_direction, sensedir;
+ int npairs1, goodness1, matches1, nmatches_posttrim_1,
+ max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
+ unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
+ ncanonical1, nsemicanonical1, nnoncanonical1;
+ int npairs2, goodness2, matches2, nmatches_posttrim_2,
+ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
+ unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
+ ncanonical2, nsemicanonical2, nnoncanonical2;
+ double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
+ double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+ Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
+ Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
Univcoord_T start, end;
int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
@@ -5317,7 +5357,15 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
Chrpos_T chrstart, chrend;
- debug13(printf("Entered Stage3hr_substrings_run_gmap_plus\n"));
+ *hit2 = (T) NULL;
+ debug13(printf("Entered Stage3hr_substrings_run_gmap_plus with extend_ends_p %d\n",extend_ends_p));
+#ifdef DEBUG13
+ for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ printf("%d..%d\n",Substring_querystart(substring),Substring_queryend(substring));
+ }
+#endif
+
#ifdef HAVE_ALLOCA
gsequence_orig = (char *) MALLOCA((querylength+1) * sizeof(char));
@@ -5386,6 +5434,7 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
substring = (Substring_T) List_head(this->substrings_1toN);
if (high_resolution_substring_ends_p == false) {
/* Don't try to solve ends, as requested by user */
+ debug13(printf("User does not want high resolution substring ends\n"));
all_stage2_starts = (List_T) NULL;
} else if ((querypos = Substring_querystart(substring)) < 15) {
/* Don't try to solve short ends, for sake of efficiency */
@@ -5393,10 +5442,10 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
all_stage2_starts = (List_T) NULL;
} else {
chrend = Substring_alignstart_trim_chr(substring);
- if (chrend < shortsplicedist) {
+ if (chrend < overall_max_distance) {
chrstart = 0;
} else {
- chrstart = chrend - shortsplicedist;
+ chrstart = chrend - overall_max_distance;
}
all_stage2_starts = Stage2_compute_starts(
#ifdef PMAP
@@ -5460,6 +5509,7 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
substring = (Substring_T) List_head(this->substrings_Nto1);
if (high_resolution_substring_ends_p == false) {
/* Don't try to solve ends, as requested by user */
+ debug13(printf("User does not want high resolution substring ends\n"));
all_stage2_ends = (List_T) NULL;
} else if ((querypos = Substring_queryend(substring)) > querylength - 15) {
/* Don't try to solve short ends, for sake of efficiency */
@@ -5467,10 +5517,10 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
all_stage2_ends = (List_T) NULL;
} else {
chrstart = Substring_alignend_trim_chr(substring);
- if (chrstart + shortsplicedist > (this->chrhigh - this->chroffset)) {
+ if (chrstart + overall_max_distance > (this->chrhigh - this->chroffset)) {
chrend = this->chrhigh - this->chroffset;
} else {
- chrend = chrstart + shortsplicedist;
+ chrend = chrstart + overall_max_distance;
}
all_stage2_ends = Stage2_compute_ends(
#ifdef PMAP
@@ -5492,9 +5542,11 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
/* Get better results if we turn off this branch, because GMAP finds
indels at ends, so do re-run GMAP in all cases */
- if (high_resolution_substring_ends_p == false && extend_ends_p == true && all_stage2_starts == NULL && all_stage2_ends == NULL) {
+ if (/* high_resolution_substring_ends_p == false && */
+ extend_ends_p == true && all_stage2_starts == NULL && all_stage2_ends == NULL) {
/* Don't run re-run GMAP on central portion. Just rely on substrings. */
- hit = (T) NULL;
+ debug13(printf("Just relying on substrings. hit1 is NULL\n"));
+ hit1 = (T) NULL;
} else {
/* F. Make stage2pairs */
@@ -5537,55 +5589,105 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
}
if (stage2pairs == NULL) {
- hit = (T) NULL;
+ hit1 = (T) NULL;
} else {
knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
stage2pairs = List_reverse(stage2pairs);
knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
- if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
- &matches,&nmatches_posttrim,&max_match_length,
- &ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,
- &ambig_prob_5,&ambig_prob_3,
- &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
- &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
- stage2pairs,all_stage2_starts,all_stage2_ends,
+ if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+ &matches1,&nmatches_posttrim_1,&max_match_length_1,
+ &ambig_end_length_5_1,&ambig_end_length_3_1,
+ &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+ &ambig_prob_5_1,&ambig_prob_3_1,
+ &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+ &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+
+ &pairarray2,&pairs2,&npairs2,&goodness2,
+ &matches2,&nmatches_posttrim_2,&max_match_length_2,
+ &ambig_end_length_5_2,&ambig_end_length_3_2,
+ &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+ &ambig_prob_5_2,&ambig_prob_3_2,
+ &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+ &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+
+ stage2pairs,all_stage2_starts,all_stage2_ends,
#ifdef END_KNOWNSPLICING_SHORTCUT
- cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
- watsonp ? query_compress_fwd : query_compress_rev,
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
#endif
- /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
#ifdef EXTRACT_GENOMICSEG
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#else
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#endif
- this->chrnum,this->chroffset,this->chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
- /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- /*sense_try*/0,/*sense_filter*/0,
- oligoindices_minor,diagpool,cellpool)) == NULL) {
- hit = (T) NULL;
+ this->chrnum,this->chroffset,this->chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
+ /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ /*sense_try*/0,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
+ hit1 = (T) NULL;
+
+ } else if (cdna_direction == 0) {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray1,npairs1);
+ start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*minusterm*/Pair_querypos(&(pairarray1[0])),this->chroffset);
+ end = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chrhigh);
+
+ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/true,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray1);
+ }
+
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray2,npairs2);
+ start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray2[0])),
+ /*minusterm*/Pair_querypos(&(pairarray2[0])),this->chroffset);
+ end = add_bounded(this->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),this->chrhigh);
+
+ if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+ ambig_end_length_5_2,ambig_end_length_3_2,
+ ambig_splicetype_5_2,ambig_splicetype_3_2,
+ min_splice_prob_2,
+ pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/true,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray2);
+ }
} else {
nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
- pairarray,npairs);
- start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray[0])),
- /*minusterm*/Pair_querypos(&(pairarray[0])),this->chroffset);
- end = add_bounded(this->chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),this->chrhigh);
-
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
- /*left*/start,/*genomiclength*/end - start + 1,
- /*plusp*/true,genestrand,
- /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
- FREE_OUT(pairarray);
+ pairarray1,npairs1);
+ start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*minusterm*/Pair_querypos(&(pairarray1[0])),this->chroffset);
+ end = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chrhigh);
+
+ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/true,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray1);
}
}
}
@@ -5602,18 +5704,18 @@ Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
FREE(gsequence_orig);
#endif
- return hit;
+ return hit1;
}
/* Modified from run_gmap_minus in sarray-read.c */
/* extend_ends_p is an expensive operation, slowing down speed by 5x */
T
-Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength, int genestrand,
+Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int querylength, int genestrand,
int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
bool extend_ends_p) {
- T hit;
+ T hit1;
List_T stage2pairs, all_stage2_starts, all_stage2_ends;
List_T p, startp;
@@ -5621,21 +5723,26 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
int querystart, queryend;
Univcoord_T *ambcoords;
- int sensedir;
-
- struct Pair_T *pairarray;
- List_T pairs;
+ struct Pair_T *pairarray1, *pairarray2;
+ List_T pairs1, pairs2;
Substring_T substring, first_ambig, last_ambig;
int querypos, seglength;
Chrpos_T genomepos;
char c, g, g_alt, comp;
- int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
- max_match_length, ambig_end_length_5, ambig_end_length_3,
- unknowns, mismatches, qopens, qindels, topens, tindels,
- ncanonical, nsemicanonical, nnoncanonical;
- double ambig_prob_5, ambig_prob_3, min_splice_prob;
- Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+ int cdna_direction, sensedir;
+ int npairs1, goodness1, matches1, nmatches_posttrim_1,
+ max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
+ unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
+ ncanonical1, nsemicanonical1, nnoncanonical1;
+ int npairs2, goodness2, matches2, nmatches_posttrim_2,
+ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
+ unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
+ ncanonical2, nsemicanonical2, nnoncanonical2;
+ double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
+ double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+ Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
+ Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
Univcoord_T start, end;
int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
@@ -5643,7 +5750,14 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
Chrpos_T chrstart, chrend;
- debug13(printf("Entered Stage3hr_substrings_run_gmap_minus\n"));
+ *hit2 = (T) NULL;
+ debug13(printf("Entered Stage3hr_substrings_run_gmap_minus with extend_ends_p %d\n",extend_ends_p));
+#ifdef DEBUG13
+ for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ printf("%d..%d\n",Substring_querystart(substring),Substring_queryend(substring));
+ }
+#endif
#ifdef HAVE_ALLOCA
gsequence_orig = (char *) MALLOCA((querylength+1) * sizeof(char));
@@ -5713,6 +5827,7 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
substring = (Substring_T) List_head(this->substrings_1toN);
if (high_resolution_substring_ends_p == false) {
/* Don't try to solve ends, as requested by user */
+ debug13(printf("User does not want high resolution substring ends\n"));
all_stage2_starts = (List_T) NULL;
} else if ((querypos = Substring_querystart(substring)) < 15) {
/* Don't try to solve short ends, for sake of efficiency */
@@ -5720,10 +5835,10 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
all_stage2_starts = (List_T) NULL;
} else {
chrstart = Substring_alignstart_trim_chr(substring);
- if (chrstart + shortsplicedist > (this->chrhigh - this->chroffset)) {
+ if (chrstart + overall_max_distance > (this->chrhigh - this->chroffset)) {
chrend = this->chrhigh - this->chroffset;
} else {
- chrend = chrstart + shortsplicedist;
+ chrend = chrstart + overall_max_distance;
}
all_stage2_starts = Stage2_compute_starts(
#ifdef PMAP
@@ -5787,6 +5902,7 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
substring = (Substring_T) List_head(this->substrings_Nto1);
if (high_resolution_substring_ends_p == false) {
/* Don't try to solve ends, as requested by user */
+ debug13(printf("User does not want high resolution substring ends\n"));
all_stage2_ends = (List_T) NULL;
} else if ((querypos = Substring_queryend(substring)) > querylength - 15) {
/* Don't try to solve short ends, for sake of efficiency */
@@ -5794,10 +5910,10 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
all_stage2_ends = (List_T) NULL;
} else {
chrend = Substring_alignend_trim_chr(substring);
- if (chrend < shortsplicedist) {
+ if (chrend < overall_max_distance) {
chrstart = 0;
} else {
- chrstart = chrend - shortsplicedist;
+ chrstart = chrend - overall_max_distance;
}
all_stage2_ends = Stage2_compute_ends(
#ifdef PMAP
@@ -5819,9 +5935,11 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
/* Get better results if we turn off this branch, because GMAP finds
indels at ends, so do re-run GMAP in all cases */
- if (high_resolution_substring_ends_p == false && extend_ends_p == true && all_stage2_starts == NULL && all_stage2_ends == NULL) {
+ if (/* high_resolution_substring_ends_p == false && */
+ extend_ends_p == true && all_stage2_starts == NULL && all_stage2_ends == NULL) {
/* Don't run re-run GMAP on central portion. Just rely on substrings. */
- hit = (T) NULL;
+ debug13(printf("Just relying on substrings. hit1 is NULL\n"));
+ hit1 = (T) NULL;
} else {
/* F. Make stage2pairs */
@@ -5868,54 +5986,102 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
}
if (stage2pairs == NULL) {
- hit = (T) NULL;
+ hit1 = (T) NULL;
} else {
knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
stage2pairs = List_reverse(stage2pairs);
knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
- if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
- &matches,&nmatches_posttrim,&max_match_length,
- &ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,
- &ambig_prob_5,&ambig_prob_3,
- &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
- &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
- stage2pairs,all_stage2_starts,all_stage2_ends,
+ if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+ &matches1,&nmatches_posttrim_1,&max_match_length_1,
+ &ambig_end_length_5_1,&ambig_end_length_3_1,
+ &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+ &ambig_prob_5_1,&ambig_prob_3_1,
+ &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+ &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+
+ &pairarray2,&pairs2,&npairs2,&goodness2,
+ &matches2,&nmatches_posttrim_2,&max_match_length_2,
+ &ambig_end_length_5_2,&ambig_end_length_3_2,
+ &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+ &ambig_prob_5_2,&ambig_prob_3_2,
+ &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+ &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+
+ stage2pairs,all_stage2_starts,all_stage2_ends,
#ifdef END_KNOWNSPLICING_SHORTCUT
- cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
- watsonp ? query_compress_fwd : query_compress_rev,
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
#endif
- /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
#ifdef EXTRACT_GENOMICSEG
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#else
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#endif
- this->chrnum,this->chroffset,this->chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
- /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- /*sense_try*/0,/*sense_filter*/0,
- oligoindices_minor,diagpool,cellpool)) == NULL) {
- hit = (T) NULL;
+ this->chrnum,this->chroffset,this->chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
+ /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ /*sense_try*/0,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
+ hit1 = (T) NULL;
+
+ } else if (cdna_direction == 0) {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray1,npairs1);
+ start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh);
+ end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset);
+ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray1);
+ }
+
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray2,npairs2);
+ start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray2[0])),
+ /*plusterm*/Pair_querypos(&(pairarray2[0])),this->chrhigh);
+ end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),this->chroffset);
+ if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+ ambig_end_length_5_2,ambig_end_length_3_2,
+ ambig_splicetype_5_2,ambig_splicetype_3_2,
+ min_splice_prob_2,
+ pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray2);
+ }
} else {
nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
- pairarray,npairs);
- start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray[0])),
- /*plusterm*/Pair_querypos(&(pairarray[0])),this->chrhigh);
- end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),this->chroffset);
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
- /*left*/end,/*genomiclength*/start - end + 1,
- /*plusp*/false,genestrand,
- /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
- FREE_OUT(pairarray);
+ pairarray1,npairs1);
+ start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh);
+ end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset);
+ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray1);
}
}
}
@@ -5932,54 +6098,61 @@ Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
FREE(gsequence_orig);
#endif
- return hit;
+ return hit1;
}
/* Modified from Stage3end_substrings_run_gmap_plus */
T
-Stage3end_gmap_run_gmap_plus (T this, char *queryuc_ptr, int querylength, int genestrand,
+Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylength, int genestrand,
int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
- T hit;
+ T hit1;
List_T stage2pairs, all_stage2_starts, all_stage2_ends;
int i;
- int sensedir;
-
- struct Pair_T *pairarray;
- List_T pairs;
+ struct Pair_T *pairarray1, *pairarray2;
+ List_T pairs1, pairs2;
int querypos;
- int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
- max_match_length, ambig_end_length_5, ambig_end_length_3,
- unknowns, mismatches, qopens, qindels, topens, tindels,
- ncanonical, nsemicanonical, nnoncanonical;
- double ambig_prob_5, ambig_prob_3, min_splice_prob;
- Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+ int cdna_direction, sensedir;
+ int npairs1, goodness1, matches1, nmatches_posttrim_1,
+ max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
+ unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
+ ncanonical1, nsemicanonical1, nnoncanonical1;
+ int npairs2, goodness2, matches2, nmatches_posttrim_2,
+ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
+ unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
+ ncanonical2, nsemicanonical2, nnoncanonical2;
+ double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
+ double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+ Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
+ Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
Univcoord_T start, end;
int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
Chrpos_T chrstart, chrend;
- debug13(printf("Entered Stage3hr_gmap_run_gmap_plus\n"));
-
+ *hit2 = (T) NULL;
+ assert(this->gmap_cdna_direction != 0); /* Because we have already run GMAP once */
+ debug13(printf("Entered Stage3hr_gmap_run_gmap_plus with gmap_cdna_direction %d\n",this->gmap_cdna_direction));
/* D. Make all_stage2_starts (paths) */
if (high_resolution_gmap_ends_p == false) {
/* Don't try to solve ends, as requested by user */
+ debug13(printf("User does not want high resolution gmap ends\n"));
all_stage2_starts = (List_T) NULL;
} else if ((querypos = Pair_querypos(&(this->pairarray[0]))) < 15) {
/* Don't try to solve short ends, for sake of efficiency */
all_stage2_starts = (List_T) NULL;
} else {
chrend = Pair_genomepos(&(this->pairarray[0]));
- if (chrend < shortsplicedist) {
+ if (chrend < overall_max_distance) {
chrstart = 0;
} else {
- chrstart = chrend - shortsplicedist;
+ chrstart = chrend - overall_max_distance;
}
all_stage2_starts = Stage2_compute_starts(
#ifdef PMAP
@@ -6001,16 +6174,17 @@ Stage3end_gmap_run_gmap_plus (T this, char *queryuc_ptr, int querylength, int ge
/* E. Make all_stage2_ends (pairs) */
if (high_resolution_gmap_ends_p == false) {
/* Don't try to solve ends, as requested by user */
+ debug13(printf("User does not want high resolution gmap ends\n"));
all_stage2_ends = (List_T) NULL;
} else if ((querypos = Pair_querypos(&(this->pairarray[this->npairs - 1]))) > querylength - 15) {
/* Don't try to solve short ends, for sake of efficiency */
all_stage2_ends = (List_T) NULL;
} else {
chrstart = Pair_genomepos(&(this->pairarray[this->npairs - 1]));
- if (chrstart + shortsplicedist > (this->chrhigh - this->chroffset)) {
+ if (chrstart + overall_max_distance > (this->chrhigh - this->chroffset)) {
chrend = this->chrhigh - this->chroffset;
} else {
- chrend = chrstart + shortsplicedist;
+ chrend = chrstart + overall_max_distance;
}
all_stage2_ends = Stage2_compute_ends(
#ifdef PMAP
@@ -6030,7 +6204,7 @@ Stage3end_gmap_run_gmap_plus (T this, char *queryuc_ptr, int querylength, int ge
if (all_stage2_starts == NULL && all_stage2_ends == NULL) {
- hit = (T) NULL;
+ hit1 = (T) NULL;
} else {
/* F. Make stage2pairs */
@@ -6042,55 +6216,105 @@ Stage3end_gmap_run_gmap_plus (T this, char *queryuc_ptr, int querylength, int ge
debug13(printf("\n"));
if (stage2pairs == NULL) {
- hit = (T) NULL;
+ hit1 = (T) NULL;
} else {
knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
stage2pairs = List_reverse(stage2pairs);
knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
- if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
- &matches,&nmatches_posttrim,&max_match_length,
- &ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,
- &ambig_prob_5,&ambig_prob_3,
- &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
- &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
- stage2pairs,all_stage2_starts,all_stage2_ends,
+ if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+ &matches1,&nmatches_posttrim_1,&max_match_length_1,
+ &ambig_end_length_5_1,&ambig_end_length_3_1,
+ &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+ &ambig_prob_5_1,&ambig_prob_3_1,
+ &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+ &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+
+ &pairarray2,&pairs2,&npairs2,&goodness2,
+ &matches2,&nmatches_posttrim_2,&max_match_length_2,
+ &ambig_end_length_5_2,&ambig_end_length_3_2,
+ &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+ &ambig_prob_5_2,&ambig_prob_3_2,
+ &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+ &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+
+ stage2pairs,all_stage2_starts,all_stage2_ends,
#ifdef END_KNOWNSPLICING_SHORTCUT
- cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
- watsonp ? query_compress_fwd : query_compress_rev,
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
#endif
- /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
#ifdef EXTRACT_GENOMICSEG
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#else
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#endif
- this->chrnum,this->chroffset,this->chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
- /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- /*sense_try*/0,/*sense_filter*/0,
- oligoindices_minor,diagpool,cellpool)) == NULL) {
- hit = (T) NULL;
+ this->chrnum,this->chroffset,this->chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
+ /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ /*sense_try*/0,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
+ hit1 = (T) NULL;
+
+ } else if (cdna_direction == 0) {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray1,npairs1);
+ start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*minusterm*/Pair_querypos(&(pairarray1[0])),this->chroffset);
+ end = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chrhigh);
+
+ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/true,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray1);
+ }
+
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray2,npairs2);
+ start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray2[0])),
+ /*minusterm*/Pair_querypos(&(pairarray2[0])),this->chroffset);
+ end = add_bounded(this->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),this->chrhigh);
+
+ if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+ ambig_end_length_5_2,ambig_end_length_3_2,
+ ambig_splicetype_5_2,ambig_splicetype_3_2,
+ min_splice_prob_2,
+ pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/true,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray2);
+ }
} else {
nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
- pairarray,npairs);
- start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray[0])),
- /*minusterm*/Pair_querypos(&(pairarray[0])),this->chroffset);
- end = add_bounded(this->chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),this->chrhigh);
-
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
- /*left*/start,/*genomiclength*/end - start + 1,
- /*plusp*/true,genestrand,
- /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
- FREE_OUT(pairarray);
+ pairarray1,npairs1);
+ start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*minusterm*/Pair_querypos(&(pairarray1[0])),this->chroffset);
+ end = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chrhigh);
+
+ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/true,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray1);
}
}
@@ -6099,56 +6323,62 @@ Stage3end_gmap_run_gmap_plus (T this, char *queryuc_ptr, int querylength, int ge
}
}
- return hit;
+ return hit1;
}
/* Modified from Stage3end_substrings_run_gmap_minus */
T
-Stage3end_gmap_run_gmap_minus (T this, char *queryuc_ptr, int querylength, int genestrand,
+Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int querylength, int genestrand,
int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
- T hit;
+ T hit1;
List_T stage2pairs, all_stage2_starts, all_stage2_ends;
int i;
- int sensedir;
-
Pair_T pair;
- struct Pair_T *pairarray;
- List_T pairs;
+ struct Pair_T *pairarray1, *pairarray2;
+ List_T pairs1, pairs2;
int querypos;
- int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
- max_match_length, ambig_end_length_5, ambig_end_length_3,
- unknowns, mismatches, qopens, qindels, topens, tindels,
- ncanonical, nsemicanonical, nnoncanonical;
- double ambig_prob_5, ambig_prob_3, min_splice_prob;
- Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+ int cdna_direction, sensedir;
+ int npairs1, goodness1, matches1, nmatches_posttrim_1,
+ max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
+ unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
+ ncanonical1, nsemicanonical1, nnoncanonical1;
+ int npairs2, goodness2, matches2, nmatches_posttrim_2,
+ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
+ unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
+ ncanonical2, nsemicanonical2, nnoncanonical2;
+ double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
+ double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+ Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
+ Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
Univcoord_T start, end;
int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
Chrpos_T chrstart, chrend;
- debug13(printf("Entered Stage3hr_gmap_run_gmap_minus with %u..%u in chr %d of length %u\n",
- this->genomicstart - this->chroffset,this->genomicend - this->chroffset,
- this->chrnum,this->chrlength));
+ *hit2 = (T) NULL;
+ assert(this->gmap_cdna_direction != 0); /* Because we have already run GMAP once */
+ debug13(printf("Entered Stage3hr_gmap_run_gmap_minus with gmap_cdna_direction %d\n",this->gmap_cdna_direction));
/* D. Make all_stage2_starts (paths) */
if (high_resolution_gmap_ends_p == false) {
/* Don't try to solve ends, as requested by user */
+ debug13(printf("User does not want high resolution gmap ends\n"));
all_stage2_starts = (List_T) NULL;
} else if ((querypos = Pair_querypos(&(this->pairarray[0]))) < 15) {
/* Don't try to solve short ends, for sake of efficiency */
all_stage2_starts = (List_T) NULL;
} else {
chrstart = Pair_genomepos(&(this->pairarray[0]));
- if (chrstart + shortsplicedist > (this->chrhigh - this->chroffset)) {
+ if (chrstart + overall_max_distance > (this->chrhigh - this->chroffset)) {
chrend = this->chrhigh - this->chroffset;
} else {
- chrend = chrstart + shortsplicedist;
+ chrend = chrstart + overall_max_distance;
}
debug13(printf("For starts, chrstart %u, chrend %u\n",chrstart,chrend));
all_stage2_starts = Stage2_compute_starts(
@@ -6171,16 +6401,17 @@ Stage3end_gmap_run_gmap_minus (T this, char *queryuc_ptr, int querylength, int g
/* E. Make all_stage2_ends (pairs) */
if (high_resolution_gmap_ends_p == false) {
/* Don't try to solve ends, as requested by user */
+ debug13(printf("User does not want high resolution gmap ends\n"));
all_stage2_ends = (List_T) NULL;
} else if ((querypos = Pair_querypos(&(this->pairarray[this->npairs - 1]))) > querylength - 15) {
/* Don't try to solve short ends, for sake of efficiency */
all_stage2_ends = (List_T) NULL;
} else {
chrend = Pair_genomepos(&(this->pairarray[this->npairs - 1]));
- if (chrend < shortsplicedist) {
+ if (chrend < overall_max_distance) {
chrstart = 0;
} else {
- chrstart = chrend - shortsplicedist;
+ chrstart = chrend - overall_max_distance;
}
debug13(printf("For ends, chrstart %u, chrend %u\n",chrstart,chrend));
all_stage2_ends = Stage2_compute_ends(
@@ -6201,7 +6432,7 @@ Stage3end_gmap_run_gmap_minus (T this, char *queryuc_ptr, int querylength, int g
if (all_stage2_starts == NULL && all_stage2_ends == NULL) {
- hit = (T) NULL;
+ hit1 = (T) NULL;
} else {
/* F. Make stage2pairs */
@@ -6215,54 +6446,102 @@ Stage3end_gmap_run_gmap_minus (T this, char *queryuc_ptr, int querylength, int g
debug13(printf("\n"));
if (stage2pairs == NULL) {
- hit = (T) NULL;
+ hit1 = (T) NULL;
} else {
knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
stage2pairs = List_reverse(stage2pairs);
knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
- if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
- &matches,&nmatches_posttrim,&max_match_length,
- &ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,
- &ambig_prob_5,&ambig_prob_3,
- &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
- &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
- stage2pairs,all_stage2_starts,all_stage2_ends,
+ if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+ &matches1,&nmatches_posttrim_1,&max_match_length_1,
+ &ambig_end_length_5_1,&ambig_end_length_3_1,
+ &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+ &ambig_prob_5_1,&ambig_prob_3_1,
+ &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+ &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+
+ &pairarray2,&pairs2,&npairs2,&goodness2,
+ &matches2,&nmatches_posttrim_2,&max_match_length_2,
+ &ambig_end_length_5_2,&ambig_end_length_3_2,
+ &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+ &ambig_prob_5_2,&ambig_prob_3_2,
+ &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+ &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+
+ stage2pairs,all_stage2_starts,all_stage2_ends,
#ifdef END_KNOWNSPLICING_SHORTCUT
- cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
- watsonp ? query_compress_fwd : query_compress_rev,
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
#endif
- /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
#ifdef EXTRACT_GENOMICSEG
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#else
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#endif
- this->chrnum,this->chroffset,this->chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
- /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
- /*sense_try*/0,/*sense_filter*/0,
- oligoindices_minor,diagpool,cellpool)) == NULL) {
- hit = (T) NULL;
+ this->chrnum,this->chroffset,this->chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
+ /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ /*sense_try*/0,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
+ hit1 = (T) NULL;
+
+ } else if (cdna_direction == 0) {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray1,npairs1);
+ start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh);
+ end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset);
+ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray1);
+ }
+
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray2,npairs2);
+ start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray2[0])),
+ /*plusterm*/Pair_querypos(&(pairarray2[0])),this->chrhigh);
+ end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),this->chroffset);
+ if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+ ambig_end_length_5_2,ambig_end_length_3_2,
+ ambig_splicetype_5_2,ambig_splicetype_3_2,
+ min_splice_prob_2,
+ pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,
+ /*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray2);
+ }
} else {
nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
- pairarray,npairs);
- start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray[0])),
- /*plusterm*/Pair_querypos(&(pairarray[0])),this->chrhigh);
- end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),this->chroffset);
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
- /*left*/end,/*genomiclength*/start - end + 1,
- /*plusp*/false,genestrand,
- /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
- cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
- FREE_OUT(pairarray);
+ pairarray1,npairs1);
+ start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
+ /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh);
+ end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset);
+ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+ ambig_end_length_5_1,ambig_end_length_3_1,
+ ambig_splicetype_5_1,ambig_splicetype_3_1,
+ min_splice_prob_1,
+ pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray1);
}
}
@@ -6271,12 +6550,10 @@ Stage3end_gmap_run_gmap_minus (T this, char *queryuc_ptr, int querylength, int g
}
}
- return hit;
+ return hit1;
}
-
-
T
Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Compress_T query_compress,
bool plusp, int genestrand,
@@ -6492,7 +6769,8 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
Univcoord_T genomicstart, genomicend;
int outofbounds_start = 0, outofbounds_end = 0;
- debug0(printf("Entered Stage3end_new_substitution at left %u and chrhigh %u, sarrayp %d\n",left,chrhigh,sarrayp));
+ debug0(printf("Entered Stage3end_new_substitution at left %u [%u] and chrhigh %u, sarrayp %d\n",
+ left,left - chroffset,chrhigh,sarrayp));
if (plusp == true) {
genomicstart = left;
@@ -6942,8 +7220,8 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
/* This method is now correct for SNP-tolerant alignment */
new->nmatches = Substring_nmatches(substring1) + Substring_nmatches(substring2);
new->nmatches_posttrim = Substring_nmatches_posttrim(substring1) + Substring_nmatches_posttrim(substring2);
- new->nmatches_posttrim += nindels; /* for use in goodness_cmp procedures */
- new->nmatches_posttrim -= indel_penalty; /* for use in goodness_cmp procedures */
+ /* new->nmatches_posttrim += nindels; -- for use in goodness_cmp procedures */
+ /* new->nmatches_posttrim -= indel_penalty; -- for use in goodness_cmp procedures */
#endif
new->trim_left = Substring_trim_left(substring1);
@@ -7268,7 +7546,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
/* This method is now correct for SNP-tolerant alignment */
new->nmatches = Substring_nmatches(substring1) + Substring_nmatches(substring2);
new->nmatches_posttrim = Substring_nmatches_posttrim(substring1) + Substring_nmatches_posttrim(substring2);
- new->nmatches_posttrim -= indel_penalty; /* for use in goodness_cmp procedures */
+ /* new->nmatches_posttrim -= indel_penalty; -- for use in goodness_cmp procedures */
#endif
new->trim_left = Substring_trim_left(substring1);
@@ -8643,6 +8921,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
/* However, this leads to fatal bugs later, so restored these statements */
debug0(printf("Entered Stage3end_new_gmap with sensedir %d\n",sensedir));
+ assert(sensedir == SENSE_NULL || sensedir == SENSE_ANTI || sensedir == SENSE_FORWARD);
start = &(pairarray[0]);
end = &(pairarray[npairs-1]);
@@ -8696,10 +8975,11 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, cdna_direction %d, sensedir %d, max_match_length %d, gmap_source %d\n",
+ debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, nmatches_posttrim %d, cdna_direction %d, sensedir %d, max_match_length %d, gmap_source %d\n",
new,(unsigned long long) left,(unsigned int) (genomicstart - chroffset),(unsigned int) (genomicend - chroffset),
- (unsigned long long) chrhigh,chrnum,nmismatches_whole,cdna_direction,sensedir,max_match_length,gmap_source));
+ (unsigned long long) chrhigh,chrnum,nmismatches_whole,nmatches_posttrim,cdna_direction,sensedir,max_match_length,gmap_source));
debug0(printf(" ambig_end_length_5 %d, ambig_end_length_3 %d\n",ambig_end_length_5,ambig_end_length_3));
+ debug0(Pair_dump_comp_array(pairarray,npairs));
new->substrings_LtoH = (List_T) NULL;
new->substrings_1toN = (List_T) NULL;
@@ -8794,8 +9074,8 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
debug0(printf(" nmatches %d = posttrim %d + ambig_end_length_5 %d + ambig_end_length_3 %d\n",
new->nmatches,nmatches_posttrim,ambig_end_length_5,ambig_end_length_3));
- new->nmatches_posttrim -= localsplicing_penalty * nintrons; /* for use in goodness_cmp procedures */
- new->nmatches_posttrim -= indel_penalty_middle * nindelbreaks; /* for use in goodness_cmp procedures */
+ /* new->nmatches_posttrim -= localsplicing_penalty * nintrons; -- for use in goodness_cmp procedures */
+ /* new->nmatches_posttrim -= indel_penalty_middle * nindelbreaks; -- for use in goodness_cmp procedures */
if (new->nmatches_posttrim < querylength/2) {
debug0(printf(" nmatches %d < querylength %d/2, so returning NULL\n",
@@ -10965,6 +11245,15 @@ hit_equiv_cmp (Stage3end_T x, Stage3end_T y) {
return +1;
#endif
+#if 0
+ } else if (x->sensedir == y->sensedir) {
+ return 0;
+ } else if (x->sensedir > y->sensedir) {
+ return +1;
+ } else if (y->sensedir > x->sensedir) {
+ return -1;
+#endif
+
} else {
return 0;
}
@@ -11019,7 +11308,25 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
/* Favors definitive splices over ambiguous splices. So need to
make sure we don't make definitive splices unnecessarily */
- if (hit->nmatches_posttrim < best_hit->nmatches_posttrim) {
+ if (hit->nsegments > best_hit->nsegments) {
+ if (hit->nmatches_posttrim > best_hit->nmatches_posttrim) {
+ /* More segments and strictly more matches */
+ return +1;
+ } else {
+ /* More segments, but don't add anything */
+ return -1;
+ }
+
+ } else if (hit->nsegments < best_hit->nsegments) {
+ if (hit->nmatches_posttrim >= best_hit->nmatches_posttrim) {
+ /* Fewer segments, but same or more matches */
+ return +1;
+ } else {
+ /* Fewer segments and don't add anything */
+ return -1;
+ }
+
+ } else if (hit->nmatches_posttrim < best_hit->nmatches_posttrim) {
debug7(printf(" => %d loses by nmatches_posttrim\n",k));
return -1;
} else if (hit->nmatches_posttrim > best_hit->nmatches_posttrim) {
@@ -14157,8 +14464,20 @@ Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
if (SENSE_CONSISTENT_P(hit5->sensedir,hit3->sensedir)) {
debug0(printf("senses are consistent\n"));
new->sense_consistent_p = true;
+
+ } else if (expect_concordant_p == true) {
+ debug5(printf(" Returning NULL, because senses are not consistent\n"));
+ if (private5p == true) {
+ Stage3end_free(&hit5);
+ }
+ if (private3p == true) {
+ Stage3end_free(&hit3);
+ }
+ FREE_OUT(new);
+ return (Stage3pair_T) NULL;
+
} else {
- debug0(printf("senses are inconsistent\n"));
+ debug0(printf("senses are inconsistent, but allowable\n"));
new->sense_consistent_p = false;
}
@@ -14618,6 +14937,20 @@ hitpair_equiv_cmp (Stage3pair_T x, Stage3pair_T y) {
}
#endif
+#if 0
+ } else if (x->hit5->sensedir == y->hit5->sensedir &&
+ x->hit3->sensedir == y->hit3->sensedir) {
+ return 0;
+ } else if (x->hit5->sensedir > y->hit5->sensedir) {
+ return +1;
+ } else if (y->hit5->sensedir > x->hit5->sensedir) {
+ return -1;
+ } else if (x->hit3->sensedir > y->hit3->sensedir) {
+ return +1;
+ } else if (y->hit3->sensedir > x->hit3->sensedir) {
+ return -1;
+#endif
+
} else {
return 0;
}
@@ -14696,6 +15029,7 @@ static bool
hitpair_subsumption (Stage3pair_T x, Stage3pair_T y) {
if (x->dir != y->dir) {
return false; /* Different strands */
+
} else if (x->low <= y->low && x->high >= y->high) {
return true;
} else if (y->low <= x->low && y->high >= x->high) {
@@ -15033,7 +15367,26 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
/* Favors definitive splices over ambiguous splices. So need to
make sure we don't make definitive splices unnecessarily */
- if (hitpair->nmatches_posttrim < best_hitpair->nmatches_posttrim) {
+
+ if (hitpair->hit5->nsegments + hitpair->hit3->nsegments > best_hitpair->hit5->nsegments + best_hitpair->hit3->nsegments) {
+ if (hitpair->nmatches_posttrim > best_hitpair->nmatches_posttrim) {
+ /* More segments and strictly more matches */
+ return +1;
+ } else {
+ /* More segments, but don't add anything */
+ return -1;
+ }
+
+ } else if (hitpair->hit5->nsegments + hitpair->hit3->nsegments < best_hitpair->hit5->nsegments + best_hitpair->hit3->nsegments) {
+ if (hitpair->nmatches_posttrim >= best_hitpair->nmatches_posttrim) {
+ /* Fewer segments, but same or more matches */
+ return +1;
+ } else {
+ /* Fewer segments and don't add anything */
+ return -1;
+ }
+
+ } else if (hitpair->nmatches_posttrim < best_hitpair->nmatches_posttrim) {
/* k is worse */
debug8(printf(" => loses by nmatches_posttrim\n"));
return -1;
@@ -15283,6 +15636,13 @@ pair_remove_bad_superstretches (bool *keep_p, Stage3pair_T superstretch, List_T
stage3pair->dir,stage3pair->nmatches,stage3pair->nmatches_posttrim,
stage3pair->insertlength,stage3pair->amb_status_inside,
start_amb_length(stage3pair->hit5)+ end_amb_length(stage3pair->hit5),start_amb_length(stage3pair->hit3) + end_amb_length(stage3pair->hit3));
+ if (stage3pair->hit5->hittype == GMAP) {
+ Pair_dump_comp_array(stage3pair->hit5->pairarray,stage3pair->hit5->npairs);
+ }
+ if (stage3pair->hit3->hittype == GMAP) {
+ Pair_dump_comp_array(stage3pair->hit3->pairarray,stage3pair->hit3->npairs);
+ }
+
hitpair = (Stage3pair_T) List_head(q);
printf("subsumes that (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), nmatches: %d (%d posttrim), insertlength %d, amb_status_inside %d, amb_lengths %d and %d\n",
Pairtype_string(hitpair->pairtype),hittype_string(hitpair->hit5->hittype),
@@ -15292,6 +15652,12 @@ pair_remove_bad_superstretches (bool *keep_p, Stage3pair_T superstretch, List_T
hitpair->dir,hitpair->nmatches,hitpair->nmatches_posttrim,
hitpair->insertlength,hitpair->amb_status_inside,
start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5),start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3));
+ if (hitpair->hit5->hittype == GMAP) {
+ Pair_dump_comp_array(hitpair->hit5->pairarray,hitpair->hit5->npairs);
+ }
+ if (hitpair->hit3->hittype == GMAP) {
+ Pair_dump_comp_array(hitpair->hit3->pairarray,hitpair->hit3->npairs);
+ }
#endif
q = List_next(q);
}
@@ -15338,6 +15704,14 @@ pair_remove_bad_superstretches (bool *keep_p, Stage3pair_T superstretch, List_T
} else {
/* Exists a child better than parent */
debug8(printf("Exists a child better than parent, so deleting parent and equal and calling recursively among all (better) children\n"));
+#ifdef DEBUG8
+ if (stage3pair->hit5->hittype == GMAP) {
+ Pair_dump_comp_array(stage3pair->hit5->pairarray,stage3pair->hit5->npairs);
+ }
+ if (stage3pair->hit3->hittype == GMAP) {
+ Pair_dump_comp_array(stage3pair->hit3->pairarray,stage3pair->hit3->npairs);
+ }
+#endif
Stage3pair_free(&stage3pair);
for (r = equal; r != NULL; r = List_next(r)) {
hitpair = (Stage3pair_T) List_head(r);
@@ -15453,16 +15827,34 @@ pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
for (i = n - 1; i >= 0; --i) {
hitpair = hitpairs[i];
if (eliminate[i] == false) {
- debug8(printf(" Keeping %u..%u|%u..%u, nmatches (trimmed) %d, score %d, (dir = %d)\n",
+ debug8(printf(" Keeping %s|%s %u..%u|%u..%u, nmatches (trimmed) %d, score %d, (dir = %d)\n",
+ hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hitpair->nmatches,hitpair->score,hitpair->dir));
+#ifdef DEBUG8
+ if (hitpair->hit5->hittype == GMAP) {
+ Pair_dump_comp_array(hitpair->hit5->pairarray,hitpair->hit5->npairs);
+ }
+ if (hitpair->hit3->hittype == GMAP) {
+ Pair_dump_comp_array(hitpair->hit3->pairarray,hitpair->hit3->npairs);
+ }
+#endif
unique = List_push(unique,(void *) hitpair);
} else {
- debug8(printf(" Eliminating %u..%u|%u..%u, nmatches (trimmed) %d, score %d, (dir = %d)\n",
+ debug8(printf(" Eliminating %s|%s %u..%u|%u..%u, nmatches (trimmed) %d, score %d, (dir = %d)\n",
+ hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hitpair->nmatches,hitpair->score,hitpair->dir));
+#ifdef DEBUG8
+ if (hitpair->hit5->hittype == GMAP) {
+ Pair_dump_comp_array(hitpair->hit5->pairarray,hitpair->hit5->npairs);
+ }
+ if (hitpair->hit3->hittype == GMAP) {
+ Pair_dump_comp_array(hitpair->hit3->pairarray,hitpair->hit3->npairs);
+ }
+#endif
Stage3pair_free(&hitpair);
}
}
diff --git a/src/stage3hr.h b/src/stage3hr.h
index a3642dd..4556f1c 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 184484 2016-02-18 03:11:53Z twu $ */
+/* $Id: stage3hr.h 188752 2016-05-01 17:28:22Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -43,6 +43,7 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
int localsplicing_penalty_in, int indel_penalty_middle_in,
int antistranded_penalty_in, bool favor_multiexon_p_in,
int gmap_min_nconsecutive_in, int end_detail, int subopt_levels_in,
+ int max_middle_insertions_in, int max_middle_deletions_in,
bool novelsplicingp_in, Chrpos_T shortsplicedist_in, bool merge_samechr_p_in,
bool *circularp_in, bool *altlocp_in, Univcoord_T *alias_starts_in, Univcoord_T *alias_ends_in,
char *failedinput_root_in, bool print_m8_p_in, bool want_random_p_in);
@@ -312,22 +313,22 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
Chrpos_T chrlength, bool sarrayp);
extern T
-Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength, int genestrand,
+Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylength, int genestrand,
int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
bool extend_ends_p);
extern T
-Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength, int genestrand,
+Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int querylength, int genestrand,
int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
bool extend_ends_p);
extern T
-Stage3end_gmap_run_gmap_plus (T this, char *queryuc_ptr, int querylength, int genestrand,
+Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylength, int genestrand,
int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool);
extern T
-Stage3end_gmap_run_gmap_minus (T this, char *queryuc_ptr, int querylength, int genestrand,
+Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int querylength, int genestrand,
int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool);
diff --git a/src/translation.c b/src/translation.c
index 328d487..8112765 100644
--- a/src/translation.c
+++ b/src/translation.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: translation.c 184469 2016-02-18 00:11:12Z twu $";
+static char rcsid[] = "$Id: translation.c 188718 2016-04-30 01:53:47Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -58,6 +58,7 @@ static char uppercaseCode[128] = UPPERCASE_U2T;
struct T {
int querypos;
char aa;
+ char initc;
Frame_T frame;
};
@@ -71,6 +72,7 @@ Translation_array_new (struct Pair_T *pairs, int translationlen) {
for (i = 0; i < translationlen; i++) {
new[i].querypos = pairs[i].querypos;
new[i].aa = ' ';
+ new[i].initc = ' ';
new[i].frame = NOFRAME;
}
@@ -105,8 +107,10 @@ Translation_dump (struct Pair_T *pairs, struct T *translation, int translationle
/************************************************************************/
-char
-Translation_get_codon (char a, char b, char c) {
+#if 0
+/* 1. Standard code */
+static char
+Translation_get_codon_old (char a, char b, char c) {
switch (b) {
case 'T':
switch (a) {
@@ -185,6 +189,157 @@ Translation_get_codon (char a, char b, char c) {
}
return 'X';
}
+#endif
+
+
+static char *translation_table;
+static char *initiation_table;
+
+/* Taken from http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi in April 2016 */
+
+void
+Translation_setup (int translation_code) {
+ switch (translation_code) {
+ case 1: /* The Standard Code */
+ translation_table = "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "---M---------------M---------------M----------------------------";
+ break;
+
+ case 2: /* The Vertebrate Mitochondrial Code */
+ translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG";
+ initiation_table = "--------------------------------MMMM---------------M------------";
+ break;
+
+ case 3: /* The Yeast Mitochondrial Code */
+ translation_table = "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "----------------------------------MM----------------------------";
+ break;
+
+ case 4: /* The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code */
+ translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "--MM---------------M------------MMMM---------------M------------";
+ break;
+
+ case 5: /* The Invertebrate Mitochondrial Code */
+ translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG";
+ initiation_table = "---M----------------------------MMMM---------------M------------";
+ break;
+
+ case 6: /* The Ciliate, Dasycladacean, and Hexamita Nuclear Code */
+ translation_table = "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "-----------------------------------M----------------------------";
+ break;
+
+ case 9: /* The Echinoderm and Flatworm Mitochondrial Code */
+ translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
+ initiation_table = "-----------------------------------M---------------M------------";
+ break;
+
+ case 10: /* The Euplotid Nuclear Code */
+ translation_table = "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "-----------------------------------M----------------------------";
+ break;
+
+ case 11: /* The Bacterial, Archaeal, and Plant Plastid Code */
+ translation_table = "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "---M---------------M------------MMMM---------------M------------";
+ break;
+
+ case 12: /* The Alternative Yeast Nuclear Code */
+ translation_table = "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "-------------------M---------------M----------------------------";
+ break;
+
+ case 13: /* The Ascidian Mitochondrial Code */
+ translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG";
+ initiation_table = "---M------------------------------MM---------------M------------";
+ break;
+
+ case 14: /* The Alternative Flatworm Mitochondrial Code */
+ translation_table = "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
+ initiation_table = "-----------------------------------M----------------------------";
+ break;
+
+ case 16: /* Chlorophycean Mitochondrial Code */
+ translation_table = "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "-----------------------------------M----------------------------";
+ break;
+
+ case 21: /* Trematode Mitochondrial Code */
+ translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
+ initiation_table = "-----------------------------------M---------------M------------";
+ break;
+
+ case 22: /* Scenedesmus obliquus Mitochondrial Code */
+ translation_table = "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "-----------------------------------M----------------------------";
+ break;
+
+ case 23: /* Thraustochytrium Mitochondrial Code */
+ translation_table = "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "--------------------------------M--M---------------M------------";
+ break;
+
+ case 24: /* Pterobranchia Mitochondrial Code */
+ translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG";
+ initiation_table = "---M---------------M---------------M---------------M------------";
+ break;
+
+ case 25: /* Candidate Division SR1 and Gracilibacteria Code */
+ translation_table = "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "---M-------------------------------M---------------M------------";
+ break;
+
+ case 26: /* Pachysolen tannophilus Nuclear Code */
+ translation_table = "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+ initiation_table = "-------------------M---------------M----------------------------";
+ break;
+
+ default:
+ fprintf(stderr,"Translation code %d is not supported\n",translation_code);
+ exit(9);
+ }
+
+ return;
+}
+
+
+static char
+Translation_get_codon (char *initc, char base1, char base2, char base3) {
+ int codoni;
+
+ switch (base1) {
+ case 'T': codoni = 0; break;
+ case 'C': codoni = 16; break;
+ case 'A': codoni = 32; break;
+ case 'G': codoni = 48; break;
+ default: codoni = -16; /* Negative enough so += 12 and += 3 will still yield a negative value */
+ }
+
+ switch (base2) {
+ case 'T': codoni += 0; break;
+ case 'C': codoni += 4; break;
+ case 'A': codoni += 8; break;
+ case 'G': codoni += 12; break;
+ default: codoni = -16;
+ }
+
+ switch (base3) {
+ case 'T': codoni += 0; break;
+ case 'C': codoni += 1; break;
+ case 'A': codoni += 2; break;
+ case 'G': codoni += 3; break;
+ default: codoni = -16;
+ }
+
+ if (codoni < 0) {
+ *initc = '-';
+ return 'X';
+ } else {
+ *initc = initiation_table[codoni];
+ return translation_table[codoni];
+ }
+}
#ifndef PMAP
@@ -198,7 +353,7 @@ find_bounds_forward (Frame_T *translation_frame, int *translation_starti,
int start0 = 0, start1 = 0, start2 = 0;
bool needmet0p, needmet1p, needmet2p;
bool endstop0p = false, endstop1p = false, endstop2p = false;
- char codon;
+ char codon, initc;
int i, frame;
if (fulllengthp == true) {
@@ -211,9 +366,10 @@ find_bounds_forward (Frame_T *translation_frame, int *translation_starti,
debug(printf("%d %c: %d %d %d\n",i,translation[i].aa,orf0,orf1,orf2));
frame = translation[i].frame;
if ((codon = translation[i].aa) != ' ') {
+ initc = translation[i].initc;
if (frame == FRAME0) {
if (needmet0p) {
- if (codon == 'M') {
+ if (initc == 'M') {
orf0 = 1;
start0 = i;
needmet0p = false;
@@ -234,7 +390,7 @@ find_bounds_forward (Frame_T *translation_frame, int *translation_starti,
}
} else if (frame == FRAME1) {
if (needmet1p) {
- if (codon == 'M') {
+ if (initc == 'M') {
orf1 = 1;
start1 = i;
needmet1p = false;
@@ -255,7 +411,7 @@ find_bounds_forward (Frame_T *translation_frame, int *translation_starti,
}
} else if (frame == FRAME2) {
if (needmet2p) {
- if (codon == 'M') {
+ if (initc == 'M') {
orf2 = 1;
start2 = i;
needmet2p = false;
@@ -355,7 +511,7 @@ find_bounds_backward (Frame_T *translation_frame, int *translation_starti,
int start0 = translationlen-1, start1 = translationlen-1, start2 = translationlen-1;
bool needmet0p, needmet1p, needmet2p;
bool endstop0p = false, endstop1p = false, endstop2p = false;
- char codon;
+ char codon, initc;
int i, frame;
if (fulllengthp == true) {
@@ -367,9 +523,10 @@ find_bounds_backward (Frame_T *translation_frame, int *translation_starti,
for (i = translationlen-1; i >= 0; --i) {
frame = translation[i].frame;
if ((codon = translation[i].aa) != ' ') {
+ initc = translation[i].initc;
if (frame == FRAME0) {
if (needmet0p) {
- if (codon == 'M') {
+ if (initc == 'M') {
orf0 = 1;
start0 = i;
needmet0p = false;
@@ -389,7 +546,7 @@ find_bounds_backward (Frame_T *translation_frame, int *translation_starti,
}
} else if (frame == FRAME1) {
if (needmet1p) {
- if (codon == 'M') {
+ if (initc == 'M') {
orf1 = 1;
start1 = i;
needmet1p = false;
@@ -409,7 +566,7 @@ find_bounds_backward (Frame_T *translation_frame, int *translation_starti,
}
} else if (frame == FRAME2) {
if (needmet2p) {
- if (codon == 'M') {
+ if (initc == 'M') {
orf2 = 1;
start2 = i;
needmet2p = false;
@@ -697,7 +854,7 @@ translate_pairs_forward (struct Pair_T *pairs, int npairs, bool revcompp) {
struct T *translation;
struct Pair_T *ptr, *pair;
int i, gpos = 0;
- char codon, nt2 = 'X', nt1 = 'X', nt0 = 'X';
+ char codon, initc, nt2 = 'X', nt1 = 'X', nt0 = 'X';
translation = Translation_array_new(pairs,npairs);
@@ -714,11 +871,12 @@ translate_pairs_forward (struct Pair_T *pairs, int npairs, bool revcompp) {
nt1 = nt0;
nt0 = revcompp ? complCode[(int) pair->genome] : uppercaseCode[(int) pair->genome];
- codon = Translation_get_codon(nt0,nt1,nt2);
+ codon = Translation_get_codon(&initc,nt0,nt1,nt2);
if (gpos < 2 && codon == 'X') {
/* translation[i].aa = ' '; */
} else {
translation[i].aa = codon;
+ translation[i].initc = initc;
switch (gpos % 3) {
case 0: translation[i].frame = FRAME0; break;
case 1: translation[i].frame = FRAME1; break;
@@ -737,7 +895,7 @@ translate_pairs_backward (struct Pair_T *pairs, int npairs, bool revcompp) {
struct T *translation;
struct Pair_T *ptr, *pair;
int i, gpos = 0;
- char codon, nt2 = 'X', nt1 = 'X', nt0 = 'X';
+ char codon, initc, nt2 = 'X', nt1 = 'X', nt0 = 'X';
translation = Translation_array_new(pairs,npairs);
@@ -754,11 +912,12 @@ translate_pairs_backward (struct Pair_T *pairs, int npairs, bool revcompp) {
nt1 = nt0;
nt0 = revcompp ? complCode[(int) pair->genome] : uppercaseCode[(int) pair->genome];
- codon = Translation_get_codon(nt0,nt1,nt2);
+ codon = Translation_get_codon(&initc,nt0,nt1,nt2);
if (gpos < 2 && codon == 'X') {
/* translation[i].aa = ' '; */
} else {
translation[i].aa = codon;
+ translation[i].initc = initc;
switch (gpos % 3) {
case 0: translation[i].frame = FRAME0; break;
case 1: translation[i].frame = FRAME1; break;
@@ -973,6 +1132,7 @@ get_codon_forward (int *nexti, struct Pair_T *pairs, int npairs, int starti, boo
char nt2 = 'X', nt1 = 'X', nt0 = 'X';
int j2 = -1, j1 = -1, j0 = -1;
int ncdna = 0, j;
+ char initc;
j = starti;
while (j < npairs && ncdna < 3) {
@@ -1001,7 +1161,7 @@ get_codon_forward (int *nexti, struct Pair_T *pairs, int npairs, int starti, boo
pairs[j2].aaphase_e = 2;
}
- return Translation_get_codon(nt0,nt1,nt2);
+ return Translation_get_codon(&initc,nt0,nt1,nt2);
}
static int
@@ -1009,6 +1169,7 @@ get_codon_backward (int *nexti, struct Pair_T *pairs, int starti, bool revcompp)
char nt2 = 'X', nt1 = 'X', nt0 = 'X';
int j2 = -1, j1 = -1, j0 = -1;
int ncdna = 0, j;
+ char initc;
j = starti;
while (j >= 0 && ncdna < 3) {
@@ -1037,7 +1198,7 @@ get_codon_backward (int *nexti, struct Pair_T *pairs, int starti, bool revcompp)
pairs[j2].aaphase_e = 2;
}
- return Translation_get_codon(nt0,nt1,nt2);
+ return Translation_get_codon(&initc,nt0,nt1,nt2);
}
@@ -1047,6 +1208,7 @@ get_codon_genomic (int *nexti, struct Pair_T *pairs, int npairs, int starti) {
char nt2 = 'X', nt1 = 'X', nt0 = 'X';
int j0, j1, j2;
int ngenomic = 0, j;
+ char initc;
j = starti;
while (ngenomic < 3) {
@@ -1073,7 +1235,7 @@ get_codon_genomic (int *nexti, struct Pair_T *pairs, int npairs, int starti) {
pairs[j1].aaphase_g = 1;
pairs[j2].aaphase_g = 2;
- return Translation_get_codon(nt0,nt1,nt2);
+ return Translation_get_codon(&initc,nt0,nt1,nt2);
}
#endif
diff --git a/src/translation.h b/src/translation.h
index 03d0585..0ce43b0 100644
--- a/src/translation.h
+++ b/src/translation.h
@@ -1,4 +1,4 @@
-/* $Id: translation.h 184469 2016-02-18 00:11:12Z twu $ */
+/* $Id: translation.h 188718 2016-04-30 01:53:47Z twu $ */
#ifndef TRANSLATION_INCLUDED
#define TRANSLATION_INCLUDED
@@ -11,8 +11,8 @@
#define T Translation_T
typedef struct T *T;
-extern char
-Translation_get_codon (char a, char b, char c);
+extern void
+Translation_setup (int translation_code);
#ifdef PMAP
extern void
@@ -39,7 +39,3 @@ Translation_print_comparison (Filestring_T fp, struct Pair_T *pairs, int npairs,
#undef T
#endif
-
-
-
-
diff --git a/src/uniqscan.c b/src/uniqscan.c
index 5025626..5d0799b 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 186737 2016-03-30 23:36:12Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 188752 2016-05-01 17:28:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1336,6 +1336,7 @@ main (int argc, char *argv[]) {
distances_observed_p,pairmax_linear,pairmax_circular,expected_pairlength,pairlength_deviation,
localsplicing_penalty,indel_penalty_middle,antistranded_penalty,
favor_multiexon_p,gmap_min_nconsecutive,/*end_detail*/1,subopt_levels,
+ max_middle_insertions,max_middle_deletions,
novelsplicingp,shortsplicedist,/*merge_samechr_p*/false,circularp,altlocp,alias_starts,alias_ends,
/*failedinput_root*/NULL,/*print_m8_p*/false,/*want_random_p*/true);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list