[med-svn] [gmap] 09/16: Imported Upstream version 2016-06-03
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Fri Jun 17 13:08:53 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit dc88157b26b842f17508e297bfe6cbfb2660f648
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Wed Jun 8 16:35:53 2016 +0200
Imported Upstream version 2016-06-03
---
ChangeLog | 21 ++++++
VERSION | 2 +-
configure | 24 +++----
src/gmap_select.c | 22 +++----
src/gmapl_select.c | 22 +++----
src/gsnap_select.c | 22 +++----
src/gsnapl_select.c | 22 +++----
src/indexdb.c | 17 ++++-
src/sam_sort.c | 5 +-
src/sarray-read.c | 10 +--
src/stage1hr.c | 182 ++++++++++++++++++++++++++++++++++------------------
src/stage3hr.c | 53 +++++++--------
src/stage3hr.h | 8 ++-
13 files changed, 252 insertions(+), 158 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 3c3c43b..ec71e96 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2016-06-03 twu
+
+ * VERSION: Updated version number
+
+ * stage1hr.c: Replaced constant value of 15 with
+ min_distantsplicing_end_matches
+
+ * indexdb.c: Removed sanity check on positions filesize, which can fail on
+ multiple simultaneous instances of the process
+
+ * stage1hr.c, stage3hr.c, stage3hr.h: Searching for distant splicing based
+ on trim
+
+ * sarray-read.c: Turning off AVX2-specific version of
+ fill_positions_filtered_first
+
+ * sam_sort.c: Fixed warning message. Fixed memory leak.
+
+ * gmap_select.c, gmapl_select.c, gsnap_select.c, gsnapl_select.c: Improved
+ warning messages
+
2016-05-25 twu
* pair.c: Fixed calculation of circularpos for plus strand
diff --git a/VERSION b/VERSION
index 7099d80..67c7133 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-05-25
\ No newline at end of file
+2016-06-03
\ No newline at end of file
diff --git a/configure b/configure
index 7dfd559..7e36b15 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-05-25.
+# Generated by GNU Autoconf 2.69 for gmap 2016-06-03.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-05-25'
-PACKAGE_STRING='gmap 2016-05-25'
+PACKAGE_VERSION='2016-06-03'
+PACKAGE_STRING='gmap 2016-06-03'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
PACKAGE_URL=''
@@ -1367,7 +1367,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2016-05-25 to adapt to many kinds of systems.
+\`configure' configures gmap 2016-06-03 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1438,7 +1438,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2016-05-25:";;
+ short | recursive ) echo "Configuration of gmap 2016-06-03:";;
esac
cat <<\_ACEOF
@@ -1570,7 +1570,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2016-05-25
+gmap configure 2016-06-03
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2176,7 +2176,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2016-05-25, which was
+It was created by gmap $as_me 2016-06-03, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2526,8 +2526,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-05-25" >&5
-$as_echo "2016-05-25" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-06-03" >&5
+$as_echo "2016-06-03" >&6; }
### Read defaults
@@ -4392,7 +4392,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2016-05-25'
+ VERSION='2016-06-03'
cat >>confdefs.h <<_ACEOF
@@ -20048,7 +20048,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2016-05-25, which was
+This file was extended by gmap $as_me 2016-06-03, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20114,7 +20114,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-gmap config.status 2016-05-25
+gmap config.status 2016-06-03
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/src/gmap_select.c b/src/gmap_select.c
index 9a3e335..b7ecfcf 100644
--- a/src/gmap_select.c
+++ b/src/gmap_select.c
@@ -46,7 +46,7 @@ main (int argc, char *argv[]) {
if (avx2_support_p == true) {
new_argv[0] = "gmap.avx2";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an AVX2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -56,7 +56,7 @@ main (int argc, char *argv[]) {
if (sse4_2_support_p == true) {
new_argv[0] = "gmap.sse42";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -66,7 +66,7 @@ main (int argc, char *argv[]) {
if (sse4_1_support_p == true) {
new_argv[0] = "gmap.sse41";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.1 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.1 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -76,7 +76,7 @@ main (int argc, char *argv[]) {
if (ssse3_support_p == true) {
new_argv[0] = "gmap.ssse3";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSSE3 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSSE3 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -86,7 +86,7 @@ main (int argc, char *argv[]) {
if (sse2_support_p == true) {
new_argv[0] = "gmap.sse2";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -107,7 +107,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.avx2") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmap.avx2",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an AVX2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -121,7 +121,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.sse42") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmap.sse42",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -135,7 +135,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.sse41") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmap.sse41",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.1 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.1 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -149,7 +149,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.ssse3") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmap.ssse3",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSSE3 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSSE3 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -163,7 +163,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.sse2") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmap.sse2",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -177,7 +177,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.nosimd") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmap.nosimd",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an non-SIMD machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an non-SIMD machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
diff --git a/src/gmapl_select.c b/src/gmapl_select.c
index 0b908ca..5020d38 100644
--- a/src/gmapl_select.c
+++ b/src/gmapl_select.c
@@ -46,7 +46,7 @@ main (int argc, char *argv[]) {
if (avx2_support_p == true) {
new_argv[0] = "gmapl.avx2";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an AVX2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -56,7 +56,7 @@ main (int argc, char *argv[]) {
if (sse4_2_support_p == true) {
new_argv[0] = "gmapl.sse42";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -66,7 +66,7 @@ main (int argc, char *argv[]) {
if (sse4_1_support_p == true) {
new_argv[0] = "gmapl.sse41";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.1 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.1 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -76,7 +76,7 @@ main (int argc, char *argv[]) {
if (ssse3_support_p == true) {
new_argv[0] = "gmapl.ssse3";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSSE3 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSSE3 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -86,7 +86,7 @@ main (int argc, char *argv[]) {
if (sse2_support_p == true) {
new_argv[0] = "gmapl.sse2";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -107,7 +107,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.avx2") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmapl.avx2",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an AVX2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -121,7 +121,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.sse42") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmapl.sse42",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -135,7 +135,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.sse41") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmapl.sse41",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.1 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.1 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -149,7 +149,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.ssse3") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmapl.ssse3",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSSE3 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSSE3 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -163,7 +163,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.sse2") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmapl.sse2",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -177,7 +177,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.nosimd") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gmapl.nosimd",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an non-SIMD machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an non-SIMD machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
diff --git a/src/gsnap_select.c b/src/gsnap_select.c
index 303c7d4..001d338 100644
--- a/src/gsnap_select.c
+++ b/src/gsnap_select.c
@@ -46,7 +46,7 @@ main (int argc, char *argv[]) {
if (avx2_support_p == true) {
new_argv[0] = "gsnap.avx2";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an AVX2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -56,7 +56,7 @@ main (int argc, char *argv[]) {
if (sse4_2_support_p == true) {
new_argv[0] = "gsnap.sse42";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -66,7 +66,7 @@ main (int argc, char *argv[]) {
if (sse4_1_support_p == true) {
new_argv[0] = "gsnap.sse41";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.1 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.1 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -76,7 +76,7 @@ main (int argc, char *argv[]) {
if (ssse3_support_p == true) {
new_argv[0] = "gsnap.ssse3";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSSE3 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSSE3 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -86,7 +86,7 @@ main (int argc, char *argv[]) {
if (sse2_support_p == true) {
new_argv[0] = "gsnap.sse2";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -107,7 +107,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.avx2") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnap.avx2",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an AVX2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -121,7 +121,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.sse42") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnap.sse42",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -135,7 +135,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.sse41") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnap.sse41",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.1 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.1 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -149,7 +149,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.ssse3") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnap.ssse3",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSSE3 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSSE3 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -163,7 +163,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.sse2") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnap.sse2",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -177,7 +177,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.nosimd") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnap.nosimd",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an non-SIMD machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an non-SIMD machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
diff --git a/src/gsnapl_select.c b/src/gsnapl_select.c
index 642a851..1730264 100644
--- a/src/gsnapl_select.c
+++ b/src/gsnapl_select.c
@@ -46,7 +46,7 @@ main (int argc, char *argv[]) {
if (avx2_support_p == true) {
new_argv[0] = "gsnapl.avx2";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an AVX2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -56,7 +56,7 @@ main (int argc, char *argv[]) {
if (sse4_2_support_p == true) {
new_argv[0] = "gsnapl.sse42";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -66,7 +66,7 @@ main (int argc, char *argv[]) {
if (sse4_1_support_p == true) {
new_argv[0] = "gsnapl.sse41";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.1 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.1 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -76,7 +76,7 @@ main (int argc, char *argv[]) {
if (ssse3_support_p == true) {
new_argv[0] = "gsnapl.ssse3";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSSE3 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSSE3 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -86,7 +86,7 @@ main (int argc, char *argv[]) {
if (sse2_support_p == true) {
new_argv[0] = "gsnapl.sse2";
if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE2 machine\n",new_argv[0]);
} else {
free(new_argv);
return rc;
@@ -107,7 +107,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.avx2") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnapl.avx2",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an AVX2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -121,7 +121,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.sse42") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnapl.sse42",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -135,7 +135,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.sse41") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnapl.sse41",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE4.1 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE4.1 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -149,7 +149,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.ssse3") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnapl.ssse3",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSSE3 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSSE3 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -163,7 +163,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.sse2") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnapl.sse2",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an SSE2 machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an SSE2 machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
@@ -177,7 +177,7 @@ main (int argc, char *argv[]) {
new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.nosimd") + 1) * sizeof(char));
sprintf(new_argv[0],"%s/gsnapl.nosimd",dir);
if (file_exists_p(new_argv[0]) == false) {
- fprintf(stderr,"Note: %s does not exist. May want to compile package on an non-SIMD machine\n",new_argv[0]);
+ fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an non-SIMD machine\n",new_argv[0]);
free(new_argv[0]);
} else {
rc = execvp(new_argv[0],new_argv);
diff --git a/src/indexdb.c b/src/indexdb.c
index ea3b70d..6bd5d7e 100644
--- a/src/indexdb.c
+++ b/src/indexdb.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb.c 184480 2016-02-18 00:58:00Z twu $";
+static char rcsid[] = "$Id: indexdb.c 191134 2016-06-03 17:27:37Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1330,9 +1330,12 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
#endif
+#if 0
+ /* Sanity check on positions filesize */
+ /* This check can fail when multiple jobs start simultaneously */
+
#ifdef PMAP
#else
- /* Sanity check on positions filesize */
#ifdef HAVE_64_BIT
poly_T = ~(~0ULL << 2*new->index1part);
#else
@@ -1352,6 +1355,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
abort();
}
#endif /* PMAP */
+#endif /* Sanity check */
} else {
fprintf(stderr,"Offsets compression type: bitpack64\n");
@@ -1472,7 +1476,6 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
#ifdef PMAP
#else
- /* Sanity check on positions filesize */
#ifdef LARGE_GENOMES
if (filenames->pages_filename != NULL) {
#ifdef HAVE_MMAP
@@ -1496,6 +1499,12 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
new->offsetspages[0] = -1U;
}
#endif
+
+
+#if 0
+ /* Sanity check on positions filesize */
+ /* This check can fail when multiple jobs start simultaneously */
+
#ifdef HAVE_64_BIT
poly_T = ~(~0ULL << 2*new->index1part);
#else
@@ -1521,6 +1530,8 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
filenames->positions_low_filename,(size_t) (end0*sizeof(UINT4)),filesize);
abort();
}
+#endif /* Sanity check */
+
#endif /* PMAP */
}
diff --git a/src/sam_sort.c b/src/sam_sort.c
index e7ce482..adfeb5f 100644
--- a/src/sam_sort.c
+++ b/src/sam_sort.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sam_sort.c 186672 2016-03-29 23:46:34Z twu $";
+static char rcsid[] = "$Id: sam_sort.c 191131 2016-06-03 17:23:29Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1458,7 +1458,7 @@ main (int argc, char *argv[]) {
ncells = (int *) CALLOC(ninputs,sizeof(int));
for (filei = 0; filei < ninputs; filei++) {
if ((sam_inputs[filei] = fopen(argv[filei],"r")) == NULL) {
- fprintf(stderr,"Cannot open SAM file %s\n",argv[i]);
+ fprintf(stderr,"Cannot open SAM file %s\n",argv[filei]);
exit(9);
}
}
@@ -1514,6 +1514,7 @@ main (int argc, char *argv[]) {
fprintf(stderr,"Done with analysis (%.1f seconds). Found %d SAM lines total.\n",
Stopwatch_stop(stopwatch),ncells_total);
+ Stopwatch_free(&stopwatch);
if (ncells_total == 0) {
/* Exit without printing header */
diff --git a/src/sarray-read.c b/src/sarray-read.c
index 72c85e3..b55b8fa 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 188752 2016-05-01 17:28:22Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 191132 2016-06-03 17:24:03Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2292,7 +2292,7 @@ fill_positions_std (int *npositions, Univcoord_T low_adj, Univcoord_T high_adj,
#ifdef HAVE_ALLOCA
-#if defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
+#if 0 && defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
/* Using pext method, because _mm256_shuffle_epi32 doesn't work well
because it works only within lanes, and MASTER_CONTROL does not extend
@@ -2300,12 +2300,10 @@ well to 256 bits */
static void
fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) {
- int nmatches;
Univcoord_T low_adj, high_adj;
Univcoord_T *array = sarray->array, value0;
Sarrayptr_T *array_stop, *array_end, *array_ptr;
Univcoord_T *positions_temp;
- UINT8 pointer;
Univcoord_T *out;
__m256i converted, adjusted, match;
__m256i floor, ceiling, values, adj, p;
@@ -2315,7 +2313,11 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
#if defined(REQUIRE_ALIGNMENT)
int n_prealign, k;
#endif
+#if defined(DEBUG) || defined(DEBUG7)
+ int nmatches;
+#endif
#ifdef DEBUG7
+ UINT8 pointer;
int i;
#endif
#ifdef DEBUG8
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 053467c..73b0574 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 189206 2016-05-06 23:15:42Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 191136 2016-06-03 17:32:53Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -15844,6 +15844,7 @@ align_end (int *cutoff_level, T this,
bool segments_computed_p = false;
Indexdb_T plus_indexdb, minus_indexdb;
bool allvalidp;
+ int min_trim, trim;
#ifndef LARGE_GENOMES
int nmisses_allowed_sarray;
#endif
@@ -16234,14 +16235,40 @@ align_end (int *cutoff_level, T this,
FREEA(acceptors_minus);
FREEA(antiacceptors_minus);
}
+ }
+
+ debug(printf("Before distant splicing:\n"));
+ debug(printf(" greedy: %d\n",List_length(greedy)));
+ debug(printf(" subs: %d\n",List_length(subs)));
+ debug(printf(" indels: %d\n",List_length(indels)));
+ debug(printf(" singlesplicing %d\n",List_length(singlesplicing)));
+ debug(printf(" doublesplicing %d\n",List_length(doublesplicing)));
+ debug(printf(" shortendsplicing: %d\n",List_length(shortendsplicing)));
+ debug(printf(" longsinglesplicing %d\n",List_length(longsinglesplicing)));
+ debug(printf(" done_level: %d\n",done_level));
+
+ hits = List_append(greedy,
+ List_append(subs,
+ List_append(terminals,
+ List_append(indels,
+ List_append(singlesplicing,shortendsplicing)))));
+ if (knownsplicingp || novelsplicingp || find_dna_chimeras_p) {
/* Search 7: Distant splicing */
- if (done_level < distantsplicing_penalty) {
+ min_trim = querylength;
+ for (p = hits; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if ((trim = Stage3end_total_trim(hit)) < min_trim) {
+ min_trim = trim;
+ }
+ }
+
+ if (done_level < distantsplicing_penalty && min_trim < min_distantsplicing_end_matches) {
/* Want < and not <=, because otherwise distant splicing does not work on 50-bp reads */
/* Want <= and not <, because distant splicing needs to be better than other alternatives */
/* Don't find distant splicing */
- debug(printf("Skipping distant splicing because done_level %d < distantsplicing_penalty %d\n",
- done_level,distantsplicing_penalty));
+ debug(printf("Skipping distant splicing because done_level %d < distantsplicing_penalty %d and min_trim %d < %d\n",
+ done_level,distantsplicing_penalty,min_trim,min_distantsplicing_end_matches));
} else if (find_dna_chimeras_p == true) {
/* 9 (DNA). Find distant splicing for DNA */
@@ -16493,6 +16520,11 @@ align_end (int *cutoff_level, T this,
}
+ hits = List_append(hits,
+ List_append(longsinglesplicing,
+ List_append(shortendsplicing,distantsplicing)));
+
+
/* Search 8: Terminals */
/* Previously criterion for skipping find_terminals was (greedy ||
@@ -16506,26 +16538,6 @@ align_end (int *cutoff_level, T this,
}
- debug(printf("Before GMAP:\n"));
- debug(printf(" greedy: %d\n",List_length(greedy)));
- debug(printf(" subs: %d\n",List_length(subs)));
- debug(printf(" indels: %d\n",List_length(indels)));
- debug(printf(" singlesplicing %d\n",List_length(singlesplicing)));
- debug(printf(" doublesplicing %d\n",List_length(doublesplicing)));
- debug(printf(" shortendsplicing: %d\n",List_length(shortendsplicing)));
- debug(printf(" longsinglesplicing %d\n",List_length(longsinglesplicing)));
- debug(printf(" distantsplicing: %d\n",List_length(distantsplicing)));
- debug(printf(" terminals: %d\n",List_length(terminals)));
- debug(printf(" done_level: %d\n",done_level));
-
- hits = List_append(greedy,
- List_append(subs,
- List_append(terminals,
- List_append(indels,
- List_append(singlesplicing,
- List_append(longsinglesplicing,
- List_append(doublesplicing,
- List_append(shortendsplicing,distantsplicing))))))));
/* Search 9: GMAP via segments */
gmapp = true;
if (gmap_segments_p == false) {
@@ -16533,8 +16545,13 @@ align_end (int *cutoff_level, T this,
} else if (found_score < trigger_score_for_gmap) {
debug(printf("Test for stage 9: true because found_score %d >= trigger_score_for_gmap %d\n",found_score,trigger_score_for_gmap));
gmapp = false;
+ } else if (min_trim < min_distantsplicing_end_matches) {
+ gmapp = false;
+ } else if (distantsplicing != NULL) {
+ gmapp = false;
}
+
gmap_hits = (List_T) NULL;
if (gmapp == true) {
gmap_hits = convert_plus_segments_to_gmap(/*hits*/NULL,queryuc_ptr,querylength,query_lastpos,
@@ -18145,7 +18162,7 @@ better_free_end_exists_p (List_T greedy, List_T subs, List_T terminals,
static List_T
align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *cutoff_level_3,
- List_T *samechr, List_T *conc_transloc,
+ List_T *samechr, List_T *conc_transloc, List_T *terminals,
History_T gmap_history_5, History_T gmap_history_3, List_T *hits5, List_T *hits3, T this5, T this3,
Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
@@ -18199,6 +18216,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
int nconcordant = 0, nsamechr = 0;
Indexdb_T plus_indexdb_5, plus_indexdb_3, minus_indexdb_5, minus_indexdb_3;
bool allvalidp5, allvalidp3;
+ int min_trim, trim;
#ifndef LARGE_GENOMES
int nmisses_allowed_sarray_5, nmisses_allowed_sarray_3;
#endif
@@ -18218,6 +18236,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
*samechr = (List_T) NULL;
*conc_transloc = (List_T) NULL;
+ *terminals = (List_T) NULL;
*abort_pairing_p = false;
/* For paired-end alignment, ignore found_scores from single-end
@@ -18330,7 +18349,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
} else {
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,hitarray5,/*narray5*/HITARRAY_GREEDY+1,
hitarray3,/*narray3*/HITARRAY_GREEDY+1,
*cutoff_level_5,*cutoff_level_3,
@@ -18370,7 +18389,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("sarray initial: 3' end has %d greedy\n",List_length(greedy3)));
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,hitarray5,/*narray5*/HITARRAY_GREEDY+1,
hitarray3,/*narray3*/HITARRAY_GREEDY+1,
*cutoff_level_5,*cutoff_level_3,
@@ -18471,7 +18490,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray5[HITARRAY_SUBS] = subs5; /* = Stage3end_remove_duplicates(subs5) */;
hitarray3[HITARRAY_SUBS] = subs3; /* = Stage3end_remove_duplicates(subs3) */;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,hitarray5,/*narray5*/HITARRAY_SUBS+1,
hitarray3,/*narray3*/HITARRAY_SUBS+1,
*cutoff_level_5,*cutoff_level_3,
@@ -18520,7 +18539,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray5[HITARRAY_SUBS] = subs5 /* = Stage3end_remove_duplicates(subs5,queryseq5,queryseq3) */;
hitarray3[HITARRAY_SUBS] = subs3 /* = Stage3end_remove_duplicates(subs3,queryseq5,queryseq3) */;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,hitarray5,/*narray5*/HITARRAY_SUBS+1,
hitarray3,/*narray3*/HITARRAY_SUBS+1,
*cutoff_level_5,*cutoff_level_3,
@@ -18583,7 +18602,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray5[HITARRAY_SUBS] = subs5 /* = Stage3end_remove_duplicates(subs5,queryseq5,queryseq3) */;
hitarray3[HITARRAY_SUBS] = subs3 /* = Stage3end_remove_duplicates(subs3,queryseq5,queryseq3) */;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,hitarray5,/*narray5*/HITARRAY_SUBS+1,
hitarray3,/*narray3*/HITARRAY_SUBS+1,
*cutoff_level_5,*cutoff_level_3,
@@ -18699,7 +18718,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3[HITARRAY_SUBS] = subs3;
hitarray3[HITARRAY_INDELS] = indels3;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,hitarray5,/*narray5*/HITARRAY_INDELS+1,
hitarray3,/*narray3*/HITARRAY_INDELS+1,
*cutoff_level_5,*cutoff_level_3,
@@ -18853,7 +18872,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3[HITARRAY_SINGLESPLICING] = singlesplicing3;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,hitarray5,/*narray5*/HITARRAY_SINGLESPLICING+1,
hitarray3,/*narray3*/HITARRAY_SINGLESPLICING+1,
*cutoff_level_5,*cutoff_level_3,
@@ -18937,7 +18956,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("Starting Stage3_pair_up_concordant\n"));
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,hitarray5,/*narray5*/HITARRAY_DOUBLESPLICING+1,
hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
*cutoff_level_5,*cutoff_level_3,
@@ -19066,7 +19085,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug4t(printf("Running Stage3_pair_up_concordant\n"));
/* Note: cannot use hitarray after we have removed overlapping alignments */
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
/*hitarray3*/&(*hits3),/*narray3*/1,
*cutoff_level_5,*cutoff_level_3,
@@ -19088,16 +19107,24 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
/* Search 7: Distant splicing */
+ min_trim = querylength5 + querylength3;
+ for (p = hitpairs; p != NULL; p = List_next(p)) {
+ newpair = (Stage3pair_T) List_head(p);
+ if ((trim = Stage3pair_total_trim(newpair)) < min_trim) {
+ min_trim = trim;
+ }
+ }
- if (nconcordant > 0) {
+ debug(printf("? distant splicing. nconcordant is %d. min_trim is %d. done_level is %d + %d\n",
+ nconcordant,min_trim,done_level_5,done_level_3));
+ if (nconcordant > 0 && min_trim < min_distantsplicing_end_matches) {
/* Skip search for distant splicing */
} else if (*abort_pairing_p == true) {
/* Skip further searching */
} else if (knownsplicingp == false && novelsplicingp == false) {
- /* Find distant splicing for DNA */
-
+ /* TODO: Find distant splicing for DNA */
} else {
/* Find distant splicing for RNA */
@@ -19321,7 +19348,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
#endif
/* Note: cannot use hitarray after we have removed overlapping alignments. Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
/*hitarray3*/&(*hits3),/*narray3*/1,
*cutoff_level_5,*cutoff_level_3,
@@ -19359,7 +19386,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
if (nconcordant == 0 && (distantsplicing5 != NULL || distantsplicing3 != NULL)) {
/* Note: cannot use hitarray after we have removed overlapping alignments. Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
/*hitarray3*/&(*hits3),/*narray3*/1,
*cutoff_level_5,*cutoff_level_3,
@@ -19396,7 +19423,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
*hits3 = List_append(*hits3,terminals3);
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
/*hitarray3*/&(*hits3),/*narray3*/1,
*cutoff_level_5,*cutoff_level_3,
@@ -19714,7 +19741,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray5[HITARRAY_SINGLESPLICING] = singlesplicing5 /* = Stage3end_remove_duplicates(singlesplicing5,queryseq5,queryseq3) */;
hitarray3[HITARRAY_SINGLESPLICING] = singlesplicing3 /* = Stage3end_remove_duplicates(singlesplicing3,queryseq5,queryseq3) */;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),
+ &(*samechr),&(*conc_transloc),&(*terminals),
hitpairs,hitarray5,/*narray5*/HITARRAY_DOUBLESPLICING+1,
hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
*cutoff_level_5,*cutoff_level_3,
@@ -19900,8 +19927,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
Floors_free(&floors3);
}
- debug(printf("Ending with %d hitpairs, %d samechr, %d conc_transloc\n",
- List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc)));
+ debug(printf("Ending with %d hitpairs, %d samechr, %d conc_transloc, %d terminals\n",
+ List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc),List_length(*terminals)));
hitpairs = Stage3pair_remove_circular_alias(hitpairs);
#if 0
@@ -19925,8 +19952,8 @@ choose_among_paired (int *best_nmatches_paired, int *best_nmatches_5, int *best_
Stage3pair_T hitpair;
int nmatches, nmatches5, nmatches3;
- debug16(printf("choose: %d hitpairs, %d conc_transloc, %d samechr\n",
- List_length(hitpairs),List_length(conc_transloc),List_length(samechr)));
+ debug16(printf("choose: %d hitpairs, %d conc_transloc, %d samechr, %d terminals\n",
+ List_length(hitpairs),List_length(conc_transloc),List_length(samechr),List_length(terminals)));
*best_nmatches_paired = 0;
for (p = hitpairs; p != NULL; p = p->rest) {
@@ -19989,7 +20016,7 @@ best_nmatches_singleend (List_T hits) {
/* Clean up all previous calculations */
static void
-paired_results_free (T this5, T this3, List_T hitpairs, List_T samechr, List_T conc_transloc,
+paired_results_free (T this5, T this3, List_T hitpairs, List_T samechr, List_T conc_transloc, List_T terminals,
List_T hits5, List_T hits3, int querylength5, int querylength3) {
List_T p;
Stage3pair_T stage3pair;
@@ -20012,6 +20039,12 @@ paired_results_free (T this5, T this3, List_T hitpairs, List_T samechr, List_T c
}
List_free(&conc_transloc);
+ for (p = terminals; p != NULL; p = List_next(p)) { /* free every Stage3pair_T held in terminals */
+ stage3pair = (Stage3pair_T) List_head(p);
+ Stage3pair_free(&stage3pair);
+ }
+ List_free(&terminals); /* then release the terminals list cells themselves */
+
stage3list_gc(&hits3);
stage3list_gc(&hits5);
Stage1_free(&this3,querylength3);
@@ -20101,12 +20134,12 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5_primary, int *nhits5
}
-/* Have three lists: hitpairs, samechr, and conc_transloc => result */
+/* Have four lists: hitpairs + terminals, samechr, and conc_transloc => result */
static Stage3pair_T *
consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *second_absmq, Pairtype_T *final_pairtype,
Stage3end_T **stage3array5, int *nhits5_primary, int *nhits5_altloc, int *first_absmq5, int *second_absmq5,
Stage3end_T **stage3array3, int *nhits3_primary, int *nhits3_altloc, int *first_absmq3, int *second_absmq3,
- List_T hitpairs, List_T samechr, List_T conc_transloc,
+ List_T hitpairs, List_T samechr, List_T conc_transloc, List_T terminals,
List_T hits5, List_T hits3,
Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
@@ -20147,6 +20180,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
}
List_free(&conc_transloc);
+ hitpairs = List_append(hitpairs,terminals);
if (novelsplicingp || knownsplicingp) {
hitpairs = Stage3pair_remove_excess_terminals(hitpairs);
}
@@ -20216,6 +20250,12 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
}
List_free(&hitpairs);
+ for (p = terminals; p != NULL; p = List_next(p)) {
+ stage3pair = (Stage3pair_T) List_head(p);
+ Stage3pair_free(&stage3pair);
+ }
+ List_free(&terminals);
+
for (p = conc_transloc; p != NULL; p = List_next(p)) {
stage3pair = (Stage3pair_T) List_head(p);
Stage3pair_free(&stage3pair);
@@ -20279,6 +20319,12 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
}
List_free(&hitpairs);
+ for (p = terminals; p != NULL; p = List_next(p)) {
+ stage3pair = (Stage3pair_T) List_head(p);
+ Stage3pair_free(&stage3pair);
+ }
+ List_free(&terminals);
+
for (p = samechr; p != NULL; p = List_next(p)) {
stage3pair = (Stage3pair_T) List_head(p);
Stage3pair_free(&stage3pair);
@@ -20338,6 +20384,12 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
}
List_free(&conc_transloc);
+ for (p = terminals; p != NULL; p = List_next(p)) {
+ stage3pair = (Stage3pair_T) List_head(p);
+ Stage3pair_free(&stage3pair);
+ }
+ List_free(&terminals);
+
result = (List_T) NULL;
} else {
@@ -20362,6 +20414,12 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
}
List_free(&conc_transloc);
+ for (p = terminals; p != NULL; p = List_next(p)) {
+ stage3pair = (Stage3pair_T) List_head(p);
+ Stage3pair_free(&stage3pair);
+ }
+ List_free(&terminals);
+
if (gmap_improvement_p == false) {
debug16(printf("No GMAP improvement: Before removing overlaps, %d results\n",List_length(result)));
result = Stage3pair_remove_overlaps(result,/*translocp*/false,/*finalp*/true);
@@ -20607,7 +20665,7 @@ paired_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Chrpos_T pairmax, bool keep_floors_p) {
Stage3pair_T *stage3pairarray;
- List_T hitpairs = NULL, samechr = NULL, conc_transloc = NULL, hits5 = NULL, hits3 = NULL;
+ List_T hitpairs = NULL, samechr = NULL, conc_transloc = NULL, terminals = NULL, hits5 = NULL, hits3 = NULL;
T this5, this3;
char *queryuc_ptr_5, *queryuc_ptr_3, *quality_string_5, *quality_string_3;
Compress_T query5_compress_fwd = NULL, query5_compress_rev = NULL, query3_compress_fwd = NULL, query3_compress_rev = NULL;
@@ -20693,7 +20751,7 @@ paired_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
hitpairs = align_pair(&abort_pairing_p,&found_score,&cutoff_level_5,&cutoff_level_3,
- &samechr,&conc_transloc,gmap_history_5,gmap_history_3,
+ &samechr,&conc_transloc,&terminals,gmap_history_5,gmap_history_3,
&hits5,&hits3,this5,this3,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
queryuc_ptr_5,queryuc_ptr_3,
@@ -20713,7 +20771,7 @@ paired_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
if (abort_pairing_p == true) {
debug16(printf("abort_pairing_p is true\n"));
- paired_results_free(this5,this3,hitpairs,samechr,conc_transloc,
+ paired_results_free(this5,this3,hitpairs,samechr,conc_transloc,terminals,
hits5,hits3,querylength5,querylength3);
this5 = Stage1_new(querylength5);
@@ -20749,7 +20807,7 @@ paired_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
&(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
- hitpairs,samechr,conc_transloc,hits5,hits3,
+ hitpairs,samechr,conc_transloc,terminals,hits5,hits3,
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
queryseq5,queryuc_ptr_5,quality_string_5,querylength5,
queryseq3,queryuc_ptr_3,quality_string_3,querylength3,
@@ -20787,6 +20845,7 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
List_T hitpairs, hitpairs_geneplus = NULL, hitpairs_geneminus = NULL;
List_T samechr, samechr_geneplus = NULL, samechr_geneminus = NULL;
List_T conc_transloc, conc_transloc_geneplus = NULL, conc_transloc_geneminus = NULL;
+ List_T terminals, terminals_geneplus = NULL, terminals_geneminus = NULL;
List_T hits5, hits3, hits_geneplus_5 = NULL, hits_geneplus_3 = NULL, hits_geneminus_5 = NULL, hits_geneminus_3 = NULL;
T this_geneplus_5, this_geneplus_3, this_geneminus_5, this_geneminus_3;
char *queryuc_ptr_5, *queryuc_ptr_3, *quality_string_5, *quality_string_3;
@@ -20878,7 +20937,7 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
abort_pairing_p_geneplus = false;
hitpairs_geneplus = align_pair(&abort_pairing_p_geneplus,&found_score_geneplus,
&cutoff_level_5,&cutoff_level_3,
- &samechr_geneplus,&conc_transloc_geneplus,
+ &samechr_geneplus,&conc_transloc_geneplus,&terminals_geneplus,
gmap_history_5,gmap_history_3,
&hits_geneplus_5,&hits_geneplus_3,this_geneplus_5,this_geneplus_3,
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
@@ -20901,7 +20960,7 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
abort_pairing_p_geneminus = false;
hitpairs_geneminus = align_pair(&abort_pairing_p_geneminus,&found_score_geneminus,
&cutoff_level_5,&cutoff_level_3,
- &samechr_geneminus,&conc_transloc_geneminus,
+ &samechr_geneminus,&conc_transloc_geneminus,&terminals_geneminus,
gmap_history_5,gmap_history_3,
&hits_geneminus_5,&hits_geneminus_3,this_geneminus_5,this_geneminus_3,
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
@@ -20922,12 +20981,12 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
if (found_score_geneplus < found_score_geneminus) {
paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
- hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
+ terminals_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
if (abort_pairing_p_geneplus == true) {
debug16(printf("abort_pairing_p_geneplus is true\n"));
paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
- hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
+ terminals_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
this_geneplus_5 = Stage1_new(querylength5);
this_geneplus_3 = Stage1_new(querylength3);
@@ -20962,7 +21021,7 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
&(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
- hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
+ hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,terminals_geneplus,
hits_geneplus_5,hits_geneplus_3,
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
queryseq5,queryuc_ptr_5,quality_string_5,querylength5,
@@ -20982,12 +21041,12 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
} else if (found_score_geneminus < found_score_geneplus) {
paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
- hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
+ terminals_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
if (abort_pairing_p_geneminus == true) {
debug16(printf("abort_pairing_p_geneminus is true\n"));
paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
- hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
+ terminals_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
this_geneminus_5 = Stage1_new(querylength5);
this_geneminus_3 = Stage1_new(querylength3);
@@ -21022,7 +21081,7 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
&(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
- hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
+ hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,terminals_geneminus,
hits_geneminus_5,hits_geneminus_3,
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
queryseq5,queryuc_ptr_5,quality_string_5,querylength5,
@@ -21044,6 +21103,7 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
hitpairs = List_append(hitpairs_geneplus,hitpairs_geneminus);
samechr = List_append(samechr_geneplus,samechr_geneminus);
conc_transloc = List_append(conc_transloc_geneplus,conc_transloc_geneminus);
+ terminals = List_append(terminals_geneplus,terminals_geneminus);
hits5 = List_append(hits_geneplus_5,hits_geneminus_5);
hits3 = List_append(hits_geneplus_3,hits_geneminus_3);
@@ -21051,7 +21111,7 @@ paired_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
&(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3),
- hitpairs,samechr,conc_transloc,hits5,hits3,
+ hitpairs,samechr,conc_transloc,terminals,hits5,hits3,
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
queryseq5,queryuc_ptr_5,quality_string_5,querylength5,
queryseq3,queryuc_ptr_3,quality_string_3,querylength3,
diff --git a/src/stage3hr.c b/src/stage3hr.c
index c57ca33..eecca77 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 190013 2016-05-17 23:46:20Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 191133 2016-06-03 17:26:49Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -952,6 +952,11 @@ Stage3end_trim_right_raw (T this) {
}
int
+Stage3end_total_trim (T this) {
+ return this->trim_left + start_amb_length(this) + this->trim_right + end_amb_length(this); /* sum of trim_left, trim_right and the start/end amb lengths of this hit */
+}
+
+int
Stage3end_circularpos (T this) {
return this->circularpos;
}
@@ -1877,6 +1882,11 @@ Stage3pair_pairlength (Stage3pair_T this) {
}
int
+Stage3pair_total_trim (Stage3pair_T this) {
+ return Stage3end_total_trim(this->hit5) + Stage3end_total_trim(this->hit3); /* combined Stage3end_total_trim of the pair's hit5 and hit3 */
+}
+
+int
Stage3pair_nmatches_posttrim (int *nmatches5, int *nmatches3, Stage3pair_T this) {
*nmatches5 = this->hit5->nmatches_posttrim;
*nmatches3 = this->hit3->nmatches_posttrim;
@@ -17009,11 +17019,10 @@ Stage3pair_remove_circular_alias (List_T hitpairlist) {
static List_T
pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordant, int *nsamechr,
- List_T *samechr, List_T *conc_transloc, List_T hitpairs,
+ List_T *samechr, List_T *conc_transloc, List_T *terminals, List_T hitpairs,
T **hits5_plus, int *nhits5_plus, T **hits5_minus, int *nhits5_minus,
T **hits3_plus, int *nhits3_plus, T **hits3_minus, int *nhits3_minus,
bool *sorted5p, bool *sorted3p, int cutoff_level_5, int cutoff_level_3,
-
int querylength5, int querylength3, int maxpairedpaths, int genestrand) {
int new_found_score = *found_score;
int frontier_score, score5_start, score5_end, score5, score3, i, j;
@@ -17141,18 +17150,11 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
-#if 0
- /* Doesn't work with new substrings */
- if (start_amb_length(hit5) > 0 || end_amb_length(hit5) > 0 ||
- start_amb_length(hit3) > 0 || end_amb_length(hit3) > 0) {
- /* Don't use ambiguous splices to update found_score*/
- hitpairs = List_push(hitpairs,(void *) stage3pair);
- (*nconcordant)++;
-
- } /*else*/
-#endif
+ if (Stage3pair_total_trim(stage3pair) > 15) {
+ /* Don't use terminals to set new_found_score */
+ *terminals = List_push(*terminals,(void *) stage3pair);
- if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ } else if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
/* Don't use GMAP scores to set new_found_score */
hitpairs = List_push(hitpairs,(void *) stage3pair);
(*nconcordant)++;
@@ -17267,18 +17269,11 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
-#if 0
- /* Doesn't work with new substrings */
- if (start_amb_length(hit5) > 0 || end_amb_length(hit5) > 0 ||
- start_amb_length(hit3) > 0 || end_amb_length(hit3) > 0) {
- /* Don't use ambiguous splices to update found_score*/
- hitpairs = List_push(hitpairs,(void *) stage3pair);
- (*nconcordant)++;
-
- } /*else*/
-#endif
+ if (Stage3pair_total_trim(stage3pair) > 15) {
+ /* Don't use terminals to set new_found_score */
+ *terminals = List_push(*terminals,(void *) stage3pair);
- if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ } else if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
/* Don't use GMAP scores to set new_found_score */
hitpairs = List_push(hitpairs,(void *) stage3pair);
(*nconcordant)++;
@@ -17590,7 +17585,7 @@ sort_hits_by_trimmed_score (T **hits_plus, T **hits_minus, int *nhits_plus, int
/* Finds concordant pairs if nconcordant is 0 */
List_T
Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcordant, int *nsamechr,
- List_T *samechr, List_T *conc_transloc,
+ List_T *samechr, List_T *conc_transloc, List_T *terminals,
List_T hitpairs, List_T *hitarray5, int narray5, List_T *hitarray3, int narray3,
int cutoff_level_5, int cutoff_level_3,
int querylength5, int querylength3, int maxpairedpaths, int genestrand) {
@@ -17663,7 +17658,7 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
/* Look for concordant pairs among the non-terminals */
hitpairs = pair_up_concordant_aux(&(*abort_pairing_p),&(*found_score),&(*nconcordant),&(*nsamechr),
- &(*samechr),&(*conc_transloc),hitpairs,
+ &(*samechr),&(*conc_transloc),&(*terminals),hitpairs,
hits5_plus,nhits5_plus,hits5_minus,nhits5_minus,
hits3_plus,nhits3_plus,hits3_minus,nhits3_minus,
/*sorted5p*/sorted_hits5_p,/*sorted3p*/sorted_hits3_p,
@@ -17772,8 +17767,8 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
FREEA(nhits5_plus);
FREEA(nhits5_minus);
- debug5(printf("Finished with Stage3_pair_up_concordant: %d concordant, %d samechr, %d conc_transloc\n",
- List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc)));
+ debug5(printf("Finished with Stage3_pair_up_concordant: %d concordant, %d samechr, %d conc_transloc, %d terminals\n",
+ List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc),List_length(*terminals)));
return hitpairs;
}
diff --git a/src/stage3hr.h b/src/stage3hr.h
index 4556f1c..16e57f6 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 188752 2016-05-01 17:28:22Z twu $ */
+/* $Id: stage3hr.h 191133 2016-06-03 17:26:49Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -134,6 +134,8 @@ Stage3end_trim_left_raw (T this);
extern int
Stage3end_trim_right_raw (T this);
extern int
+Stage3end_total_trim (T this);
+extern int
Stage3end_circularpos (T this);
@@ -273,6 +275,8 @@ Stage3pair_absmq_score (Stage3pair_T this);
extern Chrpos_T
Stage3pair_pairlength (Stage3pair_T this);
extern int
+Stage3pair_total_trim (Stage3pair_T this);
+extern int
Stage3pair_nmatches_posttrim (int *nmatches5, int *nmatches3, Stage3pair_T this);
extern bool
Stage3pair_concordantp (List_T hitpairs);
@@ -516,7 +520,7 @@ Stage3pair_remove_circular_alias (List_T hitpairlist);
extern List_T
Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcordant, int *nsamechr,
- List_T *samechr, List_T *conc_transloc,
+ List_T *samechr, List_T *conc_transloc, List_T *terminals,
List_T hitpairs, List_T *hitarray5, int narray5, List_T *hitarray3, int narray3,
int cutoff_level_5, int cutoff_level_3,
int querylength5, int querylength3, int maxpairedpaths, int genestrand);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list