[med-svn] [gmap] 01/03: Imported Upstream version 2014-10-22
Andreas Tille
tille at debian.org
Sun Oct 26 07:23:35 UTC 2014
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository gmap.
commit 11ae4fff84668a7aa865d20e99c205b3cb5e82e2
Author: Andreas Tille <tille at debian.org>
Date: Sun Oct 26 08:20:27 2014 +0100
Imported Upstream version 2014-10-22
---
ChangeLog | 33 ++++++++++
VERSION | 2 +-
configure | 24 ++++----
src/bitpack64-write.c | 20 +++---
src/genome128_hr.c | 6 +-
src/gregion.c | 167 ++++++++++++++++++++++++++++++++++++++++----------
src/indexdb-write.c | 3 +-
src/sarray-read.c | 31 +++++++---
src/sarray-write.c | 3 +-
src/shortread.c | 92 +++++++++++++++------------
src/stage1hr.c | 3 +-
src/stage2.c | 134 +++++++++++++++++++---------------------
src/stage3hr.c | 57 ++++++++++++++---
13 files changed, 383 insertions(+), 192 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index e24d61d..178c15e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,36 @@
+2014-10-22 twu
+
+ * gregion.c: Checking size before deciding to use alloca or malloc.
+
+2014-10-16 twu
+
+ * stage2.c: Fixed an uninitialized variable in grand_fwd and grand_rev
+ procedures, plus the checks on maxintronlen in computing
+ grand_fwd_lookforward and grand_rev_lookforward.
+
+ * shortread.c: Allowing queryseq1 to be equal to SKIPPED. Removed unused
+ parameter acc from input_oneline routines.
+
+ * VERSION, index.html: Updated version number
+
+ * stage1hr.c: Added debugging statement
+
+ * sarray-read.c: Don't limit filling of best elt based on nmatches being
+ more than half of the read length
+
+ * stage3hr.c: Restored previous behavior where soft clips avoid
+ circularization
+
+ * indexdb-write.c, sarray-write.c: Removed unnecessary includes of
+ popcount.h
+
+ * bitpack64-write.c, genome128_hr.c: In lookups of clz_table, removing the
+ intermediate variable "top".
+
+ * stage3hr.c: Not allowing soft-clipping at ends to avoid circularization.
+ Added pre-processor macro SOFT_CLIPS_AVOID_CIRCULARIZATION to preserve
+ previous code.
+
2014-10-15 twu
* VERSION, config.site.rescomp.prd, config.site.rescomp.tst, index.html:
diff --git a/VERSION b/VERSION
index 9594e69..0fab9d5 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2014-10-15
\ No newline at end of file
+2014-10-22
\ No newline at end of file
diff --git a/configure b/configure
index b77f154..290e333 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2014-10-15.
+# Generated by GNU Autoconf 2.63 for gmap 2014-10-22.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2014-10-15'
-PACKAGE_STRING='gmap 2014-10-15'
+PACKAGE_VERSION='2014-10-22'
+PACKAGE_STRING='gmap 2014-10-22'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -1508,7 +1508,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2014-10-15 to adapt to many kinds of systems.
+\`configure' configures gmap 2014-10-22 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1579,7 +1579,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2014-10-15:";;
+ short | recursive ) echo "Configuration of gmap 2014-10-22:";;
esac
cat <<\_ACEOF
@@ -1711,7 +1711,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2014-10-15
+gmap configure 2014-10-22
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1725,7 +1725,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2014-10-15, which was
+It was created by gmap $as_me 2014-10-22, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2095,8 +2095,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2014-10-15" >&5
-$as_echo "2014-10-15" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2014-10-22" >&5
+$as_echo "2014-10-22" >&6; }
### Read defaults
@@ -4147,7 +4147,7 @@ fi
# Define the identity of the package.
PACKAGE=gmap
- VERSION=2014-10-15
+ VERSION=2014-10-22
cat >>confdefs.h <<_ACEOF
@@ -25945,7 +25945,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2014-10-15, which was
+This file was extended by gmap $as_me 2014-10-22, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -26008,7 +26008,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2014-10-15
+gmap config.status 2014-10-22
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/src/bitpack64-write.c b/src/bitpack64-write.c
index 24ef7a1..6a98ec1 100644
--- a/src/bitpack64-write.c
+++ b/src/bitpack64-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-write.c 132144 2014-04-02 16:02:28Z twu $";
+static char rcsid[] = "$Id: bitpack64-write.c 151045 2014-10-16 19:08:17Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -17,11 +17,13 @@ static char rcsid[] = "$Id: bitpack64-write.c 132144 2014-04-02 16:02:28Z twu $"
#include "mem.h"
#include "assert.h"
#include "fopen.h"
+#include "popcount.h"
#ifdef HAVE_SSE2
#include <emmintrin.h>
#endif
+
/* #define ALLOW_ODD_PACKSIZES 1 */
/* #define USE_ONE_FILE_FOR_FIXED 1 */
@@ -4424,7 +4426,7 @@ static int
compute_q4_diffs_bidir (UINT4 *diffs, UINT4 *values) {
UINT4 packsize;
int i;
- UINT4 maxdiff = 0, top;
+ UINT4 maxdiff = 0;
int firstbit, msb;
#if 0
@@ -4460,7 +4462,7 @@ compute_q4_diffs_bidir (UINT4 *diffs, UINT4 *values) {
asm("bsr %1,%0" : "=r"(msb) : "r"(maxdiff));
packsize = msb + 1;
#else
- firstbit = ((top = maxdiff >> 16) ? clz_table[top] : 16 + clz_table[maxdiff]);
+ firstbit = ((maxdiff >> 16) ? clz_table[maxdiff >> 16] : 16 + clz_table[maxdiff]);
packsize = 32 - firstbit;
#endif
@@ -4477,7 +4479,7 @@ static int
compute_q1_diffs (UINT4 *diffs, UINT4 *values) {
UINT4 packsize;
int i;
- UINT4 maxdiff = 0, top;
+ UINT4 maxdiff = 0;
int firstbit, msb;
#if 0
@@ -4502,7 +4504,7 @@ compute_q1_diffs (UINT4 *diffs, UINT4 *values) {
asm("bsr %1,%0" : "=r"(msb) : "r"(maxdiff));
packsize = msb + 1;
#else
- firstbit = ((top = maxdiff >> 16) ? clz_table[top] : 16 + clz_table[maxdiff]);
+ firstbit = ((maxdiff >> 16) ? clz_table[maxdiff >> 16] : 16 + clz_table[maxdiff]);
packsize = 32 - firstbit;
#endif
@@ -4521,7 +4523,7 @@ static int
compute_q4_diffs_bidir_huge (UINT4 *diffs, UINT8 *values) {
UINT4 packsize;
int i;
- UINT4 maxdiff = 0, top;
+ UINT4 maxdiff = 0;
int firstbit, msb;
@@ -4552,7 +4554,7 @@ compute_q4_diffs_bidir_huge (UINT4 *diffs, UINT8 *values) {
asm("bsr %1,%0" : "=r"(msb) : "r"(maxdiff));
packsize = msb + 1;
#else
- firstbit = ((top = maxdiff >> 16) ? clz_table[top] : 16 + clz_table[maxdiff]);
+ firstbit = ((maxdiff >> 16) ? clz_table[maxdiff >> 16] : 16 + clz_table[maxdiff]);
packsize = 32 - firstbit;
#endif
@@ -5314,7 +5316,7 @@ Bitpack64_write_fixed10_huge (char *pagesfile, char *ptrsfile, char *compfile,
static int
compute_packsize (UINT4 *values) {
UINT4 packsize;
- UINT4 maxvalue = 0, top;
+ UINT4 maxvalue = 0;
int i;
int firstbit, msb;
@@ -5334,7 +5336,7 @@ compute_packsize (UINT4 *values) {
asm("bsr %1,%0" : "=r"(msb) : "r"(maxvalue));
packsize = msb + 1;
#else
- firstbit = ((top = maxvalue >> 16) ? clz_table[top] : 16 + clz_table[maxvalue]);
+ firstbit = ((maxvalue >> 16) ? clz_table[maxvalue >> 16] : 16 + clz_table[maxvalue]);
packsize = 32 - firstbit;
#endif
diff --git a/src/genome128_hr.c b/src/genome128_hr.c
index ffc91f9..d455af1 100644
--- a/src/genome128_hr.c
+++ b/src/genome128_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome128_hr.c 148186 2014-09-18 16:35:33Z twu $";
+static char rcsid[] = "$Id: genome128_hr.c 151045 2014-10-16 19:08:17Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -18649,7 +18649,7 @@ print_diff_leading_zeroes (__m128i _diff, int offset) {
#elif defined(HAVE_BUILTIN_CLZ)
#define count_leading_zeroes(diff) __builtin_clz(diff)
#else
-#define count_leading_zeroes(diff) ((top = diff >> 16) ? clz_table[top] : 16 + clz_table[diff])
+#define count_leading_zeroes(diff) ((diff >> 16) ? clz_table[diff >> 16] : 16 + clz_table[diff])
#endif
#ifdef HAVE_BMI1
@@ -18714,7 +18714,7 @@ print_diff_leading_zeroes (UINT4 diff, int offset) {
#elif defined(HAVE_BUILTIN_CLZ)
#define count_leading_zeroes_32(diff) __builtin_clz(diff)
#else
-#define count_leading_zeroes_32(diff) ((top = diff >> 16) ? clz_table[top] : 16 + clz_table[diff])
+#define count_leading_zeroes_32(diff) ((diff >> 16) ? clz_table[diff >> 16] : 16 + clz_table[diff])
#endif
#ifdef HAVE_BMI1
diff --git a/src/gregion.c b/src/gregion.c
index b0d7f98..c197605 100644
--- a/src/gregion.c
+++ b/src/gregion.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gregion.c 145990 2014-08-25 21:47:32Z twu $";
+static char rcsid[] = "$Id: gregion.c 151507 2014-10-22 19:37:48Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -15,6 +15,9 @@ static char rcsid[] = "$Id: gregion.c 145990 2014-08-25 21:47:32Z twu $";
#define MAX_GENOMICLENGTH 2000000
+#define MAX_NCHRS_FOR_ALLOCA 100
+#define MAX_GREGIONS_FOR_ALLOCA 100
+
#define EXTRA_SHORTEND 30000
#define EXTRA_LONGEND 100000
@@ -516,10 +519,16 @@ Gregion_filter_unique (List_T gregionlist) {
}
);
- eliminate = (bool *) CALLOCA(n,sizeof(bool));
- array = (T *) MALLOCA(n * sizeof(T));
+ if (n < MAX_GREGIONS_FOR_ALLOCA) {
+ eliminate = (bool *) CALLOCA(n,sizeof(bool));
+ array = (T *) MALLOCA(n * sizeof(T));
+ List_fill_array_and_free((void **) array,&gregionlist);
+ } else {
+ eliminate = (bool *) CALLOC(n,sizeof(bool));
+ array = (T *) List_to_array(gregionlist,NULL);
+ List_free(&gregionlist);
+ }
- List_fill_array_and_free((void **) array,&gregionlist);
qsort(array,n,sizeof(T),weight_cmp);
for (i = 0; i < n; i++) {
@@ -559,8 +568,13 @@ Gregion_filter_unique (List_T gregionlist) {
}
}
- FREEA(eliminate);
- FREEA(array);
+ if (n < MAX_GREGIONS_FOR_ALLOCA) {
+ FREEA(eliminate);
+ FREEA(array);
+ } else {
+ FREE(eliminate);
+ FREE(array);
+ }
debug(
for (p = unique, i = 0; p != NULL; p = p->rest, i++) {
@@ -831,16 +845,29 @@ Gregion_sort_low_descending (List_T gregions) {
if ((n = List_length(gregions)) == 0) {
return (List_T) NULL;
- } else {
+
+ } else if (n < MAX_GREGIONS_FOR_ALLOCA) {
array = (Gregion_T *) MALLOCA(n * sizeof(Gregion_T));
List_fill_array((void **) array,gregions);
+
qsort(array,n,sizeof(Gregion_T),Gregion_low_descending_cmp);
-
for (i = n-1; i >= 0; i--) {
sorted = List_push(sorted,array[i]);
}
+
FREEA(array);
return sorted;
+
+ } else {
+ array = (Gregion_T *) List_to_array(gregions,NULL);
+
+ qsort(array,n,sizeof(Gregion_T),Gregion_low_descending_cmp);
+ for (i = n-1; i >= 0; i--) {
+ sorted = List_push(sorted,array[i]);
+ }
+
+ FREE(array);
+ return sorted;
}
}
@@ -854,16 +881,29 @@ Gregion_sort_high_ascending (List_T gregions) {
if ((n = List_length(gregions)) == 0) {
return (List_T) NULL;
- } else {
+
+ } else if (n < MAX_GREGIONS_FOR_ALLOCA) {
array = (Gregion_T *) MALLOCA(n * sizeof(Gregion_T));
List_fill_array((void **) array,gregions);
+
qsort(array,n,sizeof(Gregion_T),Gregion_high_ascending_cmp);
-
for (i = n-1; i >= 0; i--) {
sorted = List_push(sorted,array[i]);
}
+
FREEA(array);
return sorted;
+
+ } else {
+ array = (Gregion_T *) List_to_array(gregions,NULL);
+
+ qsort(array,n,sizeof(Gregion_T),Gregion_high_ascending_cmp);
+ for (i = n-1; i >= 0; i--) {
+ sorted = List_push(sorted,array[i]);
+ }
+
+ FREE(array);
+ return sorted;
}
}
@@ -879,6 +919,7 @@ struct Base_T {
bool usedp;
};
+#if 0
static void
Base_free (Base_T *old) {
if ((*old)->gregions != NULL) {
@@ -887,6 +928,7 @@ Base_free (Base_T *old) {
FREE(*old);
return;
}
+#endif
static Base_T
@@ -953,8 +995,12 @@ compute_ceilings (Uinttable_T low_basetable) {
int n, i;
n = Uinttable_length(low_basetable);
- keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
- Uinttable_fill_keys(keys,low_basetable,/*sortp*/true);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
+ Uinttable_fill_keys(keys,low_basetable,/*sortp*/true);
+ } else {
+ keys = Uinttable_keys(low_basetable,/*sortp*/true);
+ }
debug2(printf("low_basetable has %d entries\n",n));
prevpos = 0U;
@@ -1045,7 +1091,11 @@ compute_ceilings (Uinttable_T low_basetable) {
}
}
- FREEA(keys);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ FREEA(keys);
+ } else {
+ FREE(keys);
+ }
return bestprevpos;
}
@@ -1067,8 +1117,12 @@ compute_floors (Uinttable_T high_basetable) {
int n, i;
n = Uinttable_length(high_basetable);
- keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
- Uinttable_fill_keys(keys,high_basetable,/*sortp*/true);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
+ Uinttable_fill_keys(keys,high_basetable,/*sortp*/true);
+ } else {
+ keys = Uinttable_keys(high_basetable,/*sortp*/true);
+ }
debug2(printf("high_basetable has %d entries\n",n));
prevpos = (Chrpos_T) -1U;
@@ -1158,7 +1212,11 @@ compute_floors (Uinttable_T high_basetable) {
}
}
- FREEA(keys);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ FREEA(keys);
+ } else {
+ FREE(keys);
+ }
return bestprevpos;
}
@@ -1173,8 +1231,12 @@ traceback_ceilings (Uinttable_T low_basetable, Chrpos_T prevpos) {
int n, i;
n = Uinttable_length(low_basetable);
- keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
- Uinttable_fill_keys(keys,low_basetable,/*sortp*/true);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
+ Uinttable_fill_keys(keys,low_basetable,/*sortp*/true);
+ } else {
+ keys = Uinttable_keys(low_basetable,/*sortp*/true);
+ }
ceiling = (Chrpos_T) -1U;
@@ -1205,7 +1267,11 @@ traceback_ceilings (Uinttable_T low_basetable, Chrpos_T prevpos) {
i--;
}
- FREEA(keys);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ FREEA(keys);
+ } else {
+ FREE(keys);
+ }
return;
}
@@ -1220,8 +1286,12 @@ traceback_floors (Uinttable_T high_basetable, Chrpos_T prevpos) {
int n, i;
n = Uinttable_length(high_basetable);
- keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
- Uinttable_fill_keys(keys,high_basetable,/*sortp*/true);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
+ Uinttable_fill_keys(keys,high_basetable,/*sortp*/true);
+ } else {
+ keys = Uinttable_keys(high_basetable,/*sortp*/true);
+ }
floor = 0U;
@@ -1252,7 +1322,11 @@ traceback_floors (Uinttable_T high_basetable, Chrpos_T prevpos) {
i++;
}
- FREEA(keys);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ FREEA(keys);
+ } else {
+ FREE(keys);
+ }
return;
}
@@ -1268,8 +1342,12 @@ bound_gregions (Uinttable_T low_basetable, Uinttable_T high_basetable) {
int n, i;
n = Uinttable_length(low_basetable);
- keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
- Uinttable_fill_keys(keys,low_basetable,/*sortp*/true);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
+ Uinttable_fill_keys(keys,low_basetable,/*sortp*/true);
+ } else {
+ keys = Uinttable_keys(low_basetable,/*sortp*/true);
+ }
for (i = 0; i < n; i++) {
base_low = (Base_T) Uinttable_get(low_basetable,keys[i]);
@@ -1288,12 +1366,20 @@ bound_gregions (Uinttable_T low_basetable, Uinttable_T high_basetable) {
}
}
- FREEA(keys);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ FREEA(keys);
+ } else {
+ FREE(keys);
+ }
n = Uinttable_length(high_basetable);
- keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
- Uinttable_fill_keys(keys,high_basetable,/*sortp*/true);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ keys = (Chrpos_T *) MALLOCA(n * sizeof(Chrpos_T));
+ Uinttable_fill_keys(keys,high_basetable,/*sortp*/true);
+ } else {
+ keys = Uinttable_keys(high_basetable,/*sortp*/true);
+ }
for (i = n-1; i >= 0; i--) {
base_high = (Base_T) Uinttable_get(high_basetable,keys[i]);
@@ -1312,7 +1398,11 @@ bound_gregions (Uinttable_T low_basetable, Uinttable_T high_basetable) {
}
}
- FREEA(keys);
+ if (n < MAX_NCHRS_FOR_ALLOCA) {
+ FREEA(keys);
+ } else {
+ FREE(keys);
+ }
return;
}
@@ -1328,10 +1418,11 @@ Gregion_filter_clean (List_T gregionlist, int nchrs) {
Chrpos_T prevpos;
Chrnum_T chrnum;
- List_T unique = NULL, p;
+ List_T p;
T gregion;
int n;
#if 0
+ List_T unique = NULL;
T x, y, *array;
int i, j;
bool *eliminate;
@@ -1353,8 +1444,13 @@ Gregion_filter_clean (List_T gregionlist, int nchrs) {
}
);
- low_basetables = (Uinttable_T *) CALLOCA(nchrs+1,sizeof(Uinttable_T));
- high_basetables = (Uinttable_T *) CALLOCA(nchrs+1,sizeof(Uinttable_T));
+ if (nchrs < MAX_NCHRS_FOR_ALLOCA) {
+ low_basetables = (Uinttable_T *) CALLOCA(nchrs+1,sizeof(Uinttable_T));
+ high_basetables = (Uinttable_T *) CALLOCA(nchrs+1,sizeof(Uinttable_T));
+ } else {
+ low_basetables = (Uinttable_T *) CALLOC(nchrs+1,sizeof(Uinttable_T));
+ high_basetables = (Uinttable_T *) CALLOC(nchrs+1,sizeof(Uinttable_T));
+ }
for (p = gregionlist; p != NULL; p = List_next(p)) {
gregion = (T) List_head(p);
@@ -1395,8 +1491,13 @@ Gregion_filter_clean (List_T gregionlist, int nchrs) {
/* Todo: Free each table */
- FREEA(high_basetables);
- FREEA(low_basetables);
+ if (nchrs < MAX_NCHRS_FOR_ALLOCA) {
+ FREEA(high_basetables);
+ FREEA(low_basetables);
+ } else {
+ FREE(high_basetables);
+ FREE(low_basetables);
+ }
#if 0
eliminate = (bool *) CALLOC(n,sizeof(bool));
diff --git a/src/indexdb-write.c b/src/indexdb-write.c
index f9fe333..4c3792e 100644
--- a/src/indexdb-write.c
+++ b/src/indexdb-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb-write.c 132144 2014-04-02 16:02:28Z twu $";
+static char rcsid[] = "$Id: indexdb-write.c 151046 2014-10-16 19:08:41Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -60,7 +60,6 @@ static char rcsid[] = "$Id: indexdb-write.c 132144 2014-04-02 16:02:28Z twu $";
#include "indexdbdef.h"
#include "iit-read-univ.h"
#include "indexdb.h"
-#include "popcount.h"
#include "bitpack64-read.h"
#include "bitpack64-write.h"
diff --git a/src/sarray-read.c b/src/sarray-read.c
index aed9a11..69638d0 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 150216 2014-10-07 23:53:59Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 151053 2014-10-16 19:57:23Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1499,8 +1499,14 @@ Elt_fill_positions_all (Elt_T this, T sarray) {
Univcoord_T pos;
int i;
- if (this->positions_allocated == NULL) {
+ debug(printf("Entering Elt_fill_positions_all on %p\n",this));
+ if (this->positions_allocated != NULL) {
+ debug(printf(" positions_allocated is already non-NULL, so skipping\n"));
+
+ } else {
this->npositions = this->finalptr - this->initptr + 1;
+ debug(printf(" filling %d positions\n",this->npositions));
+
if (this->nmatches == 0 || this->npositions > EXCESS_SARRAY_HITS) {
this->positions_allocated = this->positions = (Univcoord_T *) NULL;
this->npositions = 0;
@@ -2394,8 +2400,9 @@ static void
Elt_dump (Elt_T elt) {
int k;
+ printf("Elt with %d positions:\n",elt->npositions);
for (k = 0; k < elt->npositions; k++) {
- printf("%d..%d:%u\n",elt->querystart,elt->queryend,elt->positions[k]);
+ printf(" %d..%d:%u\n",elt->querystart,elt->queryend,elt->positions[k]);
}
printf("\n");
@@ -3836,7 +3843,8 @@ Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *am
sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryuc_ptr[halfwaypos]),
querylength - halfwaypos,/*queryoffset*/halfwaypos,
query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,first_read_p,plus_conversion);
- if (nmatches >= querylength - halfwaypos) {
+ /* Don't want to limit based on nmatches */
+ if (1 || nmatches >= querylength - halfwaypos) {
elt = Elt_new(halfwaypos,nmatches,initptr,finalptr);
Elt_fill_positions_all(elt,plus_sarray);
for (i = 0; i < elt->npositions; i++) {
@@ -3894,7 +3902,8 @@ Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *am
sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryrc[halfwaypos]),
querylength - halfwaypos,/*queryoffset*/halfwaypos,
query_compress_rev,minus_sarray,/*plusp*/false,genestrand,first_read_p,minus_conversion);
- if (nmatches >= querylength - halfwaypos) {
+ /* Don't want to limit based on nmatches */
+ if (1 || nmatches >= querylength - halfwaypos) {
elt = Elt_new(halfwaypos,nmatches,initptr,finalptr);
Elt_fill_positions_all(elt,minus_sarray);
for (i = 0; i < elt->npositions; i++) {
@@ -3946,11 +3955,12 @@ Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *am
debug(printf("plus_querypos %d vs querylength %d\n",plus_querypos,querylength));
if (nmatches <= best_plus_nmatches) {
/* Initial (left) elt was best */
- debug(printf("Initial elt was best\n"));
+ debug(printf("Initial elt %p was best:\n",best_plus_elt));
plus_set = List_push(NULL,elt);
if (plus_querypos >= querylength) {
chrhigh = 0U;
Elt_fill_positions_all(best_plus_elt,plus_sarray);
+ debug(Elt_dump(best_plus_elt));
for (i = 0; i < best_plus_elt->npositions; i++) {
left = best_plus_elt->positions[i];
if (left > chrhigh) {
@@ -3972,13 +3982,14 @@ Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *am
}
} else {
/* Second (right) elt is best */
- debug(printf("Second elt is best\n"));
+ debug(printf("Second elt %p is best:\n",elt));
plus_set = List_push(NULL,best_plus_elt);
best_plus_elt = elt;
best_plus_nmatches = nmatches;
if (plus_querypos >= querylength) {
chrhigh = 0U;
Elt_fill_positions_all(best_plus_elt,plus_sarray);
+ debug(Elt_dump(best_plus_elt));
for (i = 0; i < best_plus_elt->npositions; i++) {
left = best_plus_elt->positions[i];
if (left > chrhigh) {
@@ -4022,11 +4033,12 @@ Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *am
debug(printf("minus_querypos %d vs querylength %d\n",minus_querypos,querylength));
if (nmatches <= best_minus_nmatches) {
/* Initial (left) elt was best */
- debug(printf("Initial elt was best\n"));
+ debug(printf("Initial elt %p was best:\n",best_minus_elt));
minus_set = List_push(NULL,elt);
if (minus_querypos >= querylength) {
chrhigh = 0U;
Elt_fill_positions_all(best_minus_elt,minus_sarray);
+ debug(Elt_dump(best_minus_elt));
for (i = 0; i < best_minus_elt->npositions; i++) {
left = best_minus_elt->positions[i];
if (left > chrhigh) {
@@ -4048,13 +4060,14 @@ Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *am
}
} else {
/* Second (right) elt is best */
- debug(printf("Second elt is best\n"));
+ debug(printf("Second elt %p is best:\n",elt));
minus_set = List_push(NULL,best_minus_elt);
best_minus_elt = elt;
best_minus_nmatches = nmatches;
if (minus_querypos >= querylength) {
chrhigh = 0U;
Elt_fill_positions_all(best_minus_elt,minus_sarray);
+ debug(Elt_dump(best_minus_elt));
for (i = 0; i < best_minus_elt->npositions; i++) {
left = best_minus_elt->positions[i];
if (left > chrhigh) {
diff --git a/src/sarray-write.c b/src/sarray-write.c
index 5c2aa6e..68c6d20 100644
--- a/src/sarray-write.c
+++ b/src/sarray-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-write.c 140591 2014-07-03 16:08:23Z twu $";
+static char rcsid[] = "$Id: sarray-write.c 151046 2014-10-16 19:08:41Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -24,7 +24,6 @@ static char rcsid[] = "$Id: sarray-write.c 140591 2014-07-03 16:08:23Z twu $";
#include "genome128_hr.h"
#include "uintlist.h"
#include "intlist.h"
-#include "popcount.h"
#ifdef WORDS_BIGENDIAN
diff --git a/src/shortread.c b/src/shortread.c
index 110996a..c7251de 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 150681 2014-10-14 01:47:18Z twu $";
+static char rcsid[] = "$Id: shortread.c 151067 2014-10-16 21:09:00Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -903,10 +903,10 @@ skip_header_bzip2 (Bzip2_T fp) {
-#if 1
#define CONTROLM 13 /* From PC */
#define SPACE 32
+#if 0
static char *
find_bad_char (char *line) {
char *first, *p1, *p2;
@@ -946,6 +946,7 @@ find_bad_char (char *line) {
return first;
}
}
+#endif
static char *
find_spaces (int *nspaces, char *line) {
@@ -965,12 +966,11 @@ find_spaces (int *nspaces, char *line) {
return first;
}
}
-#endif
static int
-input_oneline (int *nextchar, char **longstring, char *Start, FILE *fp, char *acc, bool possible_fasta_header_p) {
+input_oneline (int *nextchar, char **longstring, char *Start, FILE *fp, bool possible_fasta_header_p) {
int remainder;
char *ptr, *p = NULL;
int strlenp, nspaces;
@@ -1060,7 +1060,7 @@ input_oneline (int *nextchar, char **longstring, char *Start, FILE *fp, char *ac
#ifdef HAVE_ZLIB
static int
-input_oneline_gzip (int *nextchar, char **longstring, char *Start, gzFile fp, char *acc, bool possible_fasta_header_p) {
+input_oneline_gzip (int *nextchar, char **longstring, char *Start, gzFile fp, bool possible_fasta_header_p) {
int remainder;
char *ptr, *p = NULL;
int strlenp, nspaces;
@@ -1148,7 +1148,7 @@ input_oneline_gzip (int *nextchar, char **longstring, char *Start, gzFile fp, ch
#ifdef HAVE_BZLIB
static int
-input_oneline_bzip2 (int *nextchar, char **longstring, char *Start, Bzip2_T fp, char *acc, bool possible_fasta_header_p) {
+input_oneline_bzip2 (int *nextchar, char **longstring, char *Start, Bzip2_T fp, bool possible_fasta_header_p) {
int remainder;
char *ptr, *p = NULL;
int strlenp, nspaces;
@@ -2189,7 +2189,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
/* Process blank lines and loop again */
while (*nextchar != EOF && ((*nextchar = fgetc(*input1)) != '>')) {
}
- } else if ((fulllength1 = input_oneline(&(*nextchar),&long_read_1,&(Read1[0]),*input1,acc,
+ } else if ((fulllength1 = input_oneline(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
/*possible_fasta_header_p*/true)) == 0) {
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
@@ -2197,14 +2197,14 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
} else {
/* queryseq1 is in Read1 */
/* See what is in next line */
- if ((fulllength2 = input_oneline(&(*nextchar),&long_read_2,&(Read2[0]),*input1,acc,
+ if ((fulllength2 = input_oneline(&(*nextchar),&long_read_2,&(Read2[0]),*input1,
/*possible_fasta_header_p*/true)) > 0) {
/* Paired-end, single file. queryseq1 is in Read1 and queryseq2 is in Read2 */
if (*nextchar == '+') {
/* Paired-end with quality strings */
skip_header(*input1);
*nextchar = fgetc(*input1);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,acc,
+ quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength1) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2215,7 +2215,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
Quality,long_quality,quality_length,barcode_length,
invert_first_p,/*copy_acc_p*/false,skipp);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,acc,
+ quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength2) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2255,7 +2255,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
while (nextchar2 != EOF && ((nextchar2 = fgetc(*input2)) != '>')) {
}
(*queryseq2) = (T) NULL;
- } else if ((fulllength2 = input_oneline(&nextchar2,&long_read_2,&(Read2[0]),*input2,acc2,
+ } else if ((fulllength2 = input_oneline(&nextchar2,&long_read_2,&(Read2[0]),*input2,
/*possible_fasta_header_p*/true)) == 0) {
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
@@ -2266,7 +2266,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
/* End 1 with a quality string */
skip_header(*input1);
*nextchar = fgetc(*input1);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,acc,
+ quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength1) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2288,7 +2288,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
/* End 2 with a quality string */
skip_header(*input2);
nextchar2 = fgetc(*input2);
- quality_length = input_oneline(&nextchar2,&long_quality,&(Quality[0]),*input2,acc2,
+ quality_length = input_oneline(&nextchar2,&long_quality,&(Quality[0]),*input2,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength2) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2318,7 +2318,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
/* Single-end with a quality string */
skip_header(*input1);
*nextchar = fgetc(*input1);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,acc,
+ quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength1) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2341,7 +2341,9 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
debug(printf("Returning queryseq with contents %s\n",queryseq1->contents));
- if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ if (queryseq1 == (T) SKIPPED) {
+ return (T) SKIPPED;
+ } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
return queryseq1;
}
}
@@ -2358,7 +2360,7 @@ Shortread_read_fasta_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
int barcode_length, bool invert_first_p, bool invert_second_p) {
T queryseq1;
char *acc, *restofheader, *acc2, *restofheader2;
- char *long_read_1, *long_read_2, *long_quality;
+ char *long_read_1, *long_read_2;
int nextchar2;
int fulllength1, fulllength2;
bool filterp;
@@ -2429,7 +2431,7 @@ Shortread_read_fasta_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
/* Process blank lines and loop again */
while (*nextchar != EOF && ((*nextchar = gzgetc(*input1)) != '>')) {
}
- } else if ((fulllength1 = input_oneline_gzip(&(*nextchar),&long_read_1,&(Read1[0]),*input1,acc,
+ } else if ((fulllength1 = input_oneline_gzip(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
/*possible_fasta_header_p*/true)) == 0) {
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
@@ -2437,7 +2439,7 @@ Shortread_read_fasta_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
} else {
/* queryseq1 is in Read1 */
/* See what is in next line */
- if ((fulllength2 = input_oneline_gzip(&(*nextchar),&long_read_2,&(Read2[0]),*input1,acc,
+ if ((fulllength2 = input_oneline_gzip(&(*nextchar),&long_read_2,&(Read2[0]),*input1,
/*possible_fasta_header_p*/true)) > 0) {
/* Paired-end, single file. queryseq1 is in Read1 and queryseq2 is in Read2 */
queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
@@ -2470,7 +2472,7 @@ Shortread_read_fasta_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
while (nextchar2 != EOF && ((nextchar2 = gzgetc(*input2)) != '>')) {
}
(*queryseq2) = (T) NULL;
- } else if ((fulllength2 = input_oneline_gzip(&nextchar2,&long_read_2,&(Read2[0]),*input2,acc2,
+ } else if ((fulllength2 = input_oneline_gzip(&nextchar2,&long_read_2,&(Read2[0]),*input2,
/*possible_fasta_header_p*/true)) == 0) {
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
@@ -2498,7 +2500,9 @@ Shortread_read_fasta_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
debug(printf("Returning queryseq with contents %s\n",queryseq1->contents));
- if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ if (queryseq1 == (T) SKIPPED) {
+ return (T) SKIPPED;
+ } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
return queryseq1;
}
}
@@ -2515,7 +2519,7 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
int barcode_length, bool invert_first_p, bool invert_second_p) {
T queryseq1;
char *acc, *restofheader, *acc2, *restofheader2;
- char *long_read_1, *long_read_2, *long_quality;
+ char *long_read_1, *long_read_2;
int nextchar2;
int fulllength1, fulllength2;
bool filterp;
@@ -2583,7 +2587,7 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
/* Process blank lines and loop again */
while (*nextchar != EOF && ((*nextchar = bzgetc(*input1)) != '>')) {
}
- } else if ((fulllength1 = input_oneline_bzip2(&(*nextchar),&long_read_1,&(Read1[0]),*input1,acc,
+ } else if ((fulllength1 = input_oneline_bzip2(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
/*possible_fasta_header_p*/true)) == 0) {
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
@@ -2591,7 +2595,7 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
} else {
/* queryseq1 is in Read1 */
/* See what is in next line */
- if ((fulllength2 = input_oneline_bzip2(&(*nextchar),&long_read_2,&(Read2[0]),*input1,acc,
+ if ((fulllength2 = input_oneline_bzip2(&(*nextchar),&long_read_2,&(Read2[0]),*input1,
/*possible_fasta_header_p*/true)) > 0) {
/* Paired-end, single file. queryseq1 is in Read1 and queryseq2 is in Read2 */
queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
@@ -2620,7 +2624,7 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
while (nextchar2 != EOF && ((nextchar2 = bzgetc(*input2)) != '>')) {
}
(*queryseq2) = (T) NULL;
- } else if ((fulllength2 = input_oneline_bzip2(&nextchar2,&long_read_2,&(Read2[0]),*input2,acc2,
+ } else if ((fulllength2 = input_oneline_bzip2(&nextchar2,&long_read_2,&(Read2[0]),*input2,
/*possible_fasta_header_p*/true)) == 0) {
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
@@ -2648,7 +2652,9 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
debug(printf("Returning queryseq with contents %s\n",queryseq1->contents));
- if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ if (queryseq1 == (T) SKIPPED) {
+ return (T) SKIPPED;
+ } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
return queryseq1;
}
}
@@ -2726,7 +2732,7 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
*nextchar = EOF;
} else {
*nextchar = fgetc(*input1);
- if ((fulllength = input_oneline(&(*nextchar),&long_read_1,&(Read1[0]),*input1,acc,
+ if ((fulllength = input_oneline(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
@@ -2742,7 +2748,7 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
} else {
skip_header(*input1);
*nextchar = fgetc(*input1);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,acc,
+ quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2784,7 +2790,7 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
}
}
nextchar2 = fgetc(*input2);
- if ((fulllength = input_oneline(&nextchar2,&long_read_2,&(Read2[0]),*input2,acc,
+ if ((fulllength = input_oneline(&nextchar2,&long_read_2,&(Read2[0]),*input2,
/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
@@ -2800,7 +2806,7 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
} else {
skip_header(*input2);
nextchar2 = fgetc(*input2);
- quality_length = input_oneline(&nextchar2,&long_quality,&(Quality[0]),*input2,acc,
+ quality_length = input_oneline(&nextchar2,&long_quality,&(Quality[0]),*input2,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2817,7 +2823,9 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
}
}
- if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ if (queryseq1 == (T) SKIPPED) {
+ return (T) SKIPPED;
+ } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
return queryseq1;
}
}
@@ -2898,7 +2906,7 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
*nextchar = EOF;
} else {
*nextchar = gzgetc(*input1);
- if ((fulllength = input_oneline_gzip(&(*nextchar),&long_read_1,&(Read1[0]),*input1,acc,
+ if ((fulllength = input_oneline_gzip(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
@@ -2914,7 +2922,7 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
} else {
skip_header_gzip(*input1);
*nextchar = gzgetc(*input1);
- quality_length = input_oneline_gzip(&(*nextchar),&long_quality,&(Quality[0]),*input1,acc,
+ quality_length = input_oneline_gzip(&(*nextchar),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2956,7 +2964,7 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
}
}
nextchar2 = gzgetc(*input2);
- if ((fulllength = input_oneline_gzip(&nextchar2,&long_read_2,&(Read2[0]),*input2,acc,
+ if ((fulllength = input_oneline_gzip(&nextchar2,&long_read_2,&(Read2[0]),*input2,
/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
@@ -2972,7 +2980,7 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
} else {
skip_header_gzip(*input2);
nextchar2 = gzgetc(*input2);
- quality_length = input_oneline_gzip(&nextchar2,&long_quality,&(Quality[0]),*input2,acc,
+ quality_length = input_oneline_gzip(&nextchar2,&long_quality,&(Quality[0]),*input2,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2988,7 +2996,9 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
}
}
- if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ if (queryseq1 == (T) SKIPPED) {
+ return (T) SKIPPED;
+ } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
return queryseq1;
}
}
@@ -3058,7 +3068,7 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
*nextchar = EOF;
} else {
*nextchar = bzgetc(*input1);
- if ((fulllength = input_oneline_bzip2(&(*nextchar),&long_read_1,&(Read1[0]),*input1,acc,
+ if ((fulllength = input_oneline_bzip2(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
@@ -3074,7 +3084,7 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
} else {
skip_header_bzip2(*input1);
*nextchar = bzgetc(*input1);
- quality_length = input_oneline_bzip2(&(*nextchar),&long_quality,&(Quality[0]),*input1,acc,
+ quality_length = input_oneline_bzip2(&(*nextchar),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -3116,7 +3126,7 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
}
}
nextchar2 = bzgetc(*input2);
- if ((fulllength = input_oneline_bzip2(&nextchar2,&long_read_2,&(Read2[0]),*input2,acc,
+ if ((fulllength = input_oneline_bzip2(&nextchar2,&long_read_2,&(Read2[0]),*input2,
/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
@@ -3132,7 +3142,7 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
} else {
skip_header_bzip2(*input2);
nextchar2 = bzgetc(*input2);
- quality_length = input_oneline_bzip2(&nextchar2,&long_quality,&(Quality[0]),*input2,acc,
+ quality_length = input_oneline_bzip2(&nextchar2,&long_quality,&(Quality[0]),*input2,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -3148,7 +3158,9 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
}
}
- if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ if (queryseq1 == (T) SKIPPED) {
+ return (T) SKIPPED;
+ } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
return queryseq1;
}
}
diff --git a/src/stage1hr.c b/src/stage1hr.c
index b09904b..f4e4209 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 148544 2014-09-22 20:42:34Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 151054 2014-10-16 19:58:21Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -12873,6 +12873,7 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
done_level = user_maxlevel;
}
+ debug(printf("SA> opt_level %d, done_level %d\n",opt_level,done_level));
} else {
#endif
diff --git a/src/stage2.c b/src/stage2.c
index 20e2896..98060a8 100644
--- a/src/stage2.c
+++ b/src/stage2.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage2.c 146634 2014-09-02 22:33:39Z twu $";
+static char rcsid[] = "$Id: stage2.c 151086 2014-10-16 23:52:16Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3411,28 +3411,26 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
if ((best_fwd_score = prevlink->fwd_score - (querypos - grand_fwd_querypos)) > 0) {
prevposition = mappings[grand_fwd_querypos][grand_fwd_hit];
debug12(printf("Considering prevposition %u to position %u as a grand fwd lookback\n",prevposition,position));
- if (position > prevposition + maxintronlen) {
- debug12(printf(" => Too long\n"));
- } else {
- for (hit = low_hit; hit < high_hit; hit++) {
+ for (hit = low_hit; hit < high_hit; hit++) {
+ if ((position = mappings[querypos][hit]) > prevposition + maxintronlen) {
+ debug12(printf(" => Too long\n"));
+ } else if (position >= prevposition + indexsize_nt) {
currlink = &(links[querypos][hit]);
- if ((position = mappings[querypos][hit]) >= prevposition + indexsize_nt) {
- currlink->fwd_consecutive = indexsize_nt;
- /* currlink->fwd_rootnlinks = 1; */
- currlink->fwd_pos = grand_fwd_querypos;
- currlink->fwd_hit = grand_fwd_hit;
- currlink->fwd_score = best_fwd_score;
+ currlink->fwd_consecutive = indexsize_nt;
+ /* currlink->fwd_rootnlinks = 1; */
+ currlink->fwd_pos = grand_fwd_querypos;
+ currlink->fwd_hit = grand_fwd_hit;
+ currlink->fwd_score = best_fwd_score;
#ifdef DEBUG9
- currlink->fwd_tracei = ++fwd_tracei;
- currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd;
- currlink->fwd_intronnrev = prevlink->fwd_intronnrev;
- currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1;
+ currlink->fwd_tracei = ++fwd_tracei;
+ currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd;
+ currlink->fwd_intronnrev = prevlink->fwd_intronnrev;
+ currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1;
#endif
- }
}
- debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n",
- querypos,grand_fwd_querypos,grand_fwd_hit,prevlink->fwd_score));
}
+ debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n",
+ querypos,grand_fwd_querypos,grand_fwd_hit,prevlink->fwd_score));
}
}
@@ -3461,28 +3459,26 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
if ((best_rev_score = prevlink->rev_score - (querypos - grand_rev_querypos)) > 0) {
prevposition = mappings[grand_rev_querypos][grand_rev_hit];
debug12(printf("Considering prevposition %u to position %u as a grand rev lookback\n",prevposition,position));
- if (position > prevposition + maxintronlen) {
- debug12(printf(" => Too long\n"));
- } else {
- for (hit = low_hit; hit < high_hit; hit++) {
+ for (hit = low_hit; hit < high_hit; hit++) {
+ if ((position = mappings[querypos][hit]) > prevposition + maxintronlen) {
+ debug12(printf(" => Too long\n"));
+ } else if (position >= prevposition + indexsize_nt) {
currlink = &(links[querypos][hit]);
- if ((position = mappings[querypos][hit]) >= prevposition + indexsize_nt) {
- currlink->rev_consecutive = indexsize_nt;
- /* currlink->rev_rootnlinks = 1; */
- currlink->rev_pos = grand_rev_querypos;
- currlink->rev_hit = grand_rev_hit;
- currlink->rev_score = best_rev_score;
+ currlink->rev_consecutive = indexsize_nt;
+ /* currlink->rev_rootnlinks = 1; */
+ currlink->rev_pos = grand_rev_querypos;
+ currlink->rev_hit = grand_rev_hit;
+ currlink->rev_score = best_rev_score;
#ifdef DEBUG9
- currlink->rev_tracei = ++rev_tracei;
- currlink->rev_intronnrev = prevlink->rev_intronnfwd;
- currlink->rev_intronnrev = prevlink->rev_intronnrev;
- currlink->rev_intronnunk = prevlink->rev_intronnunk + 1;
+ currlink->rev_tracei = ++rev_tracei;
+ currlink->rev_intronnrev = prevlink->rev_intronnfwd;
+ currlink->rev_intronnrev = prevlink->rev_intronnrev;
+ currlink->rev_intronnunk = prevlink->rev_intronnunk + 1;
#endif
- }
}
- debug12(printf("At querypos %d, setting all rev hits to point back to grand_rev %d,%d with a score of %d\n",
- querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
}
+ debug12(printf("At querypos %d, setting all rev hits to point back to grand_rev %d,%d with a score of %d\n",
+ querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
}
}
@@ -4179,29 +4175,27 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]);
if ((best_fwd_score = prevlink->fwd_score - (grand_fwd_querypos - querypos)) > 0) {
prevposition = mappings[grand_fwd_querypos][grand_fwd_hit];
- debug12(printf("Considering prevposition %u to position %u as a grand fwd lookback\n",prevposition,position));
- if (position > prevposition + maxintronlen) {
- debug12(printf(" => Too long\n"));
- } else {
- for (hit = high_hit - 1; hit >= low_hit; --hit) {
+ debug12(printf("Considering prevposition %u to position %u as a grand fwd lookforward\n",prevposition,position));
+ for (hit = high_hit - 1; hit >= low_hit; --hit) {
+ if ((position = mappings[querypos][hit]) + maxintronlen < prevposition) {
+ debug12(printf(" => Too long\n"));
+ } else if (position + indexsize_nt <= prevposition) {
currlink = &(links[querypos][hit]);
- if ((position = mappings[querypos][hit]) + indexsize_nt <= prevposition) {
- currlink->fwd_consecutive = indexsize_nt;
- /* currlink->fwd_rootnlinks = 1; */
- currlink->fwd_pos = grand_fwd_querypos;
- currlink->fwd_hit = grand_fwd_hit;
- currlink->fwd_score = best_fwd_score;
+ currlink->fwd_consecutive = indexsize_nt;
+ /* currlink->fwd_rootnlinks = 1; */
+ currlink->fwd_pos = grand_fwd_querypos;
+ currlink->fwd_hit = grand_fwd_hit;
+ currlink->fwd_score = best_fwd_score;
#ifdef DEBUG9
- currlink->fwd_tracei = ++fwd_tracei;
- currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd;
- currlink->fwd_intronnrev = prevlink->fwd_intronnrev;
- currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1;
+ currlink->fwd_tracei = ++fwd_tracei;
+ currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd;
+ currlink->fwd_intronnrev = prevlink->fwd_intronnrev;
+ currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1;
#endif
- }
}
- debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n",
- querypos,grand_fwd_querypos,grand_fwd_hit,prevlink->fwd_score));
}
+ debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n",
+ querypos,grand_fwd_querypos,grand_fwd_hit,prevlink->fwd_score));
}
}
@@ -4229,29 +4223,27 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
prevlink = &(links[grand_rev_querypos][grand_rev_hit]);
if ((best_rev_score = prevlink->rev_score - (grand_rev_querypos - querypos)) > 0) {
prevposition = mappings[grand_rev_querypos][grand_rev_hit];
- debug12(printf("Considering prevposition %u to position %u as a grand rev lookback\n",prevposition,position));
- if (position > prevposition + maxintronlen) {
- debug12(printf(" => Too long\n"));
- } else {
- for (hit = high_hit - 1; hit >= low_hit; --hit) {
+ debug12(printf("Considering prevposition %u to position %u as a grand rev lookforward\n",prevposition,position));
+ for (hit = high_hit - 1; hit >= low_hit; --hit) {
+ if ((position = mappings[querypos][hit]) + maxintronlen < prevposition) {
+ debug12(printf(" => Too long\n"));
+ } else if (position + indexsize_nt <= prevposition) {
currlink = &(links[querypos][hit]);
- if ((position = mappings[querypos][hit]) + indexsize_nt <= prevposition) {
- currlink->rev_consecutive = indexsize_nt;
- /* currlink->rev_rootnlinks = 1; */
- currlink->rev_pos = grand_rev_querypos;
- currlink->rev_hit = grand_rev_hit;
- currlink->rev_score = best_rev_score;
+ currlink->rev_consecutive = indexsize_nt;
+ /* currlink->rev_rootnlinks = 1; */
+ currlink->rev_pos = grand_rev_querypos;
+ currlink->rev_hit = grand_rev_hit;
+ currlink->rev_score = best_rev_score;
#ifdef DEBUG9
- currlink->rev_tracei = ++rev_tracei;
- currlink->rev_intronnrev = prevlink->rev_intronnfwd;
- currlink->rev_intronnrev = prevlink->rev_intronnrev;
- currlink->rev_intronnunk = prevlink->rev_intronnunk + 1;
+ currlink->rev_tracei = ++rev_tracei;
+ currlink->rev_intronnrev = prevlink->rev_intronnfwd;
+ currlink->rev_intronnrev = prevlink->rev_intronnrev;
+ currlink->rev_intronnunk = prevlink->rev_intronnunk + 1;
#endif
- }
}
- debug12(printf("At querypos %d, setting all rev hits to point back to grand_rev %d,%d with a score of %d\n",
- querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
}
+ debug12(printf("At querypos %d, setting all rev hits to point back to grand_rev %d,%d with a score of %d\n",
+ querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
}
}
diff --git a/src/stage3hr.c b/src/stage3hr.c
index b507de0..44e4a80 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 149163 2014-09-27 03:11:12Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 151052 2014-10-16 19:56:35Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -24,6 +24,7 @@ static char rcsid[] = "$Id: stage3hr.c 149163 2014-09-27 03:11:12Z twu $";
#include "fastlog.h"
+#define SOFT_CLIPS_AVOID_CIRCULARIZATION 1 /* Needed to avoid CIGAR strings like 1S99H */
#define MAX_HITS 100000
@@ -3104,12 +3105,24 @@ compute_circularpos (int *alias, T hit) {
hit->plusp,hit->querylength_adj);
} else if (hit->plusp == true) {
- if (hit->low + hit->trim_left >= hit->chroffset + hit->chrlength) {
+ if (
+#ifdef SOFT_CLIPS_AVOID_CIRCULARIZATION
+ hit->low + hit->trim_left >= hit->chroffset + hit->chrlength
+#else
+ hit->low >= hit->chroffset + hit->chrlength
+#endif
+ ) {
/* All of read after trimming is in circular alias */
*alias = +1;
return -1;
- } else if (hit->high - hit->trim_right <= hit->chroffset + hit->chrlength) {
+ } else if (
+#ifdef SOFT_CLIPS_AVOID_CIRCULARIZATION
+ hit->high - hit->trim_right <= hit->chroffset + hit->chrlength
+#else
+ hit->high <= hit->chroffset + hit->chrlength
+#endif
+ ) {
/* All of read after trimming is in circular proper */
*alias = -1;
return -1;
@@ -3131,26 +3144,40 @@ compute_circularpos (int *alias, T hit) {
}
} else {
- if (hit->low + hit->trim_right >= hit->chroffset + hit->chrlength) {
+ if (
+#ifdef SOFT_CLIPS_AVOID_CIRCULARIZATION
+ hit->low + hit->trim_right >= hit->chroffset + hit->chrlength
+#else
+ hit->low >= hit->chroffset + hit->chrlength
+#endif
+ ) {
/* All of read after trimming is in circular alias */
+ debug12(printf("All of read after trimming is in circular alias\n"));
*alias = +1;
return -1;
- } else if (hit->high - hit->trim_left <= hit->chroffset + hit->chrlength) {
+ } else if (
+#ifdef SOFT_CLIPS_AVOID_CIRCULARIZATION
+ hit->high - hit->trim_left <= hit->chroffset + hit->chrlength
+#else
+ hit->high <= hit->chroffset + hit->chrlength
+#endif
+ ) {
/* All of read after trimming is in circular proper */
+ debug12(printf("All of read after trimming is in circular proper\n"));
*alias = -1;
return -1;
} else {
*alias = 0;
if ((circularpos = Substring_circularpos(hit->substring2)) > 0) {
- debug12(printf("Returning circularpos %d from substring0 (minus)\n",circularpos));
+ debug12(printf("Returning circularpos %d from substring2 (minus)\n",circularpos));
return circularpos;
} else if ((circularpos = Substring_circularpos(hit->substring1)) > 0) {
debug12(printf("Returning circularpos %d from substring1 (minus)\n",circularpos));
return circularpos;
} else if ((circularpos = Substring_circularpos(hit->substring0)) > 0) {
- debug12(printf("Returning circularpos %d from substring2 (minus)\n",circularpos));
+ debug12(printf("Returning circularpos %d from substring0 (minus)\n",circularpos));
return circularpos;
} else {
return -1;
@@ -6298,9 +6325,11 @@ List_T
Stage3end_remove_circular_alias (List_T hitlist) {
List_T newlist = NULL, p;
T hit;
+#ifdef SOFT_CLIPS_AVOID_CIRCULARIZATION
int trim;
+#endif
- debug12(printf("Calling Stage3end_remove_circular_alias\n"));
+ debug12(printf("Calling Stage3end_remove_circular_alias on %d hits\n",List_length(hitlist)));
for (p = hitlist; p != NULL; p = p->rest) {
hit = (T) p->first;
@@ -6315,14 +6344,24 @@ Stage3end_remove_circular_alias (List_T hitlist) {
newlist = List_push(newlist,(void *) hit);
} else {
+#ifdef SOFT_CLIPS_AVOID_CIRCULARIZATION
+ /* This allows soft clips to avoid circularization */
if (hit->plusp == true) {
trim = hit->trim_left;
} else {
trim = hit->trim_right;
}
+#endif
- if (hit->low + trim >= hit->chroffset + hit->chrlength) {
+ if (
+#ifdef SOFT_CLIPS_AVOID_CIRCULARIZATION
+ hit->low + trim >= hit->chroffset + hit->chrlength
+#else
+ hit->low >= hit->chroffset + hit->chrlength
+#endif
+ ) {
/* All in circular alias */
+ debug12(printf("Freeing hit because all is in circular alias\n"));
Stage3end_free(&hit);
} else {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list