[med-svn] [gmap] 01/05: Imported Upstream version 2014-12-24
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Fri Mar 20 10:52:28 UTC 2015
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 1149f7c837782c17462856e447f7154dc670adbc
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Fri Mar 13 10:53:01 2015 +0100
Imported Upstream version 2014-12-24
---
ChangeLog | 12 ++
VERSION | 2 +-
configure | 24 ++--
src/outbuffer.c | 11 +-
src/samprint.c | 225 +++++++++++-------------------
src/samprint.h | 4 +-
src/shortread.c | 14 +-
src/stage3hr.c | 420 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
8 files changed, 516 insertions(+), 196 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 0863155..a1b9de3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2015-03-13 twu
+
+ * outbuffer.c, samprint.c, samprint.h: Applied revision 160876 from trunk to
+ change SAM_compute_chrpos to search for the hardclipped substring, rather
+ than using substring_low
+
+ * shortread.c: Initializing nextchar2 in various procedures
+
+ * stage3hr.c: Applied revision 160755 from trunk to restore correct ilength
+ calculations for minus strand and adjust hardclips by checking adjacent
+ positions left and right of the crossover querypos.
+
2015-03-06 twu
* stage3hr.c: Fixed bampair_sort_cmp to eliminate duplicates that were not
diff --git a/VERSION b/VERSION
index 8e2517b..83a8c64 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2014-12-23
\ No newline at end of file
+2014-12-24
\ No newline at end of file
diff --git a/configure b/configure
index 28a4d62..4a3fa72 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2014-12-23.
+# Generated by GNU Autoconf 2.63 for gmap 2014-12-24.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2014-12-23'
-PACKAGE_STRING='gmap 2014-12-23'
+PACKAGE_VERSION='2014-12-24'
+PACKAGE_STRING='gmap 2014-12-24'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -1512,7 +1512,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2014-12-23 to adapt to many kinds of systems.
+\`configure' configures gmap 2014-12-24 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1583,7 +1583,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2014-12-23:";;
+ short | recursive ) echo "Configuration of gmap 2014-12-24:";;
esac
cat <<\_ACEOF
@@ -1718,7 +1718,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2014-12-23
+gmap configure 2014-12-24
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1732,7 +1732,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2014-12-23, which was
+It was created by gmap $as_me 2014-12-24, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2102,8 +2102,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2014-12-23" >&5
-$as_echo "2014-12-23" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2014-12-24" >&5
+$as_echo "2014-12-24" >&6; }
### Read defaults
@@ -4162,7 +4162,7 @@ fi
# Define the identity of the package.
PACKAGE=gmap
- VERSION=2014-12-23
+ VERSION=2014-12-24
cat >>confdefs.h <<_ACEOF
@@ -26480,7 +26480,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2014-12-23, which was
+This file was extended by gmap $as_me 2014-12-24, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -26543,7 +26543,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2014-12-23
+gmap config.status 2014-12-24
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/src/outbuffer.c b/src/outbuffer.c
index 8116462..bc7ffc8 100644
--- a/src/outbuffer.c
+++ b/src/outbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: outbuffer.c 157721 2015-01-29 22:22:28Z twu $";
+static char rcsid[] = "$Id: outbuffer.c 160877 2015-03-13 00:31:23Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1675,8 +1675,7 @@ print_result_sam (T this, Result_T result, Request_T request) {
/* Stage3end_eval_and_sort(stage3array,npaths,this->maxpaths_report,queryseq1); */
stage3 = stage3array[0];
- chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
+ chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
if (Stage3end_circularpos(stage3) > 0) {
fp = this->fp_unpaired_circular;
abbrev = ABBREV_UNPAIRED_CIRCULAR;
@@ -1716,8 +1715,7 @@ print_result_sam (T this, Result_T result, Request_T request) {
for (pathnum = 1; pathnum <= npaths && pathnum <= this->maxpaths_report; pathnum++) {
stage3 = stage3array[pathnum-1];
- chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
+ chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
SAM_print(this->fp_unpaired_transloc,ABBREV_UNPAIRED_TRANSLOC,
stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
/*acc2*/NULL,pathnum,npaths,
@@ -1753,8 +1751,7 @@ print_result_sam (T this, Result_T result, Request_T request) {
for (pathnum = 1; pathnum <= npaths && pathnum <= this->maxpaths_report; pathnum++) {
stage3 = stage3array[pathnum-1];
- chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
+ chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
SAM_print(this->fp_unpaired_mult,ABBREV_UNPAIRED_MULT,
stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
/*acc2*/NULL,pathnum,npaths,
diff --git a/src/samprint.c b/src/samprint.c
index 2ff9a24..c8cac55 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 154778 2014-12-06 03:32:33Z twu $";
+static char rcsid[] = "$Id: samprint.c 160877 2015-03-13 00:31:23Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -53,6 +53,13 @@ static char rcsid[] = "$Id: samprint.c 154778 2014-12-06 03:32:33Z twu $";
#define debug3(x)
#endif
+/* compute_chrpos */
+#ifdef DEBUG4
+#define debug4(x) x
+#else
+#define debug4(x)
+#endif
+
static bool quiet_if_excessive_p;
static int maxpaths_report;
@@ -311,6 +318,9 @@ SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
}
+#if 0
+/* Replaced by new adjust_hardclips procedure in stage3hr.c */
+
/* Shifts low_querypos and high_querypos upward until a matching
nucleotide is found from both hits. If not found, the shifts
low_querypos and high_querypos downward until a matching nucleotide
@@ -683,13 +693,15 @@ adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
return;
}
+#endif
+
Chrpos_T
-SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, Substring_T substring_low, int querylength) {
+SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, int querylength) {
+ Substring_T substring_hardclipped;
Chrpos_T chrpos;
int querystart, queryend;
- bool plusp;
if (this == NULL) {
return 0U;
@@ -698,45 +710,46 @@ SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, Subst
chrpos = Pair_genomicpos_low(hardclip_low,hardclip_high,Stage3end_pairarray(this),Stage3end_npairs(this),
querylength,/*watsonp*/Stage3end_plusp(this),hide_soft_clips_p);
- } else {
- if (substring_low != NULL) {
- plusp = Substring_plusp(substring_low);
+ } else if (Stage3end_plusp(this) == true) {
+ substring_hardclipped = Stage3end_substring_containing(this,hardclip_low);
+ debug4(printf("Plus: Substring containing hardclip_low %d is %d..%d => %u..%u\n",
+ hardclip_low,Substring_querystart(substring_hardclipped),Substring_queryend(substring_hardclipped),
+ Substring_chrstart(substring_hardclipped),Substring_chrend(substring_hardclipped)));
+
+ /* Add 1 to report in 1-based coordinates */
+ if (hide_soft_clips_p == true) {
+ chrpos = Substring_alignstart(substring_hardclipped) - Substring_chroffset(substring_hardclipped) + 1U;
+ querystart = Substring_querystart_orig(substring_hardclipped);
+ /* queryend = Substring_queryend_orig(Stage3end_substring_high(this)); */
} else {
- plusp = Stage3end_plusp(this);
+ chrpos = Substring_alignstart_trim(substring_hardclipped) - Substring_chroffset(substring_hardclipped) + 1U;
+ querystart = Substring_querystart(substring_hardclipped);
+ /* queryend = Substring_queryend(Stage3end_substring_high(this)); */
}
- if (plusp == true) {
- /* Add 1 to report in 1-based coordinates */
- if (hide_soft_clips_p == true) {
- chrpos = Substring_alignstart(substring_low) - Substring_chroffset(substring_low) + 1U;
- querystart = Substring_querystart_orig(substring_low);
- /* queryend = Substring_queryend_orig(Stage3end_substring_high(this)); */
- } else {
- chrpos = Substring_alignstart_trim(substring_low) - Substring_chroffset(substring_low) + 1U;
- querystart = Substring_querystart(substring_low);
- /* queryend = Substring_queryend(Stage3end_substring_high(this)); */
- }
+ debug4(printf("Incrementing chrpos by %d\n",hardclip_low - querystart));
+ chrpos += hardclip_low - querystart;
- if (hardclip_low > querystart) {
- chrpos += hardclip_low - querystart;
- }
+ } else {
+ substring_hardclipped = Stage3end_substring_containing(this,querylength - 1 - hardclip_low);
+ debug4(printf("Minus: Substring containing querylength - 1 - hardclip_low %d is %d..%d => %u..%u\n",
+ querylength - 1 - hardclip_low,
+ Substring_querystart(substring_hardclipped),Substring_queryend(substring_hardclipped),
+ Substring_chrstart(substring_hardclipped),Substring_chrend(substring_hardclipped)));
+ /* Add 1 to report in 1-based coordinates */
+ if (hide_soft_clips_p == true) {
+ chrpos = Substring_alignend(substring_hardclipped) - Substring_chroffset(substring_hardclipped) + 1U;
+ /* querystart = Substring_querystart_orig(Stage3end_substring_high(this)); */
+ queryend = Substring_queryend_orig(substring_hardclipped);
} else {
- /* Add 1 to report in 1-based coordinates */
- if (hide_soft_clips_p == true) {
- chrpos = Substring_alignend(substring_low) - Substring_chroffset(substring_low) + 1U;
- /* querystart = Substring_querystart_orig(Stage3end_substring_high(this)); */
- queryend = Substring_queryend_orig(substring_low);
- } else {
- chrpos = Substring_alignend_trim(substring_low) - Substring_chroffset(substring_low) + 1U;
- /* querystart = Substring_querystart(Stage3end_substring_high(this)); */
- queryend = Substring_queryend(substring_low);
- }
-
- if (querylength - hardclip_low < queryend) {
- chrpos += queryend - (querylength - hardclip_low);
- }
+ chrpos = Substring_alignend_trim(substring_hardclipped) - Substring_chroffset(substring_hardclipped) + 1U;
+ /* querystart = Substring_querystart(Stage3end_substring_high(this)); */
+ queryend = Substring_queryend(substring_hardclipped);
}
+
+ debug4(printf("Incrementing chrpos by %d\n",queryend - (querylength - hardclip_low)));
+ chrpos += queryend - (querylength - hardclip_low);
}
return chrpos;
@@ -5506,8 +5519,8 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
#endif
querylength = Shortread_fulllength(queryseq);
- donor_chrpos = SAM_compute_chrpos(hardclip_low,hardclip_high,this,/*substring_low*/donor,querylength);
- acceptor_chrpos = SAM_compute_chrpos(hardclip_low,hardclip_high,this,/*substring_low*/acceptor,querylength);
+ donor_chrpos = SAM_compute_chrpos(hardclip_low,hardclip_high,this,querylength);
+ acceptor_chrpos = SAM_compute_chrpos(hardclip_low,hardclip_high,this,querylength);
if (Stage3end_substring_low(this) == donor) {
concordant_chrpos = donor_chrpos;
} else if (Stage3end_substring_low(this) == acceptor) {
@@ -6139,20 +6152,20 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
if (mate == NULL) {
chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,
- this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
+ this,Shortread_fulllength(queryseq));
mate_chrpos = 0U;
hardclip3_low = hardclip3_high = 0;
} else if (first_read_p == true) {
chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
- this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
+ this,Shortread_fulllength(queryseq));
mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
- mate,Stage3end_substring_low(mate),Shortread_fulllength(queryseq_mate));
+ mate,Shortread_fulllength(queryseq_mate));
} else {
chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
- this,/*substring_low*/NULL,Shortread_fulllength(queryseq));
+ this,Shortread_fulllength(queryseq));
mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
- mate,Stage3end_substring_low(mate),Shortread_fulllength(queryseq_mate));
+ mate,Shortread_fulllength(queryseq_mate));
}
flag = SAM_compute_flag(Stage3end_plusp(this),mate,resulttype,first_read_p,
@@ -6310,28 +6323,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
abbrev = ABBREV_CONCORDANT_UNIQ;
}
- if (clipdir == 0) {
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
-
- } else if (hardclip3_high == 0 && hardclip5_low == 0) {
- debug3(printf("clipping %d on hit5 high and %d on hit3 low\n",hardclip5_high,hardclip3_low));
- adjust_hardclips(/*hardclip_low*/&hardclip3_low,/*hit_low*/hit3,/*low_querylength*/Shortread_fulllength(queryseq2),
- /*hardclip_high*/&hardclip5_high,/*hit_high*/hit5,/*high_querylength*/Shortread_fulllength(queryseq1));
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else if (hardclip5_high == 0 && hardclip3_low == 0) {
- debug3(printf("clipping %d on hit5 low and %d on hit3 high\n",hardclip5_low,hardclip3_high));
- adjust_hardclips(/*hardclip_low*/&hardclip5_low,/*hit_low*/hit5,/*low_querylength*/Shortread_fulllength(queryseq1),
- /*hardclip_high*/&hardclip3_high,/*hit_high*/hit3,/*high_querylength*/Shortread_fulllength(queryseq2));
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else {
- fprintf(stderr,"1 Problem: clipdir %d, but hardclip5 %d..%d and hardclip3 %d..%d\n",clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
- abort();
- }
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
if (merge_overlap_p == false || clipdir == 0) {
/* print first end */
@@ -6443,27 +6436,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
}
- if (clipdir == 0) {
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else if (hardclip3_high == 0 && hardclip5_low == 0) {
- debug3(printf("clipping %d on hit5 high and %d on hit3 low\n",hardclip5_high,hardclip3_low));
- adjust_hardclips(/*hardclip_low*/&hardclip3_low,/*hit_low*/hit3,/*low_querylength*/Shortread_fulllength(queryseq2),
- /*hardclip_high*/&hardclip5_high,/*hit_high*/hit5,/*high_querylength*/Shortread_fulllength(queryseq1));
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else if (hardclip5_high == 0 && hardclip3_low == 0) {
- debug3(printf("clipping %d on hit5 low and %d on hit3 high\n",hardclip5_low,hardclip3_high));
- adjust_hardclips(/*hardclip_low*/&hardclip5_low,/*hit_low*/hit5,/*low_querylength*/Shortread_fulllength(queryseq1),
- /*hardclip_high*/&hardclip3_high,/*hit_high*/hit3,/*high_querylength*/Shortread_fulllength(queryseq2));
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else {
- fprintf(stderr,"2 Problem: clipdir %d, but hardclip5 %d..%d and hardclip3 %d..%d\n",clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
- abort();
- }
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
/* print first end */
SAM_print(fp_concordant_transloc,ABBREV_CONCORDANT_TRANSLOC,
@@ -6541,31 +6515,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
}
- if (clipdir == 0) {
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else if (hardclip3_high == 0 && hardclip5_low == 0) {
- debug3(printf("clipping %d on hit5 high and %d on hit3 low\n",hardclip5_high,hardclip3_low));
- adjust_hardclips(/*hardclip_low*/&hardclip3_low,/*hit_low*/hit3,/*low_querylength*/Shortread_fulllength(queryseq2),
- /*hardclip_high*/&hardclip5_high,/*hit_high*/hit5,/*high_querylength*/Shortread_fulllength(queryseq1));
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else if (hardclip5_high == 0 && hardclip3_low == 0) {
- debug3(printf("clipping %d on hit5 low and %d on hit3 high\n",hardclip5_low,hardclip3_high));
- adjust_hardclips(/*hardclip_low*/&hardclip5_low,/*hit_low*/hit5,/*low_querylength*/Shortread_fulllength(queryseq1),
- /*hardclip_high*/&hardclip3_high,/*hit_high*/hit3,/*high_querylength*/Shortread_fulllength(queryseq2));
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/0,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip3_high,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
- } else {
- fprintf(stderr,"3 Problem: clipdir %d, but hardclip5 %d..%d and hardclip3 %d..%d\n",clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
- abort();
- }
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
if (merge_overlap_p == false || clipdir == 0) {
/* print first end */
@@ -6662,10 +6613,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
hit5 = Stage3pair_hit5(stage3pair);
hit3 = Stage3pair_hit3(stage3pair);
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
/* print first end */
SAM_print(fp,abbrev,hit5,/*mate*/hit3,acc1,acc2,/*pathnum*/1,/*npaths*/1,
@@ -6726,10 +6675,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
hit5 = Stage3pair_hit5(stage3pair);
hit3 = Stage3pair_hit3(stage3pair);
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
/* print first end */
SAM_print(fp_paired_mult,ABBREV_PAIRED_MULT,
@@ -6766,10 +6713,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
hit5 = stage3array1[0];
hit3 = stage3array2[0];
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,hit5,
- Stage3end_substring_low(hit5),Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,hit3,
- Stage3end_substring_low(hit3),Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
if (Stage3end_circularpos(hit5) > 0 || Stage3end_circularpos(hit3) > 0) {
fp = fp_unpaired_circular;
@@ -6846,15 +6791,13 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
mate = stage3array2[0];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,mate,
- Stage3end_substring_low(mate),Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq2));
}
if (npaths1 == 1) {
stage3 = stage3array1[0];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
@@ -6878,8 +6821,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
for (pathnum = 1; pathnum <= npaths1 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array1[pathnum-1];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,Shortread_fulllength(queryseq1));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
@@ -6902,15 +6844,13 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
mate = stage3array1[0];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,mate,
- Stage3end_substring_low(mate),Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq1));
}
if (npaths2 == 1) {
stage3 = stage3array2[0];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
@@ -6934,8 +6874,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
for (pathnum = 1; pathnum <= npaths2 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array2[pathnum-1];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
@@ -7011,8 +6950,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
mate = stage3array2[0];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,mate,
- Stage3end_substring_low(mate),Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq2));
}
if (npaths1 == 0) {
@@ -7030,8 +6968,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
stage3 = stage3array1[0];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
Stage3end_absmq_score(stage3),first_absmq1,/*second_absmq1*/0,
@@ -7057,8 +6994,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
for (pathnum = 1; pathnum <= npaths1 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array1[pathnum-1];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
@@ -7081,8 +7017,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
mate = stage3array1[0];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,mate,
- Stage3end_substring_low(mate),Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq1));
}
if (npaths2 == 0) {
@@ -7100,8 +7035,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
stage3 = stage3array2[0];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
@@ -7127,8 +7061,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
for (pathnum = 1; pathnum <= npaths2 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array2[pathnum-1];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/0,stage3,
- Stage3end_substring_low(stage3),Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2));
SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
diff --git a/src/samprint.h b/src/samprint.h
index d25ea42..9d1dc8b 100644
--- a/src/samprint.h
+++ b/src/samprint.h
@@ -1,4 +1,4 @@
-/* $Id: samprint.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: samprint.h 160877 2015-03-13 00:31:23Z twu $ */
#ifndef SAMPRINT_INCLUDED
#define SAMPRINT_INCLUDED
@@ -46,7 +46,7 @@ SAM_file_setup_all (FILE *failedinput_1_in, FILE *failedinput_2_in, FILE *fp_nom
FILE *fp_concordant_mult_in, FILE *fp_concordant_mult_xs_1_in, FILE *fp_concordant_mult_xs_2_in);
extern Chrpos_T
-SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, Substring_T substring_low, int querylength);
+SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, int querylength);
extern unsigned int
SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
diff --git a/src/shortread.c b/src/shortread.c
index c7251de..6db232e 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 151067 2014-10-16 21:09:00Z twu $";
+static char rcsid[] = "$Id: shortread.c 160875 2015-03-13 00:26:46Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2122,7 +2122,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
T queryseq1;
char *acc, *restofheader, *acc2, *restofheader2;
char *long_read_1, *long_read_2, *long_quality;
- int nextchar2;
+ int nextchar2 = '\0';
int fulllength1, fulllength2, quality_length;
bool filterp;
@@ -2361,7 +2361,7 @@ Shortread_read_fasta_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
T queryseq1;
char *acc, *restofheader, *acc2, *restofheader2;
char *long_read_1, *long_read_2;
- int nextchar2;
+ int nextchar2 = '\0';
int fulllength1, fulllength2;
bool filterp;
@@ -2520,7 +2520,7 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
T queryseq1;
char *acc, *restofheader, *acc2, *restofheader2;
char *long_read_1, *long_read_2;
- int nextchar2;
+ int nextchar2 = '\0';
int fulllength1, fulllength2;
bool filterp;
@@ -2671,7 +2671,7 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
T queryseq1;
char *acc, *restofheader;
char *long_read_1, *long_read_2, *long_quality;
- int nextchar2;
+ int nextchar2 = '\0';
int fulllength, quality_length;
bool filterp;
@@ -2840,7 +2840,7 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
T queryseq1;
char *acc, *restofheader;
char *long_read_1, *long_read_2, *long_quality;
- int nextchar2;
+ int nextchar2 = '\0';
int fulllength, quality_length;
bool filterp;
@@ -3014,7 +3014,7 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
T queryseq1;
char *acc, *restofheader;
char *long_read_1, *long_read_2, *long_quality;
- int nextchar2;
+ int nextchar2 = '\0';
int fulllength, quality_length;
bool filterp;
diff --git a/src/stage3hr.c b/src/stage3hr.c
index cdb27d6..8002396 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 160319 2015-03-06 00:19:30Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 160873 2015-03-13 00:22:08Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1836,6 +1836,7 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
debug13(printf("Finding ilengths for common_genomicpos %u\n",(Chrpos_T) (common_genomicpos - chroffset)));
if (hit->hittype == GMAP) {
debug13(printf("Type is GMAP\n"));
+ debug13(Pair_dump_array(hit->pairarray,hit->npairs,true));
i = 0;
while (i < hit->npairs && hit->pairarray[i].genomepos != common_genomicpos - chroffset) {
i++;
@@ -1857,15 +1858,15 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
if (Substring_overlap_point_trimmed_p(hit->substring0,common_genomicpos)) {
debug13(printf("substring0: %u..%u\n",Substring_alignstart_trim(hit->substring0),Substring_alignend_trim(hit->substring0)));
*ilength_low = (common_genomicpos - Substring_alignstart_trim(hit->substring0) + 1);
- *ilength_high = (Substring_alignend_trim(hit->substring0) - common_genomicpos /*+ 1*/)
+ *ilength_high = ((Substring_alignend_trim(hit->substring0) - 1) - common_genomicpos + 1)
+ Substring_genomic_alignment_length(hit->substring1)
+ Substring_genomic_alignment_length(hit->substring2);
} else if (Substring_overlap_point_trimmed_p(hit->substring1,common_genomicpos)) {
debug13(printf("substring1: %u..%u\n",Substring_alignstart_trim(hit->substring1),Substring_alignend_trim(hit->substring1)));
*ilength_low = Substring_genomic_alignment_length(hit->substring0) +
- common_genomicpos - Substring_alignstart_trim(hit->substring1) + 1;
- *ilength_high = (Substring_alignend_trim(hit->substring1) - common_genomicpos /*+ 1*/)
+ (common_genomicpos - Substring_alignstart_trim(hit->substring1) + 1);
+ *ilength_high = ((Substring_alignend_trim(hit->substring1) - 1) - common_genomicpos + 1)
+ Substring_genomic_alignment_length(hit->substring2);
if (hit->hittype == INSERTION) {
*ilength_high += hit->nindels;
@@ -1876,7 +1877,7 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
*ilength_low = Substring_genomic_alignment_length(hit->substring0) +
Substring_genomic_alignment_length(hit->substring1) +
(common_genomicpos - Substring_alignstart_trim(hit->substring2) + 1);
- *ilength_high = (Substring_alignend_trim(hit->substring2) - common_genomicpos /*+ 1*/);
+ *ilength_high = ((Substring_alignend_trim(hit->substring2) - 1) - common_genomicpos + 1);
if (hit->hittype == INSERTION) {
*ilength_low += hit->nindels;
}
@@ -1892,14 +1893,14 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
debug13(printf("substring0: %u..%u\n",Substring_alignstart_trim(hit->substring0),Substring_alignend_trim(hit->substring0)));
*ilength_low = Substring_genomic_alignment_length(hit->substring2) +
Substring_genomic_alignment_length(hit->substring1) +
- (common_genomicpos - Substring_alignend_trim(hit->substring0) + 1);
- *ilength_high = (Substring_alignstart_trim(hit->substring0) - common_genomicpos /*+ 1*/);
+ (common_genomicpos - (Substring_alignend_trim(hit->substring0) + 1) + 1);
+ *ilength_high = (Substring_alignstart_trim(hit->substring0) - common_genomicpos + 1);
} else if (Substring_overlap_point_trimmed_p(hit->substring1,common_genomicpos)) {
debug13(printf("substring1: %u..%u\n",Substring_alignstart_trim(hit->substring1),Substring_alignend_trim(hit->substring1)));
*ilength_low = Substring_genomic_alignment_length(hit->substring2) +
- (common_genomicpos - Substring_alignend_trim(hit->substring1) + 1);
- *ilength_high = (Substring_alignstart_trim(hit->substring1) - common_genomicpos /*+ 1*/)
+ (common_genomicpos - (Substring_alignend_trim(hit->substring1) + 1) + 1);
+ *ilength_high = (Substring_alignstart_trim(hit->substring1) - common_genomicpos + 1)
+ Substring_genomic_alignment_length(hit->substring0);
if (hit->hittype == INSERTION) {
*ilength_low += hit->nindels;
@@ -1907,8 +1908,8 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
} else if (Substring_overlap_point_trimmed_p(hit->substring2,common_genomicpos)) {
debug13(printf("substring2: %u..%u\n",Substring_alignstart_trim(hit->substring2),Substring_alignend_trim(hit->substring2)));
- *ilength_low = (common_genomicpos - Substring_alignend_trim(hit->substring2) + 1);
- *ilength_high = (Substring_alignstart_trim(hit->substring2) - common_genomicpos /*+ 1*/)
+ *ilength_low = (common_genomicpos - (Substring_alignend_trim(hit->substring2) + 1) + 1);
+ *ilength_high = (Substring_alignstart_trim(hit->substring2) - common_genomicpos + 1)
+ Substring_genomic_alignment_length(hit->substring1)
+ Substring_genomic_alignment_length(hit->substring0);
if (hit->hittype == INSERTION) {
@@ -2514,6 +2515,366 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
+/* Replaces adjust_hardclips in samprint.c. */
+static void
+adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
+ int *hardclip_high, Stage3end_T hit_high, int high_querylength) {
+ int orig_hardclip_low, orig_hardclip_high;
+ Substring_T low_substring, high_substring;
+ struct Pair_T *low_pairarray, *high_pairarray;
+ int low_querystart, low_queryend, low_npairs, high_npairs;
+ int low_querypos, high_querypos;
+ bool plusp;
+
+ debug13(printf("Entering adjust_hardclips with hardclip_low %d, hardclip_high %d\n",
+ *hardclip_low,*hardclip_high));
+ orig_hardclip_low = *hardclip_low;
+ orig_hardclip_high = *hardclip_high;
+
+ plusp = Stage3end_plusp(hit_low);
+
+ if (Stage3end_hittype(hit_low) == GMAP && Stage3end_hittype(hit_high) == GMAP) {
+ debug13(printf("Dual GMAP\n"));
+ low_pairarray = Stage3end_pairarray(hit_low);
+ low_npairs = Stage3end_npairs(hit_low);
+ high_pairarray = Stage3end_pairarray(hit_high);
+ high_npairs = Stage3end_npairs(hit_high);
+
+ if (plusp == true) {
+ low_querypos = *hardclip_low;
+ high_querypos = high_querylength - 1 - (*hardclip_high);
+
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false)) {
+ /* Shift clip querypos upward (rightward on chromosome) */
+ debug13(printf("Low GMAP does not contain %d or high GMAP does not contain lower querypos %d => ",
+ low_querypos-1,high_querypos-1));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ while (low_querypos >= 0 && high_querypos >= 0 &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false)) {
+ /* Shift clip querypos downward (leftward on chromosome) */
+ debug13(printf("Low GMAP does not contain %d or high GMAP does not contain higher querypos %d => ",
+ low_querypos+1,high_querypos+1));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ if (low_querypos < 0 || low_querypos >= low_querylength ||
+ high_querypos < 0 || high_querypos >= high_querylength) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - (*hardclip_low);
+ high_querypos = *hardclip_high;
+
+ while (low_querypos >= 0 && high_querypos >= 0 &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false)) {
+ /* Shift clip querypos downward (rightward on chromosome) */
+ debug13(printf("Low GMAP does not contain %d or high GMAP does not contain higher querypos %d => ",
+ low_querypos+1,high_querypos+1));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false)) {
+ /* Shift clip querypos upward (leftward on chromosome) */
+ debug13(printf("Low GMAP does not contain %d or high GMAP does not contain lower querypos %d => ",
+ low_querypos-1,high_querypos-1));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ if (low_querypos < 0 || low_querypos >= low_querylength ||
+ high_querypos < 0 || high_querypos >= high_querylength) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
+ }
+
+ } else if (Stage3end_hittype(hit_low) == GMAP) {
+ debug13(printf("Low GMAP\n"));
+ low_pairarray = Stage3end_pairarray(hit_low);
+ low_npairs = Stage3end_npairs(hit_low);
+
+ if (plusp == true) {
+ low_querypos = *hardclip_low;
+ high_querypos = high_querylength - 1 - (*hardclip_high);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ while (low_querypos < low_querylength && high_substring != NULL &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Substring_contains_p(high_substring,high_querypos-1) == false)) {
+ /* Shift clip querypos upward (rightward on chromosome) */
+ debug13(printf("Low GMAP does not contain %d or high %d..%d does not contain lower querypos %d => ",
+ low_querypos-1,Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos-1));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos++;
+ high_querypos++;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ while (low_querypos >= 0 && high_substring != NULL &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ Substring_contains_p(high_substring,high_querypos+1) == false)) {
+ /* Shift clip querypos downward (leftward on chromosome) */
+ debug13(printf("Low GMAP does not contain %d or high %d..%d does not contain higher querypos %d => ",
+ low_querypos+1,Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos+1));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ if (low_querypos < 0 || low_querypos >= low_querylength || high_substring == NULL) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - (*hardclip_low);
+ high_querypos = *hardclip_high;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+
+ while (low_querypos >= 0 && high_substring != NULL &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ Substring_contains_p(high_substring,high_querypos+1) == false)) {
+ /* Shift clip querypos downward (rightward on chromosome) */
+ debug13(printf("Low GMAP does not contain %d or high %d..%d does not contain higher querypos %d => ",
+ low_querypos+1,Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos+1));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ while (low_querypos < low_querylength && high_substring != NULL &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Substring_contains_p(high_substring,high_querypos-1) == false)) {
+ /* Shift clip querypos upward (leftward on chromosome) */
+ debug13(printf("Low GMAP does not contain %d or high %d..%d does not contain lower querypos %d => ",
+ low_querypos-1,Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos-1));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos++;
+ high_querypos++;
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ if (low_querypos < 0 || low_querypos >= low_querylength || high_substring == NULL) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
+ }
+
+ } else if (Stage3end_hittype(hit_high) == GMAP) {
+ debug13(printf("High GMAP\n"));
+ high_pairarray = Stage3end_pairarray(hit_high);
+ high_npairs = Stage3end_npairs(hit_high);
+
+ if (plusp == true) {
+ low_querypos = *hardclip_low;
+ high_querypos = high_querylength - 1 - (*hardclip_high);
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+
+ while (low_substring != NULL && high_querypos < high_querylength &&
+ (Substring_contains_p(low_substring,low_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false)) {
+ /* Shift clip querypos upward (rightward on chromosome) */
+ debug13(printf("Low %d..%d does not contain %d or high GMAP does not contain lower querypos %d => ",
+ Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos-1,high_querypos-1));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos++;
+ high_querypos++;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ while (low_substring != NULL && high_querypos >= 0 &&
+ (Substring_contains_p(low_substring,low_querypos+1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false)) {
+ /* Shift clip querypos downward (leftward on chromosome) */
+ debug13(printf("Low %d..%d does not contain %d or high GMAP does not contain higher querypos %d => ",
+ Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos+1,high_querypos+1));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ if (high_querypos < 0 || high_querypos >= high_querylength || low_substring == NULL) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - (*hardclip_low);
+ high_querypos = *hardclip_high;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+
+ while (low_substring != NULL && high_querypos >= 0 &&
+ (Substring_contains_p(low_substring,low_querypos+1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false)) {
+ /* Shift clip querypos downward (rightward on chromosome) */
+ debug13(printf("Low %d..%d does not contain %d or high GMAP does not contain higher querypos %d => ",
+ Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos+1,high_querypos+1));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ while (low_substring != NULL && high_querypos < high_querylength &&
+ (Substring_contains_p(low_substring,low_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false)) {
+ /* Shift clip querypos upward (rightward on chromosome) */
+ debug13(printf("Low %d..%d does not contain %d or high GMAP does not contain lower querypos %d => ",
+ Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos-1,high_querypos-1));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos++;
+ high_querypos++;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ if (high_querypos < 0 || high_querypos >= high_querylength || low_substring == NULL) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
+ }
+
+ } else {
+ if (plusp == true) {
+ debug13(printf("Both substrings, plus\n"));
+
+ low_querypos = *hardclip_low;
+ high_querypos = high_querylength - 1 - *hardclip_high;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+
+ while (low_substring != NULL && high_substring != NULL &&
+ (Substring_contains_p(low_substring,low_querypos-1) == false ||
+ Substring_contains_p(high_substring,high_querypos-1) == false)) {
+ /* Shift clip querypos upward (rightward on chromosome) */
+ debug13(printf("Low %d..%d does not contain %d or high %d..%d does not contain lower querypos %d => ",
+ Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos-1,
+ Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos-1));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos++;
+ high_querypos++;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ while (low_substring != NULL && high_substring != NULL &&
+ (Substring_contains_p(low_substring,low_querypos+1) == false ||
+ Substring_contains_p(high_substring,high_querypos+1) == false)) {
+ /* Shift clip querypos downward (leftward on chromosome) */
+ debug13(printf("Low %d..%d does not contain %d or high %d..%d does not contain higher querypos %d => ",
+ Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos+1,
+ Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos+1));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos--;
+ high_querypos--;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ if (low_substring == NULL || high_substring == NULL) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
+
+ } else {
+ debug13(printf("Both substrings, minus\n"));
+
+ low_querypos = low_querylength - 1 - (*hardclip_low);
+ high_querypos = *hardclip_high;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+
+ while (low_substring != NULL && high_substring != NULL &&
+ (Substring_contains_p(low_substring,low_querypos+1) == false ||
+ Substring_contains_p(high_substring,high_querypos+1) == false)) {
+ /* Shift clip querypos downward (rightward on chromosome) */
+ debug13(printf("Low %d..%d does not contain %d or high %d..%d does not contain higher querypos %d => ",
+ Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos+1,
+ Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos+1));
+ (*hardclip_low)++;
+ (*hardclip_high)--;
+ low_querypos--;
+ high_querypos--;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ while (low_substring != NULL && high_substring != NULL &&
+ (Substring_contains_p(low_substring,low_querypos-1) == false ||
+ Substring_contains_p(high_substring,high_querypos-1) == false)) {
+ /* Shift clip querypos upward (rightward on chromosome) */
+ debug13(printf("Low %d..%d does not contain %d or high %d..%d does not contain lower querypos %d => ",
+ Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos-1,
+ Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos-1));
+ (*hardclip_low)--;
+ (*hardclip_high)++;
+ low_querypos++;
+ high_querypos++;
+ low_substring = Stage3end_substring_containing(hit_low,low_querypos);
+ high_substring = Stage3end_substring_containing(hit_high,high_querypos);
+ debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
+ }
+
+ if (low_substring == NULL || high_substring == NULL) {
+ *hardclip_low = orig_hardclip_low;
+ *hardclip_high = orig_hardclip_high;
+ }
+ }
+ }
+
+ debug13(printf("Exiting adjust_hardclips with hardclip_low %d, hardclip_high %d\n",
+ *hardclip_low,*hardclip_high));
+
+ return;
+}
+
/* Note: Do not alter this->insertlength, which is used for SAM
@@ -2606,15 +2967,20 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
*hardclip5_high += hit5->trim_right + hit5->end_amb_length;
*hardclip3_low += hit3->trim_left + hit3->start_amb_length;
- debug13(printf("Clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug13(printf("Ambig clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ adjust_hardclips(&(*hardclip3_low),hit3,hit3->querylength_adj,
+ &(*hardclip5_high),hit5,hit5->querylength_adj);
+ debug13(printf("Adjusted clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+
if (*hardclip5_high < 0) {
*hardclip5_high = 0;
}
if (*hardclip3_low < 0) {
*hardclip3_low = 0;
}
- debug13(printf("Clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug13(printf("Positive clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
} else {
@@ -2639,15 +3005,20 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
*hardclip5_low += hit5->trim_left + hit5->start_amb_length;
*hardclip3_high += hit3->trim_right + hit3->end_amb_length;
- debug13(printf("Clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug13(printf("Ambig clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ adjust_hardclips(&(*hardclip5_low),hit5,hit5->querylength_adj,
+ &(*hardclip3_high),hit3,hit3->querylength_adj);
+ debug13(printf("Adjusted clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+
if (*hardclip5_low < 0) {
*hardclip5_low = 0;
}
if (*hardclip3_high < 0) {
*hardclip3_high = 0;
}
- debug13(printf("Clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug13(printf("Positive clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
@@ -2712,7 +3083,11 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
*hardclip5_high += hit5->trim_left + hit5->start_amb_length;
*hardclip3_low += hit3->trim_right + hit3->end_amb_length;
- debug13(printf("Clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug13(printf("Ambig clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ adjust_hardclips(&(*hardclip3_low),hit3,hit3->querylength_adj,
+ &(*hardclip5_high),hit5,hit5->querylength_adj);
+ debug13(printf("Adjusted clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
if (*hardclip5_high < 0) {
*hardclip5_high = 0;
@@ -2720,7 +3095,7 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
if (*hardclip3_low < 0) {
*hardclip3_low = 0;
}
- debug13(printf("Clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug13(printf("Positive clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
} else {
@@ -2739,23 +3114,26 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
}
/* Want to clip 5 low and 3 high. Verified. */
- debug13(printf("Clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
- *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
*hardclip5_low = ilength5_low + common_shift;
*hardclip3_high = overlap - (*hardclip5_low);
debug13(printf("Clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
*hardclip5_low += hit5->trim_right + hit5->end_amb_length;
*hardclip3_high += hit3->trim_left + hit3->start_amb_length;
- debug13(printf("Clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug13(printf("Ambig clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ adjust_hardclips(&(*hardclip5_low),hit5,hit5->querylength_adj,
+ &(*hardclip3_high),hit3,hit3->querylength_adj);
+ debug13(printf("Adjusted clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+
if (*hardclip5_low < 0) {
*hardclip5_low = 0;
}
if (*hardclip3_high < 0) {
*hardclip3_high = 0;
}
- debug13(printf("Clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug13(printf("Positive clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list